diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e0dd2b855c400fb8aa8a33cc73fe2acfe0d54b8 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "down_proj", + "q_proj", + "k_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e2a72dea5c8bfe53ca97ce06b10d5791dcbf361 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabdc79eeea56bbec7f760233c65518d1f7c78e69bc33e03ea66362ed2ab12d1 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..33a23d7dda2b4b8eeb4728e6d4658501a035d523 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.6049, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.5677, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.287, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1397, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0795, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 1.0153, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1696, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1496, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8637, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9228, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.9077, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.9028, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.9086, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9552, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9274, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.9081, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.8028, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.8879, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9053, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8821, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9305, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8461, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.9011, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8867, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8389, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9627, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8125, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8371, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8698, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.795, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8308, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8484, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9724, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.8933, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9189, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9257, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8358, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9147, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8726, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8943, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8694, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8393, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8746, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8654, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9969, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.7979, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8569, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8575, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9296, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8336, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.861, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8934, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8696, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8737, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8898, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9416, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7131, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9024, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8106, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.818, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8374, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8278, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9448, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8176, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9006, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8019, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8697, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8485, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8067, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8093, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8593, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8346, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8768, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8154, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7885, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7218, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8328, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8158, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8299, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8389, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8949, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8906, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8657, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8198, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7598, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8781, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9329, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8624, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7555, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.8942, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8751, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8816, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8395, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7847, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8718, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8546, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.864, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.828, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8531, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8737, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8595, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8554, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.9207, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7204, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8894, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9081, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8254, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7317, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8093, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.91, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8295, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8139, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.83, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.888, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7776, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.9289, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7565, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8483, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9144, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.8004, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7999, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8308, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.7993, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8012, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9826, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8274, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8353, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7645, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.8052, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8088, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8813, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8412, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8056, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8288, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.7976, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8435, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8213, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8252, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8717, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8021, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9097, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8138, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8239, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.8928, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8394, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8145, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.793, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8447, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8043, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8184, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8438, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8593, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9214, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.9089, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7958, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7539, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9521, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.8231670013217997e+17, + "train_loss": 0.8739353289270098, + "train_runtime": 606.183, + "train_samples_per_second": 8.248, + "train_steps_per_second": 0.259 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8231670013217997e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..18950b792acbcb6a369153de20f06e59efe4517d --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "gate_proj", + "o_proj", + "up_proj", + "down_proj", + "q_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65c517b1528ec3f28f99de1049365d61e7c6bf42 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6d67f6f22e19587f0f4829988e80c0a3c96544c49f9f708a35c49218d2b0fe +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3daf46e4d9ea4c2cbccea5695657be56784f5903 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.5817, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.5711, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.3182, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1334, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0625, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 0.9888, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1405, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1542, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.849, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.906, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.9003, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.9034, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.9186, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.961, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9243, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.9142, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7989, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.8994, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9093, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.9071, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9132, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8535, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.8986, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8829, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8102, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9561, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8218, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8366, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8768, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.803, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8192, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8371, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9333, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.908, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.938, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9311, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8424, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9229, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8747, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8947, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8787, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8314, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8731, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8511, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9869, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.8125, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.868, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8703, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9422, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8392, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.86, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8937, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8762, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8871, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8835, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9378, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7139, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9047, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8321, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.857, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8319, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8344, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9341, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8111, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.8999, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8366, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8498, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8503, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8007, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8063, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8664, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8396, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8645, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.824, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.8026, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7482, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8448, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8229, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8355, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8457, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8829, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8783, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8575, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8192, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7474, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8726, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.952, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8611, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7553, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.9075, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8751, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8663, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8329, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7882, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8751, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8498, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8738, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8388, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8448, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.877, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8464, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8673, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.9089, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7131, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.892, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9104, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8284, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7382, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8112, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8984, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8132, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.811, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8368, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8836, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7717, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.9265, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7461, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8586, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9126, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.8097, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7964, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8424, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.793, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8117, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9946, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8304, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8332, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7745, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.795, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8032, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.869, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8405, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.791, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8274, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.8043, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.85, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8213, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8231, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8606, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8114, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9088, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8104, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8277, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.897, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8368, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8255, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.8072, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8417, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8052, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8311, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8345, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8417, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9181, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.8762, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7942, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7772, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9354, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.8389854578121114e+17, + "train_loss": 0.8740508131160858, + "train_runtime": 613.0383, + "train_samples_per_second": 8.156, + "train_steps_per_second": 0.256 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8389854578121114e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9017ad519ba2fd3fb6c719230c5a9581d1045468 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16591481703ff23a6ac67715d1bbbf7502fc16b2 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f217654051ca039066a54f6f9ab673645cf9808ced93014c396fdac5b72962 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..84e27204dbc852688bc00c0b8c3115b42dbb1ff4 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.6142, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.5786, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.2847, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1396, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0801, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 1.0158, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1713, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1505, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8642, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9232, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.9072, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.9024, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.9076, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9545, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9271, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.9082, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.8031, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.8884, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9053, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8838, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9307, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.847, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.9007, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8866, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.838, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9628, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8116, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8367, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8703, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.7948, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8315, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8493, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9724, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.893, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.92, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9265, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8343, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9146, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.873, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8941, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8715, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8403, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8749, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8654, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9962, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.7976, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8586, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8587, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9293, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8326, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8618, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8936, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8694, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8758, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8909, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9415, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7136, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9016, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8108, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8177, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8379, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8294, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9436, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8188, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9014, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8018, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8717, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8495, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8079, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8104, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8598, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8319, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8767, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8151, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7896, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7223, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8326, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.817, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8301, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8375, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8966, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8918, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8671, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8199, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7601, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8774, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9337, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8624, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7563, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.8946, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8755, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8803, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8393, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7844, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8715, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8551, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8642, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8282, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8538, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8745, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.86, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.856, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.9212, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7199, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8897, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9083, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8257, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7323, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.81, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.9093, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8304, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8144, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8304, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8901, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7796, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.93, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7561, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8489, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9132, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.7999, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.8003, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8317, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8005, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8019, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9828, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8288, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.835, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.766, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.8061, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8105, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8824, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8424, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8065, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8305, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.7974, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8435, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8227, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8259, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8724, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8023, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9123, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8145, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8239, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.8931, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8401, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8147, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7931, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8451, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8043, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8182, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8435, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8595, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9216, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.9103, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7959, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7549, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9527, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.8231670013217997e+17, + "train_loss": 0.874420439741414, + "train_runtime": 607.8943, + "train_samples_per_second": 8.225, + "train_steps_per_second": 0.258 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8231670013217997e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3c3cb36644dc2bbdc46d7b792a4182d0474dd00 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "v_proj", + "q_proj", + "gate_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53e0a530f78560ce745bd118c4dc69da1085ed5f --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2a2f1e2ad0d954f92a1b6d04d2ecb413d8ea25a526b4f625d850313fd8b206 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..992802e127d26b35d27c545d8da7272fed3c8404 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.6295, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.6026, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.248, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1522, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0594, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 1.0206, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1345, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1577, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8709, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9002, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.8984, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.884, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.9051, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9614, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9494, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.9168, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.798, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.9036, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9085, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.9088, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9316, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8441, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.8979, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8839, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8382, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9685, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.812, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8297, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8635, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.7964, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8409, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8342, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9455, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.9033, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9191, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9343, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8241, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.92, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8653, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8933, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8766, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8103, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.874, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8465, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9982, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.7975, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.866, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8556, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9404, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8321, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8533, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8929, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8647, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8673, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8801, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9484, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7127, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9191, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.816, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8402, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8325, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8293, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9403, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8097, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9047, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.7984, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8471, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8512, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8096, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8018, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8577, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8162, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8753, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8272, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7928, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7455, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8327, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8059, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8331, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8438, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8964, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8688, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8589, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.831, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7501, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8769, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9562, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.872, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7628, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.8795, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8724, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8727, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8453, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7799, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8688, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8608, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8739, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8461, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.847, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8698, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8466, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8662, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.8859, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7262, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8928, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9302, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8222, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7354, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8246, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8968, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8138, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8306, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8275, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8884, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7864, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.9299, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7418, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8454, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9107, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.8149, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.8049, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8318, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.7918, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8116, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9814, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8234, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8349, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7845, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.7994, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.7918, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8669, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8403, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8024, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8434, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.8117, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8466, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8432, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8248, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8652, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.804, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9036, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8085, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8281, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.8945, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8357, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8258, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.8044, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8358, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8028, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8254, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.838, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8531, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9063, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.8737, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7867, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7725, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9406, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.848582743428956e+17, + "train_loss": 0.8735485073107823, + "train_runtime": 618.1012, + "train_samples_per_second": 8.089, + "train_steps_per_second": 0.254 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.848582743428956e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4f99ada9cb1118e700a7db0b5e001cc9dfe6f3e --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "up_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a6f283e46890f9225bba04aa883caaf9b31a9bb --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fcb33d76292e8ff9b990a50dc619fd704a613edf089f004d72d497e5b2534e +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..af68d6ff4d944f34f4805bc5bece826dcc8c13a6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.5785, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.4715, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.2647, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1428, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0595, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 0.9761, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1309, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1512, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8451, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9051, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.8885, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.8928, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8923, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.951, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9338, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.8979, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.8001, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.9024, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9052, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8975, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9262, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8366, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.9157, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8821, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8154, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.97, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8211, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8308, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8671, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.8054, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8215, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8451, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9579, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.8815, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9276, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9327, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8515, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9245, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8686, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8865, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8799, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.7973, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8798, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8601, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.983, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.8132, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8731, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8627, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9437, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8387, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.871, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8742, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8681, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8718, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8804, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9424, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7149, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9085, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8235, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8361, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8283, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.831, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9272, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8163, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9022, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8155, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8482, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8445, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8116, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8018, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8519, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.834, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8733, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8164, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7852, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7621, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.831, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8202, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8359, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8675, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8936, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8964, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8691, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8405, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7521, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8911, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9457, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8582, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7549, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.9221, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8823, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8874, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8461, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7844, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8769, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8543, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8799, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8386, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8621, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8576, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8518, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8532, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.8984, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7299, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8913, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9104, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8203, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.734, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8011, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8944, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8006, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8161, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8747, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8827, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7817, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.9346, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7557, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.855, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9043, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.815, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7945, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8348, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8028, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8007, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9846, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8312, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.838, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7631, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.8003, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8033, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8626, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8439, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8484, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8451, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.8174, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8448, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8174, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.823, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8671, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8058, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9194, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8112, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8337, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.9022, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8505, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8192, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7939, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8407, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8031, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8162, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8415, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8578, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9013, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.8782, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7967, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7693, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9033, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.8496968613009818e+17, + "train_loss": 0.8729076776534889, + "train_runtime": 624.4234, + "train_samples_per_second": 8.007, + "train_steps_per_second": 0.251 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8496968613009818e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..452f2aa318e3ad12a9319174846abf59f5a6fdff --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "q_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..353682d28c7047c137e958a94fb03325be7eecef --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79679ad6ed209b49fa9fc42508a33192c4757ffc1a1c14c03fdcf4ea8ab10cc +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5b3fddfe25c2275686d0999c2cc3a3afdb35d9f5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/trainer_state.json @@ -0,0 +1,1914 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 314, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 1.5818, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.4755, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 1.3095, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 8.000000000000001e-06, + "loss": 1.2063, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 1.1368, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.2e-05, + "loss": 1.0209, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.4e-05, + "loss": 1.185, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.2063, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.8e-05, + "loss": 0.8748, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 2e-05, + "loss": 0.9318, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.999946602771351e-05, + "loss": 0.9099, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9997864167879313e-05, + "loss": 0.9067, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.999519459156716e-05, + "loss": 0.9027, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.999145758387301e-05, + "loss": 0.9631, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.998665354388857e-05, + "loss": 0.943, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9980782984658682e-05, + "loss": 0.9084, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9973846533126533e-05, + "loss": 0.8094, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.99658449300667e-05, + "loss": 0.9148, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.9956779030006038e-05, + "loss": 0.9141, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9059, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9935458325191365e-05, + "loss": 0.9316, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.992320579737045e-05, + "loss": 0.847, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9909893526171745e-05, + "loss": 0.9222, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.8904, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9880095553370967e-05, + "loss": 0.8193, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.9747, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.9846077135482513e-05, + "loss": 0.8248, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.8356, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.9807852804032306e-05, + "loss": 0.87, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.8082, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.9765438887171327e-05, + "loss": 0.8245, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.8473, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.9718853502700783e-05, + "loss": 0.9611, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.8855, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.966811655033277e-05, + "loss": 0.9304, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.9339, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.96132497031898e-05, + "loss": 0.8534, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9269, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.9554276398546767e-05, + "loss": 0.8699, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8889, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.9491221827819348e-05, + "loss": 0.8806, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.7995, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.942411292580304e-05, + "loss": 0.8818, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8597, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.935297835916754e-05, + "loss": 0.9841, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.813, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.927784851421132e-05, + "loss": 0.8739, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.864, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.9198755483881585e-05, + "loss": 0.9453, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8378, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.911573305406528e-05, + "loss": 0.8705, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.8748, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.902881668915688e-05, + "loss": 0.8694, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8718, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.8938043516909173e-05, + "loss": 0.88, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.9423, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.8843452312573557e-05, + "loss": 0.7148, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.879473751206489e-05, + "loss": 0.9104, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.8745083482336547e-05, + "loss": 0.8233, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.869449552616367e-05, + "loss": 0.8367, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.8642979046059595e-05, + "loss": 0.8282, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8311, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.853718261932964e-05, + "loss": 0.929, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8166, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.8427739394827976e-05, + "loss": 0.9019, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.8145, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.8314696123025456e-05, + "loss": 0.8465, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.825683949996556e-05, + "loss": 0.8442, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.819810109221227e-05, + "loss": 0.8103, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.813848717270195e-05, + "loss": 0.8019, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.8078004107870797e-05, + "loss": 0.8514, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.8341, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.7954456471400393e-05, + "loss": 0.8727, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8152, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.7827510958203147e-05, + "loss": 0.785, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.7608, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.769722179513998e-05, + "loss": 0.83, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8157, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.7563644637366786e-05, + "loss": 0.835, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8646, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1.74268365445604e-05, + "loss": 0.8946, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8949, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 1.7286855956544616e-05, + "loss": 0.8676, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8363, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 1.7143762668326667e-05, + "loss": 0.7519, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8902, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 1.69976178045548e-05, + "loss": 0.9433, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8566, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 1.6848483793407874e-05, + "loss": 0.7547, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9211, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 1.6696424339928153e-05, + "loss": 0.8832, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.8847, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 1.6541504398808633e-05, + "loss": 0.8423, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 1.646299237860941e-05, + "loss": 0.7827, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 1.638379014664659e-05, + "loss": 0.8779, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8516, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 1.6223348953675163e-05, + "loss": 0.8793, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.8393, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 1.6060249354985023e-05, + "loss": 0.8612, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8567, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 1.5894561021248535e-05, + "loss": 0.8511, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8521, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 1.5726354728958736e-05, + "loss": 0.8961, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.7285, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 1.5555702330196024e-05, + "loss": 0.8936, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.9089, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 1.5382676721935344e-05, + "loss": 0.8203, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 1.529529702260709e-05, + "loss": 0.7302, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 1.5207351814907068e-05, + "loss": 0.7977, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8916, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 1.5029802502024788e-05, + "loss": 0.7983, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.8178, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 1.4850104626393598e-05, + "loss": 0.8737, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.8758, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 1.4668334948912455e-05, + "loss": 0.7809, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9323, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 1.4484571115484508e-05, + "loss": 0.7564, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8519, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 1.429889162384937e-05, + "loss": 0.8999, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8166, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 1.4111375790051511e-05, + "loss": 0.7898, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8342, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 1.392210371455913e-05, + "loss": 0.8015, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.7963, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 1.3731156248047903e-05, + "loss": 0.9803, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.8302, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.3538614956864297e-05, + "loss": 0.8336, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.7617, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.3344562088183166e-05, + "loss": 0.8005, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.8004, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.3149080534874519e-05, + "loss": 0.8525, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8398, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2952253800094467e-05, + "loss": 0.8423, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8409, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.2754165961615482e-05, + "loss": 0.8101, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8427, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 1.2554901635911188e-05, + "loss": 0.8152, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8222, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 1.2354545942011058e-05, + "loss": 0.8637, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.804, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 1.2153184465140413e-05, + "loss": 0.9133, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.805, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 1.1950903220161286e-05, + "loss": 0.8318, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8944, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 1.1747788614829758e-05, + "loss": 0.844, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8142, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 1.1543927412885489e-05, + "loss": 0.7869, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8344, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.1339406696989128e-05, + "loss": 0.7961, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.123692631269348e-05, + "loss": 0.8034, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 1.1134313831523547e-05, + "loss": 0.8347, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 1.103158021192357e-05, + "loss": 0.8523, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 1.0928736425274702e-05, + "loss": 0.8964, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8771, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 1.0722762294008107e-05, + "loss": 0.7916, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.7654, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 1.0516479422956882e-05, + "loss": 0.9166, + "step": 157 + }, + { + "epoch": 1.01, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.7424, + "step": 158 + }, + { + "epoch": 1.01, + "learning_rate": 1.0309975929237408e-05, + "loss": 0.7315, + "step": 159 + }, + { + "epoch": 1.02, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.713, + "step": 160 + }, + { + "epoch": 1.03, + "learning_rate": 1.0103340024208674e-05, + "loss": 0.652, + "step": 161 + }, + { + "epoch": 1.03, + "learning_rate": 1e-05, + "loss": 0.7137, + "step": 162 + }, + { + "epoch": 1.04, + "learning_rate": 9.89665997579133e-06, + "loss": 0.6697, + "step": 163 + }, + { + "epoch": 1.04, + "learning_rate": 9.79333098772446e-06, + "loss": 0.7271, + "step": 164 + }, + { + "epoch": 1.05, + "learning_rate": 9.690024070762597e-06, + "loss": 0.7771, + "step": 165 + }, + { + "epoch": 1.06, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8035, + "step": 166 + }, + { + "epoch": 1.06, + "learning_rate": 9.483520577043121e-06, + "loss": 0.75, + "step": 167 + }, + { + "epoch": 1.07, + "learning_rate": 9.380346053714055e-06, + "loss": 0.7385, + "step": 168 + }, + { + "epoch": 1.08, + "learning_rate": 9.277237705991895e-06, + "loss": 0.8113, + "step": 169 + }, + { + "epoch": 1.08, + "learning_rate": 9.174206545276678e-06, + "loss": 0.6981, + "step": 170 + }, + { + "epoch": 1.09, + "learning_rate": 9.0712635747253e-06, + "loss": 0.7025, + "step": 171 + }, + { + "epoch": 1.1, + "learning_rate": 8.968419788076431e-06, + "loss": 0.7553, + "step": 172 + }, + { + "epoch": 1.1, + "learning_rate": 8.865686168476458e-06, + "loss": 0.664, + "step": 173 + }, + { + "epoch": 1.11, + "learning_rate": 8.763073687306523e-06, + "loss": 0.7415, + "step": 174 + }, + { + "epoch": 1.11, + "learning_rate": 8.660593303010876e-06, + "loss": 0.7462, + "step": 175 + }, + { + "epoch": 1.12, + "learning_rate": 8.558255959926533e-06, + "loss": 0.7285, + "step": 176 + }, + { + "epoch": 1.13, + "learning_rate": 8.456072587114516e-06, + "loss": 0.7491, + "step": 177 + }, + { + "epoch": 1.13, + "learning_rate": 8.35405409719266e-06, + "loss": 0.6176, + "step": 178 + }, + { + "epoch": 1.14, + "learning_rate": 8.252211385170242e-06, + "loss": 0.8309, + "step": 179 + }, + { + "epoch": 1.15, + "learning_rate": 8.150555327284417e-06, + "loss": 0.733, + "step": 180 + }, + { + "epoch": 1.15, + "learning_rate": 8.04909677983872e-06, + "loss": 0.7528, + "step": 181 + }, + { + "epoch": 1.16, + "learning_rate": 7.947846578043658e-06, + "loss": 0.7584, + "step": 182 + }, + { + "epoch": 1.17, + "learning_rate": 7.846815534859592e-06, + "loss": 0.7583, + "step": 183 + }, + { + "epoch": 1.17, + "learning_rate": 7.746014439841941e-06, + "loss": 0.7922, + "step": 184 + }, + { + "epoch": 1.18, + "learning_rate": 7.645454057988942e-06, + "loss": 0.765, + "step": 185 + }, + { + "epoch": 1.18, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8083, + "step": 186 + }, + { + "epoch": 1.19, + "learning_rate": 7.445098364088815e-06, + "loss": 0.7482, + "step": 187 + }, + { + "epoch": 1.2, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7737, + "step": 188 + }, + { + "epoch": 1.2, + "learning_rate": 7.245834038384523e-06, + "loss": 0.6522, + "step": 189 + }, + { + "epoch": 1.21, + "learning_rate": 7.14663775750895e-06, + "loss": 0.7313, + "step": 190 + }, + { + "epoch": 1.22, + "learning_rate": 7.0477461999055365e-06, + "loss": 0.7913, + "step": 191 + }, + { + "epoch": 1.22, + "learning_rate": 6.949169926644513e-06, + "loss": 0.699, + "step": 192 + }, + { + "epoch": 1.23, + "learning_rate": 6.8509194651254825e-06, + "loss": 0.7575, + "step": 193 + }, + { + "epoch": 1.24, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.7397, + "step": 194 + }, + { + "epoch": 1.24, + "learning_rate": 6.655437911816838e-06, + "loss": 0.7587, + "step": 195 + }, + { + "epoch": 1.25, + "learning_rate": 6.558227696373617e-06, + "loss": 0.6678, + "step": 196 + }, + { + "epoch": 1.25, + "learning_rate": 6.461385043135704e-06, + "loss": 0.7761, + "step": 197 + }, + { + "epoch": 1.26, + "learning_rate": 6.364920294361701e-06, + "loss": 0.7318, + "step": 198 + }, + { + "epoch": 1.27, + "learning_rate": 6.2688437519521e-06, + "loss": 0.7532, + "step": 199 + }, + { + "epoch": 1.27, + "learning_rate": 6.173165676349103e-06, + "loss": 0.7517, + "step": 200 + }, + { + "epoch": 1.28, + "learning_rate": 6.077896285440874e-06, + "loss": 0.6801, + "step": 201 + }, + { + "epoch": 1.29, + "learning_rate": 5.983045753470308e-06, + "loss": 0.7787, + "step": 202 + }, + { + "epoch": 1.29, + "learning_rate": 5.888624209948495e-06, + "loss": 0.7673, + "step": 203 + }, + { + "epoch": 1.3, + "learning_rate": 5.794641738572925e-06, + "loss": 0.781, + "step": 204 + }, + { + "epoch": 1.31, + "learning_rate": 5.701108376150635e-06, + "loss": 0.7651, + "step": 205 + }, + { + "epoch": 1.31, + "learning_rate": 5.608034111526298e-06, + "loss": 0.756, + "step": 206 + }, + { + "epoch": 1.32, + "learning_rate": 5.515428884515495e-06, + "loss": 0.8037, + "step": 207 + }, + { + "epoch": 1.32, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7231, + "step": 208 + }, + { + "epoch": 1.33, + "learning_rate": 5.331665051087549e-06, + "loss": 0.6989, + "step": 209 + }, + { + "epoch": 1.34, + "learning_rate": 5.240526069629265e-06, + "loss": 0.7202, + "step": 210 + }, + { + "epoch": 1.34, + "learning_rate": 5.149895373606405e-06, + "loss": 0.8339, + "step": 211 + }, + { + "epoch": 1.35, + "learning_rate": 5.059782641874962e-06, + "loss": 0.7997, + "step": 212 + }, + { + "epoch": 1.36, + "learning_rate": 4.970197497975216e-06, + "loss": 0.7051, + "step": 213 + }, + { + "epoch": 1.36, + "learning_rate": 4.881149509103993e-06, + "loss": 0.7325, + "step": 214 + }, + { + "epoch": 1.37, + "learning_rate": 4.7926481850929376e-06, + "loss": 0.8355, + "step": 215 + }, + { + "epoch": 1.38, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7599, + "step": 216 + }, + { + "epoch": 1.38, + "learning_rate": 4.617323278064657e-06, + "loss": 0.7146, + "step": 217 + }, + { + "epoch": 1.39, + "learning_rate": 4.530518418775734e-06, + "loss": 0.7002, + "step": 218 + }, + { + "epoch": 1.39, + "learning_rate": 4.444297669803981e-06, + "loss": 0.7764, + "step": 219 + }, + { + "epoch": 1.4, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8028, + "step": 220 + }, + { + "epoch": 1.41, + "learning_rate": 4.2736452710412645e-06, + "loss": 0.7582, + "step": 221 + }, + { + "epoch": 1.41, + "learning_rate": 4.189231845980618e-06, + "loss": 0.7331, + "step": 222 + }, + { + "epoch": 1.42, + "learning_rate": 4.105438978751465e-06, + "loss": 0.7278, + "step": 223 + }, + { + "epoch": 1.43, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.7189, + "step": 224 + }, + { + "epoch": 1.43, + "learning_rate": 3.939750645014977e-06, + "loss": 0.7571, + "step": 225 + }, + { + "epoch": 1.44, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8225, + "step": 226 + }, + { + "epoch": 1.45, + "learning_rate": 3.776651046324843e-06, + "loss": 0.7085, + "step": 227 + }, + { + "epoch": 1.45, + "learning_rate": 3.69609383872045e-06, + "loss": 0.6993, + "step": 228 + }, + { + "epoch": 1.46, + "learning_rate": 3.6162098533534095e-06, + "loss": 0.7832, + "step": 229 + }, + { + "epoch": 1.46, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.6988, + "step": 230 + }, + { + "epoch": 1.47, + "learning_rate": 3.4584956011913693e-06, + "loss": 0.7421, + "step": 231 + }, + { + "epoch": 1.48, + "learning_rate": 3.380682177404335e-06, + "loss": 0.8109, + "step": 232 + }, + { + "epoch": 1.48, + "learning_rate": 3.3035756600718515e-06, + "loss": 0.6832, + "step": 233 + }, + { + "epoch": 1.49, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7749, + "step": 234 + }, + { + "epoch": 1.5, + "learning_rate": 3.151516206592128e-06, + "loss": 0.6914, + "step": 235 + }, + { + "epoch": 1.5, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.693, + "step": 236 + }, + { + "epoch": 1.51, + "learning_rate": 3.0023821954452036e-06, + "loss": 0.7797, + "step": 237 + }, + { + "epoch": 1.52, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.6528, + "step": 238 + }, + { + "epoch": 1.52, + "learning_rate": 2.856237331673336e-06, + "loss": 0.7217, + "step": 239 + }, + { + "epoch": 1.53, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.738, + "step": 240 + }, + { + "epoch": 1.54, + "learning_rate": 2.713144043455388e-06, + "loss": 0.6506, + "step": 241 + }, + { + "epoch": 1.54, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7358, + "step": 242 + }, + { + "epoch": 1.55, + "learning_rate": 2.573163455439601e-06, + "loss": 0.6081, + "step": 243 + }, + { + "epoch": 1.55, + "learning_rate": 2.504359162588741e-06, + "loss": 0.7771, + "step": 244 + }, + { + "epoch": 1.56, + "learning_rate": 2.4363553626332157e-06, + "loss": 0.7458, + "step": 245 + }, + { + "epoch": 1.57, + "learning_rate": 2.369159318001937e-06, + "loss": 0.7548, + "step": 246 + }, + { + "epoch": 1.57, + "learning_rate": 2.3027782048600247e-06, + "loss": 0.8367, + "step": 247 + }, + { + "epoch": 1.58, + "learning_rate": 2.237219112342426e-06, + "loss": 0.7165, + "step": 248 + }, + { + "epoch": 1.59, + "learning_rate": 2.172489041796856e-06, + "loss": 0.7262, + "step": 249 + }, + { + "epoch": 1.59, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.6275, + "step": 250 + }, + { + "epoch": 1.6, + "learning_rate": 2.045543528599607e-06, + "loss": 0.676, + "step": 251 + }, + { + "epoch": 1.61, + "learning_rate": 1.983341643025117e-06, + "loss": 0.7728, + "step": 252 + }, + { + "epoch": 1.61, + "learning_rate": 1.921995892129208e-06, + "loss": 0.7293, + "step": 253 + }, + { + "epoch": 1.62, + "learning_rate": 1.861512827298051e-06, + "loss": 0.7412, + "step": 254 + }, + { + "epoch": 1.62, + "learning_rate": 1.8018989077877368e-06, + "loss": 0.7636, + "step": 255 + }, + { + "epoch": 1.63, + "learning_rate": 1.743160500034443e-06, + "loss": 0.6914, + "step": 256 + }, + { + "epoch": 1.64, + "learning_rate": 1.6853038769745466e-06, + "loss": 0.7537, + "step": 257 + }, + { + "epoch": 1.64, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.6998, + "step": 258 + }, + { + "epoch": 1.65, + "learning_rate": 1.5722606051720268e-06, + "loss": 0.7397, + "step": 259 + }, + { + "epoch": 1.66, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.7952, + "step": 260 + }, + { + "epoch": 1.66, + "learning_rate": 1.4628173806703594e-06, + "loss": 0.8124, + "step": 261 + }, + { + "epoch": 1.67, + "learning_rate": 1.409460456301147e-06, + "loss": 0.7264, + "step": 262 + }, + { + "epoch": 1.68, + "learning_rate": 1.3570209539404067e-06, + "loss": 0.7245, + "step": 263 + }, + { + "epoch": 1.68, + "learning_rate": 1.305504473836331e-06, + "loss": 0.7775, + "step": 264 + }, + { + "epoch": 1.69, + "learning_rate": 1.2549165176634582e-06, + "loss": 0.7452, + "step": 265 + }, + { + "epoch": 1.69, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.7577, + "step": 266 + }, + { + "epoch": 1.7, + "learning_rate": 1.1565476874264448e-06, + "loss": 0.7348, + "step": 267 + }, + { + "epoch": 1.71, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.7747, + "step": 268 + }, + { + "epoch": 1.71, + "learning_rate": 1.0619564830908303e-06, + "loss": 0.7856, + "step": 269 + }, + { + "epoch": 1.72, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.7297, + "step": 270 + }, + { + "epoch": 1.73, + "learning_rate": 9.711833108431234e-07, + "loss": 0.7543, + "step": 271 + }, + { + "epoch": 1.73, + "learning_rate": 9.272406681844015e-07, + "loss": 0.7633, + "step": 272 + }, + { + "epoch": 1.74, + "learning_rate": 8.842669459347186e-07, + "loss": 0.7307, + "step": 273 + }, + { + "epoch": 1.75, + "learning_rate": 8.42266733449425e-07, + "loss": 0.783, + "step": 274 + }, + { + "epoch": 1.75, + "learning_rate": 8.012445161184179e-07, + "loss": 0.7203, + "step": 275 + }, + { + "epoch": 1.76, + "learning_rate": 7.612046748871327e-07, + "loss": 0.7233, + "step": 276 + }, + { + "epoch": 1.76, + "learning_rate": 7.221514857886857e-07, + "loss": 0.7003, + "step": 277 + }, + { + "epoch": 1.77, + "learning_rate": 6.840891194872112e-07, + "loss": 0.7572, + "step": 278 + }, + { + "epoch": 1.78, + "learning_rate": 6.470216408324626e-07, + "loss": 0.6534, + "step": 279 + }, + { + "epoch": 1.78, + "learning_rate": 6.109530084257043e-07, + "loss": 0.7857, + "step": 280 + }, + { + "epoch": 1.79, + "learning_rate": 5.758870741969635e-07, + "loss": 0.7527, + "step": 281 + }, + { + "epoch": 1.8, + "learning_rate": 5.418275829936537e-07, + "loss": 0.7571, + "step": 282 + }, + { + "epoch": 1.8, + "learning_rate": 5.087781721806539e-07, + "loss": 0.7525, + "step": 283 + }, + { + "epoch": 1.81, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.7605, + "step": 284 + }, + { + "epoch": 1.82, + "learning_rate": 4.45723601453234e-07, + "loss": 0.6809, + "step": 285 + }, + { + "epoch": 1.82, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.7616, + "step": 286 + }, + { + "epoch": 1.83, + "learning_rate": 3.867502968102055e-07, + "loss": 0.6778, + "step": 287 + }, + { + "epoch": 1.83, + "learning_rate": 3.588020599878639e-07, + "loss": 0.7501, + "step": 288 + }, + { + "epoch": 1.84, + "learning_rate": 3.3188344966723516e-07, + "loss": 0.6761, + "step": 289 + }, + { + "epoch": 1.85, + "learning_rate": 3.059973406066963e-07, + "loss": 0.7752, + "step": 290 + }, + { + "epoch": 1.85, + "learning_rate": 2.811464972992195e-07, + "loss": 0.7076, + "step": 291 + }, + { + "epoch": 1.86, + "learning_rate": 2.573335736771254e-07, + "loss": 0.7071, + "step": 292 + }, + { + "epoch": 1.87, + "learning_rate": 2.3456111282867178e-07, + "loss": 0.7576, + "step": 293 + }, + { + "epoch": 1.87, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7305, + "step": 294 + }, + { + "epoch": 1.88, + "learning_rate": 1.921471959676957e-07, + "loss": 0.6966, + "step": 295 + }, + { + "epoch": 1.89, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8391, + "step": 296 + }, + { + "epoch": 1.89, + "learning_rate": 1.539228645174895e-07, + "loss": 0.8079, + "step": 297 + }, + { + "epoch": 1.9, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8451, + "step": 298 + }, + { + "epoch": 1.9, + "learning_rate": 1.1990444662903445e-07, + "loss": 0.788, + "step": 299 + }, + { + "epoch": 1.91, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.7608, + "step": 300 + }, + { + "epoch": 1.92, + "learning_rate": 9.010647382825421e-08, + "loss": 0.78, + "step": 301 + }, + { + "epoch": 1.92, + "learning_rate": 7.679420262954984e-08, + "loss": 0.6972, + "step": 302 + }, + { + "epoch": 1.93, + "learning_rate": 6.454167480863694e-08, + "loss": 0.7271, + "step": 303 + }, + { + "epoch": 1.94, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.7114, + "step": 304 + }, + { + "epoch": 1.94, + "learning_rate": 4.32209699939623e-08, + "loss": 0.7364, + "step": 305 + }, + { + "epoch": 1.95, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.6567, + "step": 306 + }, + { + "epoch": 1.96, + "learning_rate": 2.6153466873468646e-08, + "loss": 0.7657, + "step": 307 + }, + { + "epoch": 1.96, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.7657, + "step": 308 + }, + { + "epoch": 1.97, + "learning_rate": 1.3346456111430484e-08, + "loss": 0.7456, + "step": 309 + }, + { + "epoch": 1.97, + "learning_rate": 8.542416126989805e-09, + "loss": 0.743, + "step": 310 + }, + { + "epoch": 1.98, + "learning_rate": 4.80540843283972e-09, + "loss": 0.8056, + "step": 311 + }, + { + "epoch": 1.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7316, + "step": 312 + }, + { + "epoch": 1.99, + "learning_rate": 5.339722864927677e-10, + "loss": 0.7186, + "step": 313 + }, + { + "epoch": 2.0, + "learning_rate": 0.0, + "loss": 0.6761, + "step": 314 + }, + { + "epoch": 2.0, + "step": 314, + "total_flos": 3.703872705758495e+17, + "train_loss": 0.8081462842643641, + "train_runtime": 1231.9663, + "train_samples_per_second": 8.117, + "train_steps_per_second": 0.255 + } + ], + "logging_steps": 1.0, + "max_steps": 314, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 3.703872705758495e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..16a78102b7e95ef198aa2dad7105a51af8205dec --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj", + "gate_proj", + "up_proj", + "o_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afd67e4ef6d595fac3173495a857cf24971700c8 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f68bcc304cc1c4bc35380106b600735a47a400179deb5e9776bba374f9a70e7 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3462c756d75e27674e795eec7bb3194e33e0e64 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-Iter2_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.6098, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.5733, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.2851, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1375, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0769, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 1.0143, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1692, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1487, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8639, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9225, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.9068, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.9024, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.9073, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9532, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9257, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.9074, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.8018, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.8862, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9051, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8819, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9292, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8439, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.8997, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8851, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8378, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9623, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8123, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8365, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8701, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.7937, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8309, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8487, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9729, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.8914, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9184, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9261, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8347, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9141, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.872, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8933, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8687, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8399, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8749, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8652, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9969, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.7979, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.856, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8577, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9292, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8323, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8608, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8946, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8686, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8746, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8895, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9424, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7137, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9028, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8119, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8175, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8363, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8283, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9444, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8184, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9004, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8012, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.871, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8469, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8052, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8099, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.86, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8346, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.877, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8152, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7888, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7216, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8323, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8151, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8319, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8373, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8947, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8909, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8647, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8208, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7598, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8762, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9331, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8625, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7557, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.8942, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.875, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8809, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8403, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7851, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8728, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8558, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8646, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8275, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8532, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8733, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8611, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8558, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.9209, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7208, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8901, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9083, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.826, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7325, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8093, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.9089, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.83, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8137, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8297, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8895, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7774, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.9295, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7561, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8494, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9142, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.7994, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.8009, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8307, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8025, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9829, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8293, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8357, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7661, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.8046, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8106, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8807, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8421, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8058, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8288, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.7971, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8441, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.822, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8253, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.872, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8035, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9119, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8145, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8241, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.8933, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8401, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8143, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7924, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8443, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8043, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8204, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8437, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8593, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9213, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.9096, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7958, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7545, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9536, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.8231670013217997e+17, + "train_loss": 0.8739432217968497, + "train_runtime": 689.0914, + "train_samples_per_second": 7.256, + "train_steps_per_second": 0.228 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8231670013217997e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..71008584a61e6a3adc4de45d7afb8f13db785dda --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "gate_proj", + "v_proj", + "k_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff3db2b60e7705564a7238ef4d444a3ae233ff23 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471104284001187bd3ec82fdd1a65b0f40e66ecaf27f46854f3d7a03017fffc4 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..69b59bf9ac2f9dc3e1da4f98a99fd7a502563059 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-gt-judge_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.5382, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.506, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.2118, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.084, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0008, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 0.9599, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.0582, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1419, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8421, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.8986, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.8873, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.8599, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8807, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9474, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9125, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.8908, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7831, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.884, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.8917, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8823, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.912, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8415, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.8825, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8784, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8056, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9612, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8093, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8302, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8629, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.7995, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8228, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.839, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9464, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.886, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9189, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9177, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.823, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9148, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8686, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8897, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8753, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8067, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8739, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8466, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.982, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.7998, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8506, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8621, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9342, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8281, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8616, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8912, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8665, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8727, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8772, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9444, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7163, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9036, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8144, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8295, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8279, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8258, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9333, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8169, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9032, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8181, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8489, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8451, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8081, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8028, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8597, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8268, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.87, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8127, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7773, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7448, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8295, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8038, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8257, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8469, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8865, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8739, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.856, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8284, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7522, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8738, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9324, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8677, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7557, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.8837, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.877, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8742, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8435, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7781, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8691, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8451, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8682, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8353, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8576, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8705, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8492, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8508, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.8925, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.718, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8913, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9157, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8225, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7329, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8174, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8929, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8091, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.82, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8235, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8822, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7844, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.931, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7476, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8429, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9111, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.8021, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7955, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8339, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8006, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8096, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9806, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8267, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8283, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7769, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.7939, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8015, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8694, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8399, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.7826, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8359, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.8184, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8445, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.818, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.827, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8569, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8015, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9009, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8092, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8287, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.8932, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8399, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8044, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7954, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8432, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8004, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8279, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8352, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8495, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.8961, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.8712, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7869, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.785, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.8928, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.2573132258803712e+17, + "train_loss": 0.8656197345940171, + "train_runtime": 499.8196, + "train_samples_per_second": 10.004, + "train_steps_per_second": 0.314 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.2573132258803712e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..49e03824a86b480ad4ae47152bf6355ade74a53e --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "k_proj", + "v_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4d4a90cbcc5155faf01b14dfaacc35e3a5d98b9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28080a32fea5e44ca46981f1ff9541409cf063ac8c21d7bdb0d91807ab10387 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..751f5f12a8e73c5c1d2d6c6d2d4906a915e7f887 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.582, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.4762, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.2722, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.1457, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0617, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 0.9762, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.1296, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1525, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8448, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.9041, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.8884, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.8932, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8924, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9506, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9329, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.8974, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7996, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.902, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9056, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8974, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9261, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8359, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.9149, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8821, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8154, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9703, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8205, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.8303, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8678, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.8052, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8215, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.8439, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.958, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.8831, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9266, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9327, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.85, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9246, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8671, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8873, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8799, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.797, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.88, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8599, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.982, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.8132, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8727, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8634, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9443, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8371, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8695, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8743, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8674, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8724, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8807, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.942, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7153, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9083, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.822, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8361, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8279, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8301, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9272, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8164, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9024, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.814, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8483, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8448, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8114, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8019, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8509, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.8345, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8734, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8157, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7855, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7622, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8297, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8175, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.836, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.866, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8942, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8973, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8689, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.8394, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7526, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.891, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9456, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8575, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7546, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.9228, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8837, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8862, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8443, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.785, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8761, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8542, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8801, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8387, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8611, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.8584, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8523, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8533, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.8992, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7299, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8909, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9109, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8207, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7347, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8015, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8944, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8008, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8183, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8747, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8817, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.7818, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.934, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7555, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8548, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.9049, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.816, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7943, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8358, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8023, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8008, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9836, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8322, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8383, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7623, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.8002, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8054, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8636, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8431, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8483, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8435, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.816, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8453, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8174, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8234, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8683, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.8076, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.918, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8111, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.833, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.9031, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8492, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8195, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7933, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8399, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8038, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8148, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8414, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.8584, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9013, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.8788, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7969, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7693, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9059, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.8496968613009818e+17, + "train_loss": 0.872945710352272, + "train_runtime": 619.0515, + "train_samples_per_second": 8.077, + "train_steps_per_second": 0.254 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8496968613009818e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53e4844278d1dba3d71124323a27c34cac43a9f0 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "gate_proj", + "v_proj", + "k_proj", + "down_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09caf2431015d1d288d1dd4f207e39532c5918a1 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b3246c0d08a12aa841a20e704c5cf64a020b727192267c79ba07658f958eba +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7eb503c6d3ad75f6fbf6dc15cc5cdb0408f92a58 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_new_lora/trainer_state.json @@ -0,0 +1,972 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.5331, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 1.5012, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 1.2102, + "step": 3 + }, + { + "epoch": 0.03, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.0834, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 1.0008, + "step": 5 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997864167879313e-05, + "loss": 0.9587, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 1.999145758387301e-05, + "loss": 1.0589, + "step": 7 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980782984658682e-05, + "loss": 1.1424, + "step": 8 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.8429, + "step": 9 + }, + { + "epoch": 0.06, + "learning_rate": 1.994664980113243e-05, + "loss": 0.8993, + "step": 10 + }, + { + "epoch": 0.07, + "learning_rate": 1.992320579737045e-05, + "loss": 0.8882, + "step": 11 + }, + { + "epoch": 0.08, + "learning_rate": 1.9895522933272028e-05, + "loss": 0.8607, + "step": 12 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.8812, + "step": 13 + }, + { + "epoch": 0.09, + "learning_rate": 1.9827489730473597e-05, + "loss": 0.9477, + "step": 14 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.9129, + "step": 15 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742666426322877e-05, + "loss": 0.8922, + "step": 16 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.7841, + "step": 17 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641197940012136e-05, + "loss": 0.8855, + "step": 18 + }, + { + "epoch": 0.12, + "learning_rate": 1.958427482458253e-05, + "loss": 0.8922, + "step": 19 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523257628748148e-05, + "loss": 0.8822, + "step": 20 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9118, + "step": 21 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389046991574298e-05, + "loss": 0.8421, + "step": 22 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.8836, + "step": 23 + }, + { + "epoch": 0.15, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.8783, + "step": 24 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.8056, + "step": 25 + }, + { + "epoch": 0.17, + "learning_rate": 1.9072759331815602e-05, + "loss": 0.9611, + "step": 26 + }, + { + "epoch": 0.17, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8094, + "step": 27 + }, + { + "epoch": 0.18, + "learning_rate": 1.8891222681391853e-05, + "loss": 0.831, + "step": 28 + }, + { + "epoch": 0.18, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8634, + "step": 29 + }, + { + "epoch": 0.19, + "learning_rate": 1.869449552616367e-05, + "loss": 0.7989, + "step": 30 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8231, + "step": 31 + }, + { + "epoch": 0.2, + "learning_rate": 1.8482913971175737e-05, + "loss": 0.839, + "step": 32 + }, + { + "epoch": 0.21, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9476, + "step": 33 + }, + { + "epoch": 0.22, + "learning_rate": 1.825683949996556e-05, + "loss": 0.8864, + "step": 34 + }, + { + "epoch": 0.22, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9188, + "step": 35 + }, + { + "epoch": 0.23, + "learning_rate": 1.8016658356974885e-05, + "loss": 0.9177, + "step": 36 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.8227, + "step": 37 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762780887657576e-05, + "loss": 0.9141, + "step": 38 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.8691, + "step": 39 + }, + { + "epoch": 0.25, + "learning_rate": 1.7495640837411265e-05, + "loss": 0.8902, + "step": 40 + }, + { + "epoch": 0.26, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8746, + "step": 41 + }, + { + "epoch": 0.27, + "learning_rate": 1.7215694610530624e-05, + "loss": 0.8065, + "step": 42 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8744, + "step": 43 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923420490448298e-05, + "loss": 0.8461, + "step": 44 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.9809, + "step": 45 + }, + { + "epoch": 0.29, + "learning_rate": 1.6619317822595666e-05, + "loss": 0.7988, + "step": 46 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8504, + "step": 47 + }, + { + "epoch": 0.31, + "learning_rate": 1.6303906161279554e-05, + "loss": 0.8619, + "step": 48 + }, + { + "epoch": 0.31, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.9342, + "step": 49 + }, + { + "epoch": 0.32, + "learning_rate": 1.597772438203241e-05, + "loss": 0.8289, + "step": 50 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8628, + "step": 51 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641329760952514e-05, + "loss": 0.8913, + "step": 52 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8673, + "step": 53 + }, + { + "epoch": 0.34, + "learning_rate": 1.529529702260709e-05, + "loss": 0.8728, + "step": 54 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.8775, + "step": 55 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940217358125042e-05, + "loss": 0.9451, + "step": 56 + }, + { + "epoch": 0.36, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.7154, + "step": 57 + }, + { + "epoch": 0.37, + "learning_rate": 1.4576697415156818e-05, + "loss": 0.9026, + "step": 58 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8151, + "step": 59 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205358261427076e-05, + "loss": 0.8292, + "step": 60 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.8277, + "step": 61 + }, + { + "epoch": 0.39, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.8266, + "step": 62 + }, + { + "epoch": 0.4, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.9344, + "step": 63 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441772303626387e-05, + "loss": 0.8175, + "step": 64 + }, + { + "epoch": 0.41, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.9036, + "step": 65 + }, + { + "epoch": 0.42, + "learning_rate": 1.305083007335549e-05, + "loss": 0.8172, + "step": 66 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8487, + "step": 67 + }, + { + "epoch": 0.43, + "learning_rate": 1.2654675551080724e-05, + "loss": 0.8453, + "step": 68 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8081, + "step": 69 + }, + { + "epoch": 0.45, + "learning_rate": 1.2253985560158064e-05, + "loss": 0.8017, + "step": 70 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8601, + "step": 71 + }, + { + "epoch": 0.46, + "learning_rate": 1.1849444672715587e-05, + "loss": 0.826, + "step": 72 + }, + { + "epoch": 0.46, + "learning_rate": 1.164594590280734e-05, + "loss": 0.8704, + "step": 73 + }, + { + "epoch": 0.47, + "learning_rate": 1.1441744040073469e-05, + "loss": 0.8132, + "step": 74 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7772, + "step": 75 + }, + { + "epoch": 0.48, + "learning_rate": 1.103158021192357e-05, + "loss": 0.7459, + "step": 76 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.831, + "step": 77 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619653946285948e-05, + "loss": 0.8048, + "step": 78 + }, + { + "epoch": 0.5, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.8256, + "step": 79 + }, + { + "epoch": 0.51, + "learning_rate": 1.0206669012275546e-05, + "loss": 0.8472, + "step": 80 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.8872, + "step": 81 + }, + { + "epoch": 0.52, + "learning_rate": 9.79333098772446e-06, + "loss": 0.8742, + "step": 82 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8564, + "step": 83 + }, + { + "epoch": 0.54, + "learning_rate": 9.380346053714055e-06, + "loss": 0.827, + "step": 84 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.7523, + "step": 85 + }, + { + "epoch": 0.55, + "learning_rate": 8.968419788076431e-06, + "loss": 0.8738, + "step": 86 + }, + { + "epoch": 0.55, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9318, + "step": 87 + }, + { + "epoch": 0.56, + "learning_rate": 8.558255959926533e-06, + "loss": 0.8688, + "step": 88 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.7561, + "step": 89 + }, + { + "epoch": 0.57, + "learning_rate": 8.150555327284417e-06, + "loss": 0.8839, + "step": 90 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.8769, + "step": 91 + }, + { + "epoch": 0.59, + "learning_rate": 7.746014439841941e-06, + "loss": 0.8745, + "step": 92 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8429, + "step": 93 + }, + { + "epoch": 0.6, + "learning_rate": 7.34532444891928e-06, + "loss": 0.7778, + "step": 94 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8691, + "step": 95 + }, + { + "epoch": 0.61, + "learning_rate": 6.949169926644513e-06, + "loss": 0.8458, + "step": 96 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8685, + "step": 97 + }, + { + "epoch": 0.62, + "learning_rate": 6.558227696373617e-06, + "loss": 0.8349, + "step": 98 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8573, + "step": 99 + }, + { + "epoch": 0.64, + "learning_rate": 6.173165676349103e-06, + "loss": 0.87, + "step": 100 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8501, + "step": 101 + }, + { + "epoch": 0.65, + "learning_rate": 5.794641738572925e-06, + "loss": 0.8513, + "step": 102 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.8929, + "step": 103 + }, + { + "epoch": 0.66, + "learning_rate": 5.423302584843186e-06, + "loss": 0.7187, + "step": 104 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.8903, + "step": 105 + }, + { + "epoch": 0.68, + "learning_rate": 5.059782641874962e-06, + "loss": 0.9163, + "step": 106 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8222, + "step": 107 + }, + { + "epoch": 0.69, + "learning_rate": 4.704702977392914e-06, + "loss": 0.7322, + "step": 108 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775734e-06, + "loss": 0.8172, + "step": 109 + }, + { + "epoch": 0.7, + "learning_rate": 4.35867023904749e-06, + "loss": 0.8929, + "step": 110 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8091, + "step": 111 + }, + { + "epoch": 0.71, + "learning_rate": 4.0222756179675915e-06, + "loss": 0.8201, + "step": 112 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8233, + "step": 113 + }, + { + "epoch": 0.73, + "learning_rate": 3.69609383872045e-06, + "loss": 0.8828, + "step": 114 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.785, + "step": 115 + }, + { + "epoch": 0.74, + "learning_rate": 3.380682177404335e-06, + "loss": 0.9317, + "step": 116 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.7481, + "step": 117 + }, + { + "epoch": 0.75, + "learning_rate": 3.0765795095517026e-06, + "loss": 0.8425, + "step": 118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.912, + "step": 119 + }, + { + "epoch": 0.76, + "learning_rate": 2.7843053894693805e-06, + "loss": 0.8021, + "step": 120 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.7954, + "step": 121 + }, + { + "epoch": 0.78, + "learning_rate": 2.504359162588741e-06, + "loss": 0.8338, + "step": 122 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8, + "step": 123 + }, + { + "epoch": 0.79, + "learning_rate": 2.237219112342426e-06, + "loss": 0.8095, + "step": 124 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9827, + "step": 125 + }, + { + "epoch": 0.8, + "learning_rate": 1.983341643025117e-06, + "loss": 0.8282, + "step": 126 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8293, + "step": 127 + }, + { + "epoch": 0.82, + "learning_rate": 1.743160500034443e-06, + "loss": 0.7778, + "step": 128 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.7949, + "step": 129 + }, + { + "epoch": 0.83, + "learning_rate": 1.5170860288242638e-06, + "loss": 0.8007, + "step": 130 + }, + { + "epoch": 0.83, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8687, + "step": 131 + }, + { + "epoch": 0.84, + "learning_rate": 1.305504473836331e-06, + "loss": 0.8397, + "step": 132 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.7837, + "step": 133 + }, + { + "epoch": 0.85, + "learning_rate": 1.1087773186081474e-06, + "loss": 0.8364, + "step": 134 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.8193, + "step": 135 + }, + { + "epoch": 0.87, + "learning_rate": 9.272406681844015e-07, + "loss": 0.8452, + "step": 136 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8182, + "step": 137 + }, + { + "epoch": 0.88, + "learning_rate": 7.612046748871327e-07, + "loss": 0.8267, + "step": 138 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8566, + "step": 139 + }, + { + "epoch": 0.89, + "learning_rate": 6.109530084257043e-07, + "loss": 0.803, + "step": 140 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.9015, + "step": 141 + }, + { + "epoch": 0.9, + "learning_rate": 4.7674237125185597e-07, + "loss": 0.8099, + "step": 142 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8296, + "step": 143 + }, + { + "epoch": 0.92, + "learning_rate": 3.588020599878639e-07, + "loss": 0.893, + "step": 144 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.84, + "step": 145 + }, + { + "epoch": 0.93, + "learning_rate": 2.573335736771254e-07, + "loss": 0.8048, + "step": 146 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.7947, + "step": 147 + }, + { + "epoch": 0.94, + "learning_rate": 1.7251026952640583e-07, + "loss": 0.8422, + "step": 148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.799, + "step": 149 + }, + { + "epoch": 0.96, + "learning_rate": 1.0447706672797264e-07, + "loss": 0.8287, + "step": 150 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.835, + "step": 151 + }, + { + "epoch": 0.97, + "learning_rate": 5.3350198867574424e-08, + "loss": 0.85, + "step": 152 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.8955, + "step": 153 + }, + { + "epoch": 0.98, + "learning_rate": 1.9217015341318478e-08, + "loss": 0.8712, + "step": 154 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.7874, + "step": 155 + }, + { + "epoch": 0.99, + "learning_rate": 2.1358321206899067e-09, + "loss": 0.7844, + "step": 156 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.8946, + "step": 157 + }, + { + "epoch": 1.0, + "step": 157, + "total_flos": 1.2573132258803712e+17, + "train_loss": 0.8657361534750385, + "train_runtime": 500.0159, + "train_samples_per_second": 10.0, + "train_steps_per_second": 0.314 + } + ], + "logging_steps": 1.0, + "max_steps": 157, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.2573132258803712e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b99d6505870d43825706ccaa4cb3d689df6e0cf --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "gate_proj", + "k_proj", + "q_proj", + "o_proj", + "up_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3dc6a6e09c0a3aca6dbeedd5cc7734283e4f54e0 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9ab2281c4ce0312c95825aa463d3ce1b0dd3a70810f5e566162794ff9ad56a +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..be67fed1ff897ab3694b171ee516b8a34771c027 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora/trainer_state.json @@ -0,0 +1,504 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 79, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 6.666666666666667e-06, + "loss": 1.5851, + "step": 1 + }, + { + "epoch": 0.03, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.3353, + "step": 2 + }, + { + "epoch": 0.04, + "learning_rate": 2e-05, + "loss": 1.2305, + "step": 3 + }, + { + "epoch": 0.05, + "learning_rate": 1.999145758387301e-05, + "loss": 1.2723, + "step": 4 + }, + { + "epoch": 0.06, + "learning_rate": 1.99658449300667e-05, + "loss": 0.9605, + "step": 5 + }, + { + "epoch": 0.08, + "learning_rate": 1.992320579737045e-05, + "loss": 0.9758, + "step": 6 + }, + { + "epoch": 0.09, + "learning_rate": 1.9863613034027224e-05, + "loss": 1.007, + "step": 7 + }, + { + "epoch": 0.1, + "learning_rate": 1.9787168453273546e-05, + "loss": 1.0115, + "step": 8 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.8976, + "step": 9 + }, + { + "epoch": 0.13, + "learning_rate": 1.958427482458253e-05, + "loss": 0.9474, + "step": 10 + }, + { + "epoch": 0.14, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.9404, + "step": 11 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.9348, + "step": 12 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.9208, + "step": 13 + }, + { + "epoch": 0.18, + "learning_rate": 1.898390981891979e-05, + "loss": 0.8665, + "step": 14 + }, + { + "epoch": 0.19, + "learning_rate": 1.879473751206489e-05, + "loss": 0.8721, + "step": 15 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.8685, + "step": 16 + }, + { + "epoch": 0.22, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.9503, + "step": 17 + }, + { + "epoch": 0.23, + "learning_rate": 1.813848717270195e-05, + "loss": 0.9587, + "step": 18 + }, + { + "epoch": 0.24, + "learning_rate": 1.789140509396394e-05, + "loss": 0.9115, + "step": 19 + }, + { + "epoch": 0.25, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.9185, + "step": 20 + }, + { + "epoch": 0.27, + "learning_rate": 1.735723910673132e-05, + "loss": 0.8887, + "step": 21 + }, + { + "epoch": 0.28, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.8839, + "step": 22 + }, + { + "epoch": 0.29, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.8986, + "step": 23 + }, + { + "epoch": 0.3, + "learning_rate": 1.646299237860941e-05, + "loss": 0.8795, + "step": 24 + }, + { + "epoch": 0.32, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.908, + "step": 25 + }, + { + "epoch": 0.33, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.8929, + "step": 26 + }, + { + "epoch": 0.34, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.8925, + "step": 27 + }, + { + "epoch": 0.35, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.9317, + "step": 28 + }, + { + "epoch": 0.37, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.8203, + "step": 29 + }, + { + "epoch": 0.38, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.8427, + "step": 30 + }, + { + "epoch": 0.39, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.846, + "step": 31 + }, + { + "epoch": 0.41, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.906, + "step": 32 + }, + { + "epoch": 0.42, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.8835, + "step": 33 + }, + { + "epoch": 0.43, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.8615, + "step": 34 + }, + { + "epoch": 0.44, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.8173, + "step": 35 + }, + { + "epoch": 0.46, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.8634, + "step": 36 + }, + { + "epoch": 0.47, + "learning_rate": 1.164594590280734e-05, + "loss": 0.868, + "step": 37 + }, + { + "epoch": 0.48, + "learning_rate": 1.123692631269348e-05, + "loss": 0.7898, + "step": 38 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.8329, + "step": 39 + }, + { + "epoch": 0.51, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.85, + "step": 40 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.9066, + "step": 41 + }, + { + "epoch": 0.53, + "learning_rate": 9.586750257511868e-06, + "loss": 0.8712, + "step": 42 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276678e-06, + "loss": 0.8278, + "step": 43 + }, + { + "epoch": 0.56, + "learning_rate": 8.763073687306523e-06, + "loss": 0.9164, + "step": 44 + }, + { + "epoch": 0.57, + "learning_rate": 8.35405409719266e-06, + "loss": 0.8412, + "step": 45 + }, + { + "epoch": 0.58, + "learning_rate": 7.947846578043658e-06, + "loss": 0.9025, + "step": 46 + }, + { + "epoch": 0.59, + "learning_rate": 7.545145128592009e-06, + "loss": 0.8027, + "step": 47 + }, + { + "epoch": 0.61, + "learning_rate": 7.14663775750895e-06, + "loss": 0.8706, + "step": 48 + }, + { + "epoch": 0.62, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.8705, + "step": 49 + }, + { + "epoch": 0.63, + "learning_rate": 6.364920294361701e-06, + "loss": 0.8763, + "step": 50 + }, + { + "epoch": 0.65, + "learning_rate": 5.983045753470308e-06, + "loss": 0.8691, + "step": 51 + }, + { + "epoch": 0.66, + "learning_rate": 5.608034111526298e-06, + "loss": 0.8161, + "step": 52 + }, + { + "epoch": 0.67, + "learning_rate": 5.240526069629265e-06, + "loss": 0.9206, + "step": 53 + }, + { + "epoch": 0.68, + "learning_rate": 4.881149509103993e-06, + "loss": 0.8078, + "step": 54 + }, + { + "epoch": 0.7, + "learning_rate": 4.530518418775734e-06, + "loss": 0.9015, + "step": 55 + }, + { + "epoch": 0.71, + "learning_rate": 4.189231845980618e-06, + "loss": 0.8266, + "step": 56 + }, + { + "epoch": 0.72, + "learning_rate": 3.857872873103322e-06, + "loss": 0.8778, + "step": 57 + }, + { + "epoch": 0.73, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.8823, + "step": 58 + }, + { + "epoch": 0.75, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.8276, + "step": 59 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.8693, + "step": 60 + }, + { + "epoch": 0.77, + "learning_rate": 2.642760893268684e-06, + "loss": 0.8308, + "step": 61 + }, + { + "epoch": 0.78, + "learning_rate": 2.369159318001937e-06, + "loss": 0.8141, + "step": 62 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.9208, + "step": 63 + }, + { + "epoch": 0.81, + "learning_rate": 1.861512827298051e-06, + "loss": 0.8182, + "step": 64 + }, + { + "epoch": 0.82, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.8208, + "step": 65 + }, + { + "epoch": 0.84, + "learning_rate": 1.409460456301147e-06, + "loss": 0.8672, + "step": 66 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.8366, + "step": 67 + }, + { + "epoch": 0.86, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.8524, + "step": 68 + }, + { + "epoch": 0.87, + "learning_rate": 8.42266733449425e-07, + "loss": 0.8395, + "step": 69 + }, + { + "epoch": 0.89, + "learning_rate": 6.840891194872112e-07, + "loss": 0.8404, + "step": 70 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-07, + "loss": 0.8686, + "step": 71 + }, + { + "epoch": 0.91, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.8737, + "step": 72 + }, + { + "epoch": 0.92, + "learning_rate": 3.059973406066963e-07, + "loss": 0.8491, + "step": 73 + }, + { + "epoch": 0.94, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.8501, + "step": 74 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.8274, + "step": 75 + }, + { + "epoch": 0.96, + "learning_rate": 7.679420262954984e-08, + "loss": 0.8572, + "step": 76 + }, + { + "epoch": 0.97, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.9106, + "step": 77 + }, + { + "epoch": 0.99, + "learning_rate": 8.542416126989805e-09, + "loss": 0.8088, + "step": 78 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9058, + "step": 79 + }, + { + "epoch": 1.0, + "step": 79, + "total_flos": 1.8784961531425587e+17, + "train_loss": 0.9012524025349677, + "train_runtime": 343.1196, + "train_samples_per_second": 14.572, + "train_steps_per_second": 0.23 + } + ], + "logging_steps": 1.0, + "max_steps": 79, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.8784961531425587e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..282ac58da38dda999667d630f947ae2edbadd98a --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "gate_proj", + "q_proj", + "up_proj", + "k_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73f03d702291a5d3c70aaa732fe5281183144bc3 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:474e01184979bd13a37990ef127f948598313784635ec5fd69111cfce9f1cab2 +size 708923528 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1d2f6b476a47b32d36014815034f8601a3e9e90 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b861161bde6305994dd618296afef4ae30d3250 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-judge_v2_lora_2ep/trainer_state.json @@ -0,0 +1,978 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 158, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 1.5851, + "step": 1 + }, + { + "epoch": 0.03, + "learning_rate": 8.000000000000001e-06, + "loss": 1.3353, + "step": 2 + }, + { + "epoch": 0.04, + "learning_rate": 1.2e-05, + "loss": 1.3272, + "step": 3 + }, + { + "epoch": 0.05, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.3272, + "step": 4 + }, + { + "epoch": 0.06, + "learning_rate": 2e-05, + "loss": 1.0135, + "step": 5 + }, + { + "epoch": 0.08, + "learning_rate": 1.9997891995035914e-05, + "loss": 1.0067, + "step": 6 + }, + { + "epoch": 0.09, + "learning_rate": 1.999156886888064e-05, + "loss": 1.0279, + "step": 7 + }, + { + "epoch": 0.1, + "learning_rate": 1.9981033287370443e-05, + "loss": 1.0302, + "step": 8 + }, + { + "epoch": 0.11, + "learning_rate": 1.9966289692316944e-05, + "loss": 0.9117, + "step": 9 + }, + { + "epoch": 0.13, + "learning_rate": 1.9947344299634464e-05, + "loss": 0.9609, + "step": 10 + }, + { + "epoch": 0.14, + "learning_rate": 1.992420509671936e-05, + "loss": 0.9546, + "step": 11 + }, + { + "epoch": 0.15, + "learning_rate": 1.9896881839082554e-05, + "loss": 0.9437, + "step": 12 + }, + { + "epoch": 0.16, + "learning_rate": 1.9865386046236597e-05, + "loss": 0.9301, + "step": 13 + }, + { + "epoch": 0.18, + "learning_rate": 1.982973099683902e-05, + "loss": 0.8737, + "step": 14 + }, + { + "epoch": 0.19, + "learning_rate": 1.9789931723094046e-05, + "loss": 0.8778, + "step": 15 + }, + { + "epoch": 0.2, + "learning_rate": 1.9746005004415004e-05, + "loss": 0.8737, + "step": 16 + }, + { + "epoch": 0.22, + "learning_rate": 1.9697969360350098e-05, + "loss": 0.9548, + "step": 17 + }, + { + "epoch": 0.23, + "learning_rate": 1.9645845042774555e-05, + "loss": 0.9625, + "step": 18 + }, + { + "epoch": 0.24, + "learning_rate": 1.9589654027352412e-05, + "loss": 0.9144, + "step": 19 + }, + { + "epoch": 0.25, + "learning_rate": 1.9529420004271568e-05, + "loss": 0.9209, + "step": 20 + }, + { + "epoch": 0.27, + "learning_rate": 1.9465168368255946e-05, + "loss": 0.8918, + "step": 21 + }, + { + "epoch": 0.28, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.8867, + "step": 22 + }, + { + "epoch": 0.29, + "learning_rate": 1.932472229404356e-05, + "loss": 0.8998, + "step": 23 + }, + { + "epoch": 0.3, + "learning_rate": 1.924858706805112e-05, + "loss": 0.8811, + "step": 24 + }, + { + "epoch": 0.32, + "learning_rate": 1.9168552628568632e-05, + "loss": 0.9089, + "step": 25 + }, + { + "epoch": 0.33, + "learning_rate": 1.9084652718195237e-05, + "loss": 0.8937, + "step": 26 + }, + { + "epoch": 0.34, + "learning_rate": 1.8996922709216456e-05, + "loss": 0.8929, + "step": 27 + }, + { + "epoch": 0.35, + "learning_rate": 1.8905399588691165e-05, + "loss": 0.9323, + "step": 28 + }, + { + "epoch": 0.37, + "learning_rate": 1.8810121942857848e-05, + "loss": 0.8206, + "step": 29 + }, + { + "epoch": 0.38, + "learning_rate": 1.8711129940866577e-05, + "loss": 0.8435, + "step": 30 + }, + { + "epoch": 0.39, + "learning_rate": 1.860846531784368e-05, + "loss": 0.8462, + "step": 31 + }, + { + "epoch": 0.41, + "learning_rate": 1.8502171357296144e-05, + "loss": 0.9053, + "step": 32 + }, + { + "epoch": 0.42, + "learning_rate": 1.839229287286327e-05, + "loss": 0.8835, + "step": 33 + }, + { + "epoch": 0.43, + "learning_rate": 1.827887618942318e-05, + "loss": 0.8616, + "step": 34 + }, + { + "epoch": 0.44, + "learning_rate": 1.816196912356222e-05, + "loss": 0.8164, + "step": 35 + }, + { + "epoch": 0.46, + "learning_rate": 1.8041620963415418e-05, + "loss": 0.8625, + "step": 36 + }, + { + "epoch": 0.47, + "learning_rate": 1.7917882447886585e-05, + "loss": 0.8666, + "step": 37 + }, + { + "epoch": 0.48, + "learning_rate": 1.7790805745256703e-05, + "loss": 0.7873, + "step": 38 + }, + { + "epoch": 0.49, + "learning_rate": 1.766044443118978e-05, + "loss": 0.8307, + "step": 39 + }, + { + "epoch": 0.51, + "learning_rate": 1.7526853466145248e-05, + "loss": 0.8486, + "step": 40 + }, + { + "epoch": 0.52, + "learning_rate": 1.7390089172206594e-05, + "loss": 0.9053, + "step": 41 + }, + { + "epoch": 0.53, + "learning_rate": 1.725020920933593e-05, + "loss": 0.8688, + "step": 42 + }, + { + "epoch": 0.54, + "learning_rate": 1.710727255106447e-05, + "loss": 0.8265, + "step": 43 + }, + { + "epoch": 0.56, + "learning_rate": 1.696133945962927e-05, + "loss": 0.9139, + "step": 44 + }, + { + "epoch": 0.57, + "learning_rate": 1.681247146056654e-05, + "loss": 0.8386, + "step": 45 + }, + { + "epoch": 0.58, + "learning_rate": 1.6660731316772503e-05, + "loss": 0.8992, + "step": 46 + }, + { + "epoch": 0.59, + "learning_rate": 1.650618300204242e-05, + "loss": 0.7989, + "step": 47 + }, + { + "epoch": 0.61, + "learning_rate": 1.634889167409923e-05, + "loss": 0.8672, + "step": 48 + }, + { + "epoch": 0.62, + "learning_rate": 1.6188923647122946e-05, + "loss": 0.8677, + "step": 49 + }, + { + "epoch": 0.63, + "learning_rate": 1.6026346363792565e-05, + "loss": 0.8739, + "step": 50 + }, + { + "epoch": 0.65, + "learning_rate": 1.5861228366852148e-05, + "loss": 0.8649, + "step": 51 + }, + { + "epoch": 0.66, + "learning_rate": 1.5693639270213138e-05, + "loss": 0.811, + "step": 52 + }, + { + "epoch": 0.67, + "learning_rate": 1.552364972960506e-05, + "loss": 0.9163, + "step": 53 + }, + { + "epoch": 0.68, + "learning_rate": 1.5351331412787004e-05, + "loss": 0.8034, + "step": 54 + }, + { + "epoch": 0.7, + "learning_rate": 1.5176756969332428e-05, + "loss": 0.8954, + "step": 55 + }, + { + "epoch": 0.71, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.8231, + "step": 56 + }, + { + "epoch": 0.72, + "learning_rate": 1.4821135025703491e-05, + "loss": 0.8731, + "step": 57 + }, + { + "epoch": 0.73, + "learning_rate": 1.4640237456093636e-05, + "loss": 0.8771, + "step": 58 + }, + { + "epoch": 0.75, + "learning_rate": 1.4457383557765385e-05, + "loss": 0.8212, + "step": 59 + }, + { + "epoch": 0.76, + "learning_rate": 1.427265042210381e-05, + "loss": 0.8638, + "step": 60 + }, + { + "epoch": 0.77, + "learning_rate": 1.4086115932782316e-05, + "loss": 0.8267, + "step": 61 + }, + { + "epoch": 0.78, + "learning_rate": 1.3897858732926794e-05, + "loss": 0.8097, + "step": 62 + }, + { + "epoch": 0.8, + "learning_rate": 1.3707958191959609e-05, + "loss": 0.9158, + "step": 63 + }, + { + "epoch": 0.81, + "learning_rate": 1.3516494372137368e-05, + "loss": 0.8106, + "step": 64 + }, + { + "epoch": 0.82, + "learning_rate": 1.3323547994796597e-05, + "loss": 0.8142, + "step": 65 + }, + { + "epoch": 0.84, + "learning_rate": 1.3129200406321545e-05, + "loss": 0.861, + "step": 66 + }, + { + "epoch": 0.85, + "learning_rate": 1.2933533543848462e-05, + "loss": 0.8305, + "step": 67 + }, + { + "epoch": 0.86, + "learning_rate": 1.2736629900720832e-05, + "loss": 0.8461, + "step": 68 + }, + { + "epoch": 0.87, + "learning_rate": 1.2538572491710079e-05, + "loss": 0.8338, + "step": 69 + }, + { + "epoch": 0.89, + "learning_rate": 1.2339444818016488e-05, + "loss": 0.8337, + "step": 70 + }, + { + "epoch": 0.9, + "learning_rate": 1.2139330832064975e-05, + "loss": 0.8594, + "step": 71 + }, + { + "epoch": 0.91, + "learning_rate": 1.1938314902110701e-05, + "loss": 0.8668, + "step": 72 + }, + { + "epoch": 0.92, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.8392, + "step": 73 + }, + { + "epoch": 0.94, + "learning_rate": 1.1533916548786856e-05, + "loss": 0.8399, + "step": 74 + }, + { + "epoch": 0.95, + "learning_rate": 1.133070462016454e-05, + "loss": 0.8159, + "step": 75 + }, + { + "epoch": 0.96, + "learning_rate": 1.1126931665153213e-05, + "loss": 0.8475, + "step": 76 + }, + { + "epoch": 0.97, + "learning_rate": 1.092268359463302e-05, + "loss": 0.9015, + "step": 77 + }, + { + "epoch": 0.99, + "learning_rate": 1.0718046519793276e-05, + "loss": 0.7981, + "step": 78 + }, + { + "epoch": 1.0, + "learning_rate": 1.0513106715827897e-05, + "loss": 0.9013, + "step": 79 + }, + { + "epoch": 1.01, + "learning_rate": 1.0307950585561705e-05, + "loss": 0.8196, + "step": 80 + }, + { + "epoch": 1.03, + "learning_rate": 1.01026646230229e-05, + "loss": 0.7468, + "step": 81 + }, + { + "epoch": 1.04, + "learning_rate": 9.897335376977104e-06, + "loss": 0.7359, + "step": 82 + }, + { + "epoch": 1.05, + "learning_rate": 9.692049414438298e-06, + "loss": 0.8088, + "step": 83 + }, + { + "epoch": 1.06, + "learning_rate": 9.486893284172103e-06, + "loss": 0.8199, + "step": 84 + }, + { + "epoch": 1.08, + "learning_rate": 9.281953480206725e-06, + "loss": 0.8313, + "step": 85 + }, + { + "epoch": 1.09, + "learning_rate": 9.07731640536698e-06, + "loss": 0.7696, + "step": 86 + }, + { + "epoch": 1.1, + "learning_rate": 8.87306833484679e-06, + "loss": 0.8031, + "step": 87 + }, + { + "epoch": 1.11, + "learning_rate": 8.669295379835467e-06, + "loss": 0.8056, + "step": 88 + }, + { + "epoch": 1.13, + "learning_rate": 8.466083451213145e-06, + "loss": 0.8029, + "step": 89 + }, + { + "epoch": 1.14, + "learning_rate": 8.263518223330698e-06, + "loss": 0.7648, + "step": 90 + }, + { + "epoch": 1.15, + "learning_rate": 8.0616850978893e-06, + "loss": 0.7962, + "step": 91 + }, + { + "epoch": 1.16, + "learning_rate": 7.860669167935028e-06, + "loss": 0.844, + "step": 92 + }, + { + "epoch": 1.18, + "learning_rate": 7.660555181983517e-06, + "loss": 0.8402, + "step": 93 + }, + { + "epoch": 1.19, + "learning_rate": 7.461427508289922e-06, + "loss": 0.8267, + "step": 94 + }, + { + "epoch": 1.2, + "learning_rate": 7.263370099279173e-06, + "loss": 0.7752, + "step": 95 + }, + { + "epoch": 1.22, + "learning_rate": 7.066466456151541e-06, + "loss": 0.8236, + "step": 96 + }, + { + "epoch": 1.23, + "learning_rate": 6.870799593678459e-06, + "loss": 0.7867, + "step": 97 + }, + { + "epoch": 1.24, + "learning_rate": 6.6764520052034054e-06, + "loss": 0.7933, + "step": 98 + }, + { + "epoch": 1.25, + "learning_rate": 6.483505627862632e-06, + "loss": 0.787, + "step": 99 + }, + { + "epoch": 1.27, + "learning_rate": 6.292041808040393e-06, + "loss": 0.8127, + "step": 100 + }, + { + "epoch": 1.28, + "learning_rate": 6.102141267073207e-06, + "loss": 0.7771, + "step": 101 + }, + { + "epoch": 1.29, + "learning_rate": 5.913884067217686e-06, + "loss": 0.8351, + "step": 102 + }, + { + "epoch": 1.3, + "learning_rate": 5.727349577896194e-06, + "loss": 0.8409, + "step": 103 + }, + { + "epoch": 1.32, + "learning_rate": 5.542616442234618e-06, + "loss": 0.8329, + "step": 104 + }, + { + "epoch": 1.33, + "learning_rate": 5.3597625439063685e-06, + "loss": 0.7829, + "step": 105 + }, + { + "epoch": 1.34, + "learning_rate": 5.178864974296511e-06, + "loss": 0.8322, + "step": 106 + }, + { + "epoch": 1.35, + "learning_rate": 5.000000000000003e-06, + "loss": 0.8195, + "step": 107 + }, + { + "epoch": 1.37, + "learning_rate": 4.823243030667576e-06, + "loss": 0.8085, + "step": 108 + }, + { + "epoch": 1.38, + "learning_rate": 4.648668587212998e-06, + "loss": 0.8225, + "step": 109 + }, + { + "epoch": 1.39, + "learning_rate": 4.476350270394942e-06, + "loss": 0.7969, + "step": 110 + }, + { + "epoch": 1.41, + "learning_rate": 4.306360729786867e-06, + "loss": 0.8235, + "step": 111 + }, + { + "epoch": 1.42, + "learning_rate": 4.138771633147856e-06, + "loss": 0.821, + "step": 112 + }, + { + "epoch": 1.43, + "learning_rate": 3.973653636207437e-06, + "loss": 0.7919, + "step": 113 + }, + { + "epoch": 1.44, + "learning_rate": 3.8110763528770543e-06, + "loss": 0.8244, + "step": 114 + }, + { + "epoch": 1.46, + "learning_rate": 3.651108325900773e-06, + "loss": 0.8309, + "step": 115 + }, + { + "epoch": 1.47, + "learning_rate": 3.493816997957582e-06, + "loss": 0.7764, + "step": 116 + }, + { + "epoch": 1.48, + "learning_rate": 3.339268683227499e-06, + "loss": 0.7887, + "step": 117 + }, + { + "epoch": 1.49, + "learning_rate": 3.1875285394334575e-06, + "loss": 0.8064, + "step": 118 + }, + { + "epoch": 1.51, + "learning_rate": 3.0386605403707347e-06, + "loss": 0.8121, + "step": 119 + }, + { + "epoch": 1.52, + "learning_rate": 2.8927274489355296e-06, + "loss": 0.7425, + "step": 120 + }, + { + "epoch": 1.53, + "learning_rate": 2.749790790664074e-06, + "loss": 0.7482, + "step": 121 + }, + { + "epoch": 1.54, + "learning_rate": 2.6099108277934105e-06, + "loss": 0.7445, + "step": 122 + }, + { + "epoch": 1.56, + "learning_rate": 2.4731465338547556e-06, + "loss": 0.8238, + "step": 123 + }, + { + "epoch": 1.57, + "learning_rate": 2.339555568810221e-06, + "loss": 0.8449, + "step": 124 + }, + { + "epoch": 1.58, + "learning_rate": 2.209194254743295e-06, + "loss": 0.774, + "step": 125 + }, + { + "epoch": 1.59, + "learning_rate": 2.0821175521134208e-06, + "loss": 0.7297, + "step": 126 + }, + { + "epoch": 1.61, + "learning_rate": 1.9583790365845823e-06, + "loss": 0.8108, + "step": 127 + }, + { + "epoch": 1.62, + "learning_rate": 1.8380308764377841e-06, + "loss": 0.7967, + "step": 128 + }, + { + "epoch": 1.63, + "learning_rate": 1.7211238105768213e-06, + "loss": 0.7798, + "step": 129 + }, + { + "epoch": 1.65, + "learning_rate": 1.607707127136734e-06, + "loss": 0.7844, + "step": 130 + }, + { + "epoch": 1.66, + "learning_rate": 1.4978286427038602e-06, + "loss": 0.8666, + "step": 131 + }, + { + "epoch": 1.67, + "learning_rate": 1.3915346821563235e-06, + "loss": 0.7886, + "step": 132 + }, + { + "epoch": 1.68, + "learning_rate": 1.2888700591334225e-06, + "loss": 0.8268, + "step": 133 + }, + { + "epoch": 1.7, + "learning_rate": 1.1898780571421554e-06, + "loss": 0.8099, + "step": 134 + }, + { + "epoch": 1.71, + "learning_rate": 1.0946004113088381e-06, + "loss": 0.8498, + "step": 135 + }, + { + "epoch": 1.72, + "learning_rate": 1.0030772907835484e-06, + "loss": 0.8092, + "step": 136 + }, + { + "epoch": 1.73, + "learning_rate": 9.153472818047627e-07, + "loss": 0.8083, + "step": 137 + }, + { + "epoch": 1.75, + "learning_rate": 8.31447371431372e-07, + "loss": 0.8005, + "step": 138 + }, + { + "epoch": 1.76, + "learning_rate": 7.514129319488839e-07, + "loss": 0.7884, + "step": 139 + }, + { + "epoch": 1.77, + "learning_rate": 6.752777059564431e-07, + "loss": 0.7484, + "step": 140 + }, + { + "epoch": 1.78, + "learning_rate": 6.030737921409169e-07, + "loss": 0.8468, + "step": 141 + }, + { + "epoch": 1.8, + "learning_rate": 5.348316317440549e-07, + "loss": 0.8393, + "step": 142 + }, + { + "epoch": 1.81, + "learning_rate": 4.7057999572843516e-07, + "loss": 0.7851, + "step": 143 + }, + { + "epoch": 1.82, + "learning_rate": 4.103459726475889e-07, + "loss": 0.7866, + "step": 144 + }, + { + "epoch": 1.84, + "learning_rate": 3.541549572254488e-07, + "loss": 0.7847, + "step": 145 + }, + { + "epoch": 1.85, + "learning_rate": 3.020306396499062e-07, + "loss": 0.8206, + "step": 146 + }, + { + "epoch": 1.86, + "learning_rate": 2.539949955849985e-07, + "loss": 0.8332, + "step": 147 + }, + { + "epoch": 1.87, + "learning_rate": 2.1006827690595478e-07, + "loss": 0.7732, + "step": 148 + }, + { + "epoch": 1.89, + "learning_rate": 1.7026900316098217e-07, + "loss": 0.8691, + "step": 149 + }, + { + "epoch": 1.9, + "learning_rate": 1.3461395376340502e-07, + "loss": 0.8971, + "step": 150 + }, + { + "epoch": 1.91, + "learning_rate": 1.0311816091744698e-07, + "loss": 0.8452, + "step": 151 + }, + { + "epoch": 1.92, + "learning_rate": 7.579490328064265e-08, + "loss": 0.7721, + "step": 152 + }, + { + "epoch": 1.94, + "learning_rate": 5.265570036553813e-08, + "loss": 0.7899, + "step": 153 + }, + { + "epoch": 1.95, + "learning_rate": 3.371030768305583e-08, + "loss": 0.7647, + "step": 154 + }, + { + "epoch": 1.96, + "learning_rate": 1.896671262955896e-08, + "loss": 0.8168, + "step": 155 + }, + { + "epoch": 1.97, + "learning_rate": 8.431131119361891e-09, + "loss": 0.8398, + "step": 156 + }, + { + "epoch": 1.99, + "learning_rate": 2.108004964086474e-09, + "loss": 0.7823, + "step": 157 + }, + { + "epoch": 2.0, + "learning_rate": 0.0, + "loss": 0.791, + "step": 158 + }, + { + "epoch": 2.0, + "step": 158, + "total_flos": 3.753425597670359e+17, + "train_loss": 0.8536941778810718, + "train_runtime": 661.0258, + "train_samples_per_second": 15.128, + "train_steps_per_second": 0.239 + } + ], + "logging_steps": 1.0, + "max_steps": 158, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 3.753425597670359e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..de9a8bcfff261602890b73f2863a3dab0dff0cb5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "v_proj", + "o_proj", + "gate_proj", + "down_proj", + "k_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..246f4fbd96b0b3cc754c38d91989876dca0b6d9a --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a819523287a6f04493612b5a36c2f3c964b40488a3ef424a800f6408b33a9e +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..555a190d3655144ecadfb838bd8a86f3002abe5e --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/trainer_state.json @@ -0,0 +1,11006 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9993626513702996, + "eval_steps": 500, + "global_step": 784, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.166666666666666e-09, + "logits/chosen": -3.2766342163085938, + "logits/rejected": -3.1054320335388184, + "logps/chosen": -427.31890869140625, + "logps/rejected": -2512.3076171875, + "loss": 1.1514, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.004450988955795765, + "rewards/margins": 0.19273529946804047, + "rewards/rejected": -0.19718627631664276, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -3.291973114013672, + "logits/rejected": -3.0955986976623535, + "logps/chosen": -503.9743347167969, + "logps/rejected": -2278.18359375, + "loss": 1.2607, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00213623046875, + "rewards/margins": -0.14645996689796448, + "rewards/rejected": 0.14859619736671448, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-08, + "logits/chosen": -3.2029333114624023, + "logits/rejected": -3.0846571922302246, + "logps/chosen": -502.3302917480469, + "logps/rejected": -1518.743408203125, + "loss": 1.2094, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.007611083798110485, + "rewards/margins": -0.083465576171875, + "rewards/rejected": 0.09107665717601776, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -3.1908836364746094, + "logits/rejected": -3.147874355316162, + "logps/chosen": -563.85400390625, + "logps/rejected": -3928.975341796875, + "loss": 1.2686, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02401123195886612, + "rewards/margins": -0.33943483233451843, + "rewards/rejected": 0.36344605684280396, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333335e-08, + "logits/chosen": -3.2365918159484863, + "logits/rejected": -3.1765806674957275, + "logps/chosen": -532.33984375, + "logps/rejected": -1410.562255859375, + "loss": 1.2561, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01850280910730362, + "rewards/margins": -0.07295837253332138, + "rewards/rejected": 0.091461181640625, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-08, + "logits/chosen": -3.2048943042755127, + "logits/rejected": -3.0413317680358887, + "logps/chosen": -522.1971435546875, + "logps/rejected": -2107.736328125, + "loss": 1.187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04217834398150444, + "rewards/margins": 0.22823792695999146, + "rewards/rejected": -0.270416259765625, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.9166666666666666e-08, + "logits/chosen": -3.195402145385742, + "logits/rejected": -3.178725242614746, + "logps/chosen": -572.16455078125, + "logps/rejected": -1906.4061279296875, + "loss": 1.2718, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0009201038628816605, + "rewards/margins": -0.008164975792169571, + "rewards/rejected": 0.0072448719292879105, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -3.246784210205078, + "logits/rejected": -3.180382251739502, + "logps/chosen": -547.1192016601562, + "logps/rejected": -1165.755126953125, + "loss": 1.2707, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0026702880859375, + "rewards/margins": -0.1224822998046875, + "rewards/rejected": 0.11981201171875, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 3.75e-08, + "logits/chosen": -3.2186131477355957, + "logits/rejected": -3.1850380897521973, + "logps/chosen": -539.9624633789062, + "logps/rejected": -1468.87158203125, + "loss": 1.2226, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.05292053148150444, + "rewards/margins": -0.05671081691980362, + "rewards/rejected": 0.10963134467601776, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -3.2312207221984863, + "logits/rejected": -3.1126482486724854, + "logps/chosen": -523.5892944335938, + "logps/rejected": -1325.018310546875, + "loss": 1.2203, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.00302734412252903, + "rewards/margins": 0.02674255333840847, + "rewards/rejected": -0.0297698974609375, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.583333333333333e-08, + "logits/chosen": -3.243213653564453, + "logits/rejected": -3.189818859100342, + "logps/chosen": -471.73016357421875, + "logps/rejected": -1347.054443359375, + "loss": 1.1612, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.05196991190314293, + "rewards/margins": 0.11305390298366547, + "rewards/rejected": -0.06108398735523224, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 5e-08, + "logits/chosen": -3.245100975036621, + "logits/rejected": -3.160212993621826, + "logps/chosen": -505.6475524902344, + "logps/rejected": -1368.30322265625, + "loss": 1.2148, + "rewards/accuracies": 0.5, + "rewards/chosen": 2.4411827325820923e-05, + "rewards/margins": 0.007409665733575821, + "rewards/rejected": -0.00738525390625, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 5.416666666666666e-08, + "logits/chosen": -3.2614994049072266, + "logits/rejected": -3.1965696811676025, + "logps/chosen": -439.6477966308594, + "logps/rejected": -1462.3515625, + "loss": 1.2096, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06709136813879013, + "rewards/margins": 0.07663726806640625, + "rewards/rejected": -0.00954589992761612, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -3.3207192420959473, + "logits/rejected": -3.131727695465088, + "logps/chosen": -495.2718505859375, + "logps/rejected": -1256.8441162109375, + "loss": 1.232, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0028732288628816605, + "rewards/margins": -0.03846893459558487, + "rewards/rejected": 0.03559570387005806, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 6.25e-08, + "logits/chosen": -3.2689874172210693, + "logits/rejected": -3.198465347290039, + "logps/chosen": -520.3651123046875, + "logps/rejected": -900.2677001953125, + "loss": 1.2674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09343567490577698, + "rewards/margins": 0.10238952934741974, + "rewards/rejected": -0.00895385816693306, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -3.2341723442077637, + "logits/rejected": -3.1695642471313477, + "logps/chosen": -544.0496826171875, + "logps/rejected": -1566.3150634765625, + "loss": 1.2146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054931640625, + "rewards/margins": 0.19170531630516052, + "rewards/rejected": -0.19719848036766052, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 7.083333333333334e-08, + "logits/chosen": -3.2417006492614746, + "logits/rejected": -3.138516902923584, + "logps/chosen": -545.6058349609375, + "logps/rejected": -1541.89697265625, + "loss": 1.2054, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05697936937212944, + "rewards/margins": 0.028988651931285858, + "rewards/rejected": -0.085968017578125, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-08, + "logits/chosen": -3.2602686882019043, + "logits/rejected": -2.960326671600342, + "logps/chosen": -488.312744140625, + "logps/rejected": -4750.9677734375, + "loss": 1.0987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04599456861615181, + "rewards/margins": 0.7661484479904175, + "rewards/rejected": -0.72015380859375, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 7.916666666666665e-08, + "logits/chosen": -3.229192018508911, + "logits/rejected": -3.2300195693969727, + "logps/chosen": -528.026123046875, + "logps/rejected": -1008.8605346679688, + "loss": 1.2005, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0062759388238191605, + "rewards/margins": 0.03051910549402237, + "rewards/rejected": -0.02424316480755806, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -3.2015504837036133, + "logits/rejected": -3.1199216842651367, + "logps/chosen": -517.943115234375, + "logps/rejected": -954.9088745117188, + "loss": 1.1814, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04369659349322319, + "rewards/margins": -0.03464202582836151, + "rewards/rejected": -0.00905456393957138, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 8.75e-08, + "logits/chosen": -3.2201361656188965, + "logits/rejected": -3.1562225818634033, + "logps/chosen": -556.8109130859375, + "logps/rejected": -1535.5594482421875, + "loss": 1.2193, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00858764722943306, + "rewards/margins": 0.07724609225988388, + "rewards/rejected": -0.06865844875574112, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -3.2158803939819336, + "logits/rejected": -3.1189799308776855, + "logps/chosen": -449.3207702636719, + "logps/rejected": -1365.832763671875, + "loss": 1.1204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04933013767004013, + "rewards/margins": 0.36752474308013916, + "rewards/rejected": -0.31819459795951843, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 9.583333333333334e-08, + "logits/chosen": -3.2532958984375, + "logits/rejected": -3.1048269271850586, + "logps/chosen": -546.6812744140625, + "logps/rejected": -2274.787353515625, + "loss": 1.0578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011264038272202015, + "rewards/margins": 0.4703766107559204, + "rewards/rejected": -0.48164063692092896, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -3.1998813152313232, + "logits/rejected": -3.1198933124542236, + "logps/chosen": -517.392822265625, + "logps/rejected": -1999.54052734375, + "loss": 1.1051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12201385945081711, + "rewards/margins": 0.4556625485420227, + "rewards/rejected": -0.333648681640625, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 9.999957281897734e-08, + "logits/chosen": -3.258018970489502, + "logits/rejected": -3.201117992401123, + "logps/chosen": -568.55419921875, + "logps/rejected": -1279.1063232421875, + "loss": 1.1528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04779968038201332, + "rewards/margins": 0.21448060870170593, + "rewards/rejected": -0.16668091714382172, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 9.999829128320872e-08, + "logits/chosen": -3.2292840480804443, + "logits/rejected": -3.1350479125976562, + "logps/chosen": -503.2020263671875, + "logps/rejected": -1234.66015625, + "loss": 1.1122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0051147472113370895, + "rewards/margins": 0.23547974228858948, + "rewards/rejected": -0.23036499321460724, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 9.999615541459205e-08, + "logits/chosen": -3.162203073501587, + "logits/rejected": -3.0178093910217285, + "logps/chosen": -476.822509765625, + "logps/rejected": -3110.0849609375, + "loss": 1.095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03331451490521431, + "rewards/margins": 0.5880508422851562, + "rewards/rejected": -0.5547363758087158, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 9.999316524962345e-08, + "logits/chosen": -3.2163825035095215, + "logits/rejected": -3.1541662216186523, + "logps/chosen": -557.4920654296875, + "logps/rejected": -975.6239624023438, + "loss": 1.1278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0028778091073036194, + "rewards/margins": 0.29609376192092896, + "rewards/rejected": -0.29321596026420593, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 9.998932083939655e-08, + "logits/chosen": -3.237154483795166, + "logits/rejected": -3.125666618347168, + "logps/chosen": -554.73876953125, + "logps/rejected": -836.4267578125, + "loss": 1.1057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01369323767721653, + "rewards/margins": 0.21080628037452698, + "rewards/rejected": -0.22449952363967896, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 9.998462224960173e-08, + "logits/chosen": -3.251368999481201, + "logits/rejected": -3.12276291847229, + "logps/chosen": -486.940185546875, + "logps/rejected": -684.8047485351562, + "loss": 1.1064, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02014007791876793, + "rewards/margins": 0.0029235854744911194, + "rewards/rejected": 0.01721649244427681, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 9.997906956052494e-08, + "logits/chosen": -3.2442002296447754, + "logits/rejected": -3.17405366897583, + "logps/chosen": -488.08843994140625, + "logps/rejected": -1151.676025390625, + "loss": 1.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00937194935977459, + "rewards/margins": 0.261849969625473, + "rewards/rejected": -0.2524780333042145, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 9.99726628670463e-08, + "logits/chosen": -3.275197982788086, + "logits/rejected": -3.145320177078247, + "logps/chosen": -463.93914794921875, + "logps/rejected": -772.6134033203125, + "loss": 1.1063, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00474243238568306, + "rewards/margins": 0.1150970458984375, + "rewards/rejected": -0.11035461723804474, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 9.996540227863853e-08, + "logits/chosen": -3.2651426792144775, + "logits/rejected": -3.1506690979003906, + "logps/chosen": -509.3641052246094, + "logps/rejected": -1277.263671875, + "loss": 1.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01659240759909153, + "rewards/margins": 0.26992493867874146, + "rewards/rejected": -0.28651735186576843, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 9.995728791936504e-08, + "logits/chosen": -3.2041547298431396, + "logits/rejected": -3.104224681854248, + "logps/chosen": -531.4658203125, + "logps/rejected": -897.1766967773438, + "loss": 1.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03523864969611168, + "rewards/margins": 0.22691650688648224, + "rewards/rejected": -0.19167785346508026, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 9.994831992787787e-08, + "logits/chosen": -3.2940187454223633, + "logits/rejected": -3.0994794368743896, + "logps/chosen": -536.8515014648438, + "logps/rejected": -901.204345703125, + "loss": 1.0638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038696303963661194, + "rewards/margins": 0.14196014404296875, + "rewards/rejected": -0.14582976698875427, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 9.993849845741523e-08, + "logits/chosen": -3.223297595977783, + "logits/rejected": -3.148317575454712, + "logps/chosen": -515.2670288085938, + "logps/rejected": -922.6947021484375, + "loss": 1.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07696990668773651, + "rewards/margins": 0.4288894534111023, + "rewards/rejected": -0.351919561624527, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 9.992782367579898e-08, + "logits/chosen": -3.233363151550293, + "logits/rejected": -3.084779977798462, + "logps/chosen": -548.6011962890625, + "logps/rejected": -4381.61279296875, + "loss": 0.9962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00107574462890625, + "rewards/margins": 1.0337417125701904, + "rewards/rejected": -1.0348174571990967, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 9.991629576543162e-08, + "logits/chosen": -3.207836151123047, + "logits/rejected": -3.142390251159668, + "logps/chosen": -538.398193359375, + "logps/rejected": -1009.2443237304688, + "loss": 1.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08082428574562073, + "rewards/margins": 0.49198150634765625, + "rewards/rejected": -0.4111572504043579, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 9.99039149232934e-08, + "logits/chosen": -3.241298198699951, + "logits/rejected": -3.148329496383667, + "logps/chosen": -543.5843505859375, + "logps/rejected": -1860.143310546875, + "loss": 0.9738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17465515434741974, + "rewards/margins": 0.8963958621025085, + "rewards/rejected": -0.72174072265625, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 9.989068136093872e-08, + "logits/chosen": -3.2232437133789062, + "logits/rejected": -3.129229784011841, + "logps/chosen": -503.69305419921875, + "logps/rejected": -875.425537109375, + "loss": 0.9629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09298401325941086, + "rewards/margins": 0.4465728998184204, + "rewards/rejected": -0.35358887910842896, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 9.987659530449266e-08, + "logits/chosen": -3.2290329933166504, + "logits/rejected": -3.1248133182525635, + "logps/chosen": -520.7017822265625, + "logps/rejected": -1117.80126953125, + "loss": 1.0557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04483337327837944, + "rewards/margins": 0.3628631830215454, + "rewards/rejected": -0.3180297911167145, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 9.986165699464705e-08, + "logits/chosen": -3.2495765686035156, + "logits/rejected": -3.184037208557129, + "logps/chosen": -533.668212890625, + "logps/rejected": -1121.952392578125, + "loss": 1.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09141998738050461, + "rewards/margins": 0.5632522106170654, + "rewards/rejected": -0.471832275390625, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 9.98458666866564e-08, + "logits/chosen": -3.3196330070495605, + "logits/rejected": -3.1231765747070312, + "logps/chosen": -515.149169921875, + "logps/rejected": -2128.60498046875, + "loss": 0.9235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.152679443359375, + "rewards/margins": 1.006140112876892, + "rewards/rejected": -0.8534606695175171, + "step": 43 + }, + { + "epoch": 0.06, + "learning_rate": 9.982922465033349e-08, + "logits/chosen": -3.241931438446045, + "logits/rejected": -3.1183724403381348, + "logps/chosen": -504.23876953125, + "logps/rejected": -1128.6156005859375, + "loss": 0.8707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.125946044921875, + "rewards/margins": 0.66583251953125, + "rewards/rejected": -0.539886474609375, + "step": 44 + }, + { + "epoch": 0.06, + "learning_rate": 9.981173117004483e-08, + "logits/chosen": -3.230341911315918, + "logits/rejected": -3.1565191745758057, + "logps/chosen": -484.2523193359375, + "logps/rejected": -1952.856201171875, + "loss": 0.8909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13317565619945526, + "rewards/margins": 1.1947113275527954, + "rewards/rejected": -1.0615355968475342, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 9.979338654470568e-08, + "logits/chosen": -3.1632919311523438, + "logits/rejected": -3.0762014389038086, + "logps/chosen": -514.0699462890625, + "logps/rejected": -901.8410034179688, + "loss": 0.9628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0840606689453125, + "rewards/margins": 0.38920289278030396, + "rewards/rejected": -0.30514222383499146, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 9.977419108777513e-08, + "logits/chosen": -3.2656235694885254, + "logits/rejected": -3.1051764488220215, + "logps/chosen": -454.31121826171875, + "logps/rejected": -1320.050048828125, + "loss": 0.8295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03319702297449112, + "rewards/margins": 0.6241546869277954, + "rewards/rejected": -0.5909576416015625, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 9.975414512725057e-08, + "logits/chosen": -3.2473087310791016, + "logits/rejected": -3.192356824874878, + "logps/chosen": -545.134033203125, + "logps/rejected": -1143.503662109375, + "loss": 0.8683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19232177734375, + "rewards/margins": 0.688232421875, + "rewards/rejected": -0.49591064453125, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 9.973324900566212e-08, + "logits/chosen": -3.2477569580078125, + "logits/rejected": -3.1404147148132324, + "logps/chosen": -539.1829223632812, + "logps/rejected": -1689.9359130859375, + "loss": 0.9183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01612548902630806, + "rewards/margins": 0.9266846179962158, + "rewards/rejected": -0.9105590581893921, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 9.971150308006688e-08, + "logits/chosen": -3.197521686553955, + "logits/rejected": -3.090137481689453, + "logps/chosen": -559.40087890625, + "logps/rejected": -1677.717041015625, + "loss": 0.9473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09129638969898224, + "rewards/margins": 0.8863525390625, + "rewards/rejected": -0.795056164264679, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 9.968890772204271e-08, + "logits/chosen": -3.200634002685547, + "logits/rejected": -3.1576597690582275, + "logps/chosen": -565.1937866210938, + "logps/rejected": -1804.5753173828125, + "loss": 0.8842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12246399372816086, + "rewards/margins": 1.1217865943908691, + "rewards/rejected": -0.9993225336074829, + "step": 51 + }, + { + "epoch": 0.07, + "learning_rate": 9.96654633176819e-08, + "logits/chosen": -3.170407772064209, + "logits/rejected": -3.105621099472046, + "logps/chosen": -469.2174072265625, + "logps/rejected": -2144.58447265625, + "loss": 0.8165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11606597900390625, + "rewards/margins": 1.479267954826355, + "rewards/rejected": -1.3632019758224487, + "step": 52 + }, + { + "epoch": 0.07, + "learning_rate": 9.964117026758469e-08, + "logits/chosen": -3.3243017196655273, + "logits/rejected": -3.1362857818603516, + "logps/chosen": -493.94476318359375, + "logps/rejected": -1552.5028076171875, + "loss": 0.8925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09804840385913849, + "rewards/margins": 1.0802383422851562, + "rewards/rejected": -0.9821900129318237, + "step": 53 + }, + { + "epoch": 0.07, + "learning_rate": 9.961602898685224e-08, + "logits/chosen": -3.2284388542175293, + "logits/rejected": -3.1540746688842773, + "logps/chosen": -488.06488037109375, + "logps/rejected": -1221.9254150390625, + "loss": 0.8488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1506607085466385, + "rewards/margins": 0.8397781848907471, + "rewards/rejected": -0.689117431640625, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 9.959003990507971e-08, + "logits/chosen": -3.271824836730957, + "logits/rejected": -3.097425937652588, + "logps/chosen": -578.8687133789062, + "logps/rejected": -1839.059814453125, + "loss": 0.874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1707000732421875, + "rewards/margins": 1.2588592767715454, + "rewards/rejected": -1.088159203529358, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 9.956320346634876e-08, + "logits/chosen": -3.197030544281006, + "logits/rejected": -3.125340461730957, + "logps/chosen": -498.4437255859375, + "logps/rejected": -1918.8218994140625, + "loss": 0.8363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16057434678077698, + "rewards/margins": 1.7557098865509033, + "rewards/rejected": -1.5951354503631592, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 9.953552012922011e-08, + "logits/chosen": -3.2124009132385254, + "logits/rejected": -3.0721042156219482, + "logps/chosen": -564.568359375, + "logps/rejected": -1526.08203125, + "loss": 0.823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1187591552734375, + "rewards/margins": 1.2326996326446533, + "rewards/rejected": -1.1139404773712158, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 9.950699036672558e-08, + "logits/chosen": -3.2879252433776855, + "logits/rejected": -3.1475257873535156, + "logps/chosen": -532.2493896484375, + "logps/rejected": -1221.67578125, + "loss": 0.7486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10104675590991974, + "rewards/margins": 0.8717376589775085, + "rewards/rejected": -0.77069091796875, + "step": 58 + }, + { + "epoch": 0.08, + "learning_rate": 9.947761466636013e-08, + "logits/chosen": -3.2640132904052734, + "logits/rejected": -3.0954408645629883, + "logps/chosen": -540.5562744140625, + "logps/rejected": -2599.3486328125, + "loss": 0.7675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24697265028953552, + "rewards/margins": 1.5741455554962158, + "rewards/rejected": -1.327172875404358, + "step": 59 + }, + { + "epoch": 0.08, + "learning_rate": 9.944739353007342e-08, + "logits/chosen": -3.210343837738037, + "logits/rejected": -3.1668448448181152, + "logps/chosen": -487.68121337890625, + "logps/rejected": -1210.6527099609375, + "loss": 0.7754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17369994521141052, + "rewards/margins": 1.225091576576233, + "rewards/rejected": -1.0513916015625, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 9.941632747426128e-08, + "logits/chosen": -3.313779354095459, + "logits/rejected": -3.1687729358673096, + "logps/chosen": -516.7981567382812, + "logps/rejected": -1505.66064453125, + "loss": 0.8262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26775819063186646, + "rewards/margins": 1.2045929431915283, + "rewards/rejected": -0.9368346929550171, + "step": 61 + }, + { + "epoch": 0.08, + "learning_rate": 9.938441702975688e-08, + "logits/chosen": -3.184061050415039, + "logits/rejected": -3.1634771823883057, + "logps/chosen": -573.785888671875, + "logps/rejected": -1016.2534790039062, + "loss": 0.8786, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1164398193359375, + "rewards/margins": 0.8216217160224915, + "rewards/rejected": -0.705181896686554, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 9.93516627418217e-08, + "logits/chosen": -3.2561635971069336, + "logits/rejected": -3.129401683807373, + "logps/chosen": -534.548828125, + "logps/rejected": -1492.5123291015625, + "loss": 0.8141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.143585205078125, + "rewards/margins": 1.4723267555236816, + "rewards/rejected": -1.3287415504455566, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 9.931806517013611e-08, + "logits/chosen": -3.2475991249084473, + "logits/rejected": -3.201463222503662, + "logps/chosen": -526.1031494140625, + "logps/rejected": -1656.433837890625, + "loss": 0.7184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20230408012866974, + "rewards/margins": 1.6559845209121704, + "rewards/rejected": -1.4536805152893066, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 9.928362488878995e-08, + "logits/chosen": -3.2561120986938477, + "logits/rejected": -3.1462349891662598, + "logps/chosen": -537.96533203125, + "logps/rejected": -1787.9642333984375, + "loss": 0.7866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22292327880859375, + "rewards/margins": 1.6017318964004517, + "rewards/rejected": -1.378808617591858, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 9.924834248627259e-08, + "logits/chosen": -3.3360776901245117, + "logits/rejected": -3.206641674041748, + "logps/chosen": -518.9649047851562, + "logps/rejected": -1078.665771484375, + "loss": 0.8, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14526215195655823, + "rewards/margins": 0.8050644397735596, + "rewards/rejected": -0.659802258014679, + "step": 66 + }, + { + "epoch": 0.09, + "learning_rate": 9.921221856546293e-08, + "logits/chosen": -3.218287944793701, + "logits/rejected": -3.1132097244262695, + "logps/chosen": -514.9090576171875, + "logps/rejected": -606.7734375, + "loss": 0.8534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19938048720359802, + "rewards/margins": 0.6540436148643494, + "rewards/rejected": -0.45466309785842896, + "step": 67 + }, + { + "epoch": 0.09, + "learning_rate": 9.917525374361911e-08, + "logits/chosen": -3.167684555053711, + "logits/rejected": -3.174147129058838, + "logps/chosen": -539.0787353515625, + "logps/rejected": -1905.5736083984375, + "loss": 0.7771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.228668212890625, + "rewards/margins": 1.994165062904358, + "rewards/rejected": -1.7654967308044434, + "step": 68 + }, + { + "epoch": 0.09, + "learning_rate": 9.913744865236797e-08, + "logits/chosen": -3.239103317260742, + "logits/rejected": -3.159733295440674, + "logps/chosen": -531.334228515625, + "logps/rejected": -1315.784912109375, + "loss": 0.7899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23137664794921875, + "rewards/margins": 1.3473495244979858, + "rewards/rejected": -1.115972876548767, + "step": 69 + }, + { + "epoch": 0.09, + "learning_rate": 9.909880393769419e-08, + "logits/chosen": -3.234713077545166, + "logits/rejected": -3.1517152786254883, + "logps/chosen": -492.32489013671875, + "logps/rejected": -1369.736572265625, + "loss": 0.7947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18926849961280823, + "rewards/margins": 1.4180375337600708, + "rewards/rejected": -1.2287689447402954, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 9.905932025992931e-08, + "logits/chosen": -3.251987934112549, + "logits/rejected": -3.1923584938049316, + "logps/chosen": -543.3041381835938, + "logps/rejected": -1224.06201171875, + "loss": 0.7546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28182679414749146, + "rewards/margins": 1.3740081787109375, + "rewards/rejected": -1.0921814441680908, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 9.901899829374047e-08, + "logits/chosen": -3.2513980865478516, + "logits/rejected": -3.1255078315734863, + "logps/chosen": -561.0476684570312, + "logps/rejected": -1456.01416015625, + "loss": 0.75, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24355164170265198, + "rewards/margins": 1.4495391845703125, + "rewards/rejected": -1.205987572669983, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 9.89778387281188e-08, + "logits/chosen": -3.2453818321228027, + "logits/rejected": -3.0306477546691895, + "logps/chosen": -534.4408569335938, + "logps/rejected": -1251.8798828125, + "loss": 0.7987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2856842279434204, + "rewards/margins": 1.3173431158065796, + "rewards/rejected": -1.0316588878631592, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 9.893584226636772e-08, + "logits/chosen": -3.2032642364501953, + "logits/rejected": -3.108574867248535, + "logps/chosen": -528.9642944335938, + "logps/rejected": -834.0701293945312, + "loss": 0.7966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23291930556297302, + "rewards/margins": 0.7901947498321533, + "rewards/rejected": -0.5572754144668579, + "step": 74 + }, + { + "epoch": 0.1, + "learning_rate": 9.889300962609089e-08, + "logits/chosen": -3.2637743949890137, + "logits/rejected": -3.159280300140381, + "logps/chosen": -477.1108093261719, + "logps/rejected": -1246.991455078125, + "loss": 0.7406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2039749175310135, + "rewards/margins": 1.3701614141464233, + "rewards/rejected": -1.1661865711212158, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 9.884934153917997e-08, + "logits/chosen": -3.2583370208740234, + "logits/rejected": -3.1931962966918945, + "logps/chosen": -529.1102294921875, + "logps/rejected": -1360.31884765625, + "loss": 0.8033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.107696533203125, + "rewards/margins": 1.2253053188323975, + "rewards/rejected": -1.117608666419983, + "step": 76 + }, + { + "epoch": 0.1, + "learning_rate": 9.880483875180204e-08, + "logits/chosen": -3.213301658630371, + "logits/rejected": -3.123347759246826, + "logps/chosen": -562.9735107421875, + "logps/rejected": -752.5529174804688, + "loss": 0.8559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21404877305030823, + "rewards/margins": 0.7611541748046875, + "rewards/rejected": -0.5471054315567017, + "step": 77 + }, + { + "epoch": 0.1, + "learning_rate": 9.875950202438699e-08, + "logits/chosen": -3.2090625762939453, + "logits/rejected": -3.169922351837158, + "logps/chosen": -482.85687255859375, + "logps/rejected": -1219.686767578125, + "loss": 0.7122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13569489121437073, + "rewards/margins": 1.503718614578247, + "rewards/rejected": -1.3680236339569092, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 9.871333213161437e-08, + "logits/chosen": -3.2693357467651367, + "logits/rejected": -3.14253830909729, + "logps/chosen": -512.7699584960938, + "logps/rejected": -1211.41259765625, + "loss": 0.7422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.143412783741951, + "rewards/margins": 1.4401473999023438, + "rewards/rejected": -1.2967345714569092, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 9.866632986240029e-08, + "logits/chosen": -3.284149169921875, + "logits/rejected": -3.162376880645752, + "logps/chosen": -505.8550109863281, + "logps/rejected": -2762.1123046875, + "loss": 0.7125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3558700680732727, + "rewards/margins": 3.236192226409912, + "rewards/rejected": -2.880322217941284, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 9.861849601988382e-08, + "logits/chosen": -3.2469825744628906, + "logits/rejected": -3.1379692554473877, + "logps/chosen": -567.5032958984375, + "logps/rejected": -1290.9197998046875, + "loss": 0.7131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26839447021484375, + "rewards/margins": 1.3117234706878662, + "rewards/rejected": -1.043328881263733, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 9.856983142141337e-08, + "logits/chosen": -3.2291436195373535, + "logits/rejected": -3.1858792304992676, + "logps/chosen": -509.65130615234375, + "logps/rejected": -1028.618896484375, + "loss": 0.7108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41200560331344604, + "rewards/margins": 1.2540801763534546, + "rewards/rejected": -0.8420745730400085, + "step": 82 + }, + { + "epoch": 0.11, + "learning_rate": 9.852033689853267e-08, + "logits/chosen": -3.254546642303467, + "logits/rejected": -3.076104164123535, + "logps/chosen": -525.535888671875, + "logps/rejected": -1552.1719970703125, + "loss": 0.7598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.273345947265625, + "rewards/margins": 1.9886047840118408, + "rewards/rejected": -1.7152588367462158, + "step": 83 + }, + { + "epoch": 0.11, + "learning_rate": 9.847001329696651e-08, + "logits/chosen": -3.211690664291382, + "logits/rejected": -3.169180393218994, + "logps/chosen": -510.0760192871094, + "logps/rejected": -1437.7281494140625, + "loss": 0.684, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15918579697608948, + "rewards/margins": 1.719061255455017, + "rewards/rejected": -1.55987548828125, + "step": 84 + }, + { + "epoch": 0.11, + "learning_rate": 9.841886147660644e-08, + "logits/chosen": -3.189258098602295, + "logits/rejected": -3.1754448413848877, + "logps/chosen": -453.84228515625, + "logps/rejected": -1269.519775390625, + "loss": 0.7485, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2644943296909332, + "rewards/margins": 1.6937119960784912, + "rewards/rejected": -1.4292175769805908, + "step": 85 + }, + { + "epoch": 0.11, + "learning_rate": 9.836688231149591e-08, + "logits/chosen": -3.31632399559021, + "logits/rejected": -3.209737777709961, + "logps/chosen": -515.8425903320312, + "logps/rejected": -1334.6734619140625, + "loss": 0.6732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23768004775047302, + "rewards/margins": 1.6465270519256592, + "rewards/rejected": -1.4088470935821533, + "step": 86 + }, + { + "epoch": 0.11, + "learning_rate": 9.831407668981545e-08, + "logits/chosen": -3.255270481109619, + "logits/rejected": -3.099916458129883, + "logps/chosen": -489.0701904296875, + "logps/rejected": -1845.1297607421875, + "loss": 0.6262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19548797607421875, + "rewards/margins": 1.9287490844726562, + "rewards/rejected": -1.7332611083984375, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 9.826044551386743e-08, + "logits/chosen": -3.2785654067993164, + "logits/rejected": -3.173642158508301, + "logps/chosen": -479.2571716308594, + "logps/rejected": -1336.733642578125, + "loss": 0.7062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15018463134765625, + "rewards/margins": 1.7481276988983154, + "rewards/rejected": -1.5979431867599487, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 9.820598970006067e-08, + "logits/chosen": -3.2719197273254395, + "logits/rejected": -3.1743597984313965, + "logps/chosen": -590.902099609375, + "logps/rejected": -1611.8983154296875, + "loss": 0.7977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28930968046188354, + "rewards/margins": 1.8205903768539429, + "rewards/rejected": -1.5312806367874146, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 9.81507101788948e-08, + "logits/chosen": -3.276019811630249, + "logits/rejected": -3.1711909770965576, + "logps/chosen": -495.05780029296875, + "logps/rejected": -1514.2025146484375, + "loss": 0.6719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2386474609375, + "rewards/margins": 1.9669373035430908, + "rewards/rejected": -1.7282898426055908, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 9.80946078949443e-08, + "logits/chosen": -3.284968376159668, + "logits/rejected": -3.082460880279541, + "logps/chosen": -471.40032958984375, + "logps/rejected": -3087.75146484375, + "loss": 0.6812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24281159043312073, + "rewards/margins": 2.963893175125122, + "rewards/rejected": -2.721081495285034, + "step": 91 + }, + { + "epoch": 0.12, + "learning_rate": 9.803768380684241e-08, + "logits/chosen": -3.2578892707824707, + "logits/rejected": -3.2463302612304688, + "logps/chosen": -506.58551025390625, + "logps/rejected": -1068.337890625, + "loss": 0.7448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19263000786304474, + "rewards/margins": 1.4323945045471191, + "rewards/rejected": -1.2397644519805908, + "step": 92 + }, + { + "epoch": 0.12, + "learning_rate": 9.797993888726472e-08, + "logits/chosen": -3.2497243881225586, + "logits/rejected": -3.1813502311706543, + "logps/chosen": -503.58087158203125, + "logps/rejected": -1109.177734375, + "loss": 0.6908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26511383056640625, + "rewards/margins": 1.333461046218872, + "rewards/rejected": -1.0683472156524658, + "step": 93 + }, + { + "epoch": 0.12, + "learning_rate": 9.792137412291263e-08, + "logits/chosen": -3.214191436767578, + "logits/rejected": -3.1619224548339844, + "logps/chosen": -544.7904052734375, + "logps/rejected": -1085.41259765625, + "loss": 0.7512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4149627685546875, + "rewards/margins": 1.84255051612854, + "rewards/rejected": -1.427587866783142, + "step": 94 + }, + { + "epoch": 0.12, + "learning_rate": 9.786199051449635e-08, + "logits/chosen": -3.2009437084198, + "logits/rejected": -3.1670002937316895, + "logps/chosen": -510.2091369628906, + "logps/rejected": -1142.46533203125, + "loss": 0.6677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.341513067483902, + "rewards/margins": 1.6717498302459717, + "rewards/rejected": -1.330236792564392, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 9.780178907671787e-08, + "logits/chosen": -3.2718260288238525, + "logits/rejected": -3.0660133361816406, + "logps/chosen": -532.7755126953125, + "logps/rejected": -1686.5718994140625, + "loss": 0.7464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1830337643623352, + "rewards/margins": 2.013655185699463, + "rewards/rejected": -1.830621361732483, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 9.774077083825372e-08, + "logits/chosen": -3.3284974098205566, + "logits/rejected": -3.197833299636841, + "logps/chosen": -499.1167907714844, + "logps/rejected": -1100.7139892578125, + "loss": 0.7391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.351400762796402, + "rewards/margins": 1.9115813970565796, + "rewards/rejected": -1.5601806640625, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 9.767893684173721e-08, + "logits/chosen": -3.2074179649353027, + "logits/rejected": -3.163076162338257, + "logps/chosen": -524.4642333984375, + "logps/rejected": -1035.56494140625, + "loss": 0.6927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38949280977249146, + "rewards/margins": 1.4763580560684204, + "rewards/rejected": -1.0868653059005737, + "step": 98 + }, + { + "epoch": 0.13, + "learning_rate": 9.761628814374073e-08, + "logits/chosen": -3.2466468811035156, + "logits/rejected": -3.2031795978546143, + "logps/chosen": -532.9094848632812, + "logps/rejected": -4613.869140625, + "loss": 0.6783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19449768960475922, + "rewards/margins": 2.5388641357421875, + "rewards/rejected": -2.3443665504455566, + "step": 99 + }, + { + "epoch": 0.13, + "learning_rate": 9.755282581475768e-08, + "logits/chosen": -3.2496438026428223, + "logits/rejected": -3.1581974029541016, + "logps/chosen": -548.7557983398438, + "logps/rejected": -1221.21728515625, + "loss": 0.6595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4030410647392273, + "rewards/margins": 1.694575548171997, + "rewards/rejected": -1.291534423828125, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 9.748855093918415e-08, + "logits/chosen": -3.2855982780456543, + "logits/rejected": -3.191049098968506, + "logps/chosen": -475.15283203125, + "logps/rejected": -1391.27001953125, + "loss": 0.6347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32851409912109375, + "rewards/margins": 2.0479049682617188, + "rewards/rejected": -1.719390869140625, + "step": 101 + }, + { + "epoch": 0.13, + "learning_rate": 9.742346461530047e-08, + "logits/chosen": -3.244433879852295, + "logits/rejected": -3.1701478958129883, + "logps/chosen": -458.3636474609375, + "logps/rejected": -1375.778076171875, + "loss": 0.7326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3857406675815582, + "rewards/margins": 2.0105209350585938, + "rewards/rejected": -1.6247804164886475, + "step": 102 + }, + { + "epoch": 0.13, + "learning_rate": 9.73575679552523e-08, + "logits/chosen": -3.207352876663208, + "logits/rejected": -3.1007070541381836, + "logps/chosen": -517.4765625, + "logps/rejected": -2045.79345703125, + "loss": 0.6288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4013168215751648, + "rewards/margins": 3.2509689331054688, + "rewards/rejected": -2.849652051925659, + "step": 103 + }, + { + "epoch": 0.13, + "learning_rate": 9.729086208503173e-08, + "logits/chosen": -3.253157615661621, + "logits/rejected": -3.1217761039733887, + "logps/chosen": -439.55975341796875, + "logps/rejected": -2665.8134765625, + "loss": 0.6669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3660949766635895, + "rewards/margins": 4.162152290344238, + "rewards/rejected": -3.7960572242736816, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 9.722334814445807e-08, + "logits/chosen": -3.1925253868103027, + "logits/rejected": -3.130136013031006, + "logps/chosen": -517.8770141601562, + "logps/rejected": -1045.0035400390625, + "loss": 0.684, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3721725344657898, + "rewards/margins": 1.857966661453247, + "rewards/rejected": -1.4857940673828125, + "step": 105 + }, + { + "epoch": 0.14, + "learning_rate": 9.715502728715826e-08, + "logits/chosen": -3.2783830165863037, + "logits/rejected": -3.108290195465088, + "logps/chosen": -496.83929443359375, + "logps/rejected": -1831.3160400390625, + "loss": 0.6462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3381332457065582, + "rewards/margins": 2.467808723449707, + "rewards/rejected": -2.1296753883361816, + "step": 106 + }, + { + "epoch": 0.14, + "learning_rate": 9.708590068054727e-08, + "logits/chosen": -3.2151339054107666, + "logits/rejected": -3.197756767272949, + "logps/chosen": -502.063720703125, + "logps/rejected": -2050.06298828125, + "loss": 0.665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2647643983364105, + "rewards/margins": 2.810995578765869, + "rewards/rejected": -2.546231269836426, + "step": 107 + }, + { + "epoch": 0.14, + "learning_rate": 9.701596950580806e-08, + "logits/chosen": -3.262676477432251, + "logits/rejected": -3.1446518898010254, + "logps/chosen": -511.2315673828125, + "logps/rejected": -1704.8199462890625, + "loss": 0.6585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4599090814590454, + "rewards/margins": 2.773019552230835, + "rewards/rejected": -2.3131103515625, + "step": 108 + }, + { + "epoch": 0.14, + "learning_rate": 9.694523495787148e-08, + "logits/chosen": -3.2930445671081543, + "logits/rejected": -3.144742965698242, + "logps/chosen": -517.044189453125, + "logps/rejected": -1016.3283081054688, + "loss": 0.6255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3581497073173523, + "rewards/margins": 1.543788194656372, + "rewards/rejected": -1.185638427734375, + "step": 109 + }, + { + "epoch": 0.14, + "learning_rate": 9.687369824539577e-08, + "logits/chosen": -3.224435806274414, + "logits/rejected": -3.1317243576049805, + "logps/chosen": -546.4181518554688, + "logps/rejected": -2315.207763671875, + "loss": 0.5975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19571685791015625, + "rewards/margins": 3.396754503250122, + "rewards/rejected": -3.201037645339966, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 9.680136059074597e-08, + "logits/chosen": -3.309121608734131, + "logits/rejected": -3.0697696208953857, + "logps/chosen": -509.47906494140625, + "logps/rejected": -836.6041870117188, + "loss": 0.701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39753419160842896, + "rewards/margins": 1.3158355951309204, + "rewards/rejected": -0.9183014035224915, + "step": 111 + }, + { + "epoch": 0.14, + "learning_rate": 9.672822322997304e-08, + "logits/chosen": -3.2164840698242188, + "logits/rejected": -3.2167859077453613, + "logps/chosen": -490.45941162109375, + "logps/rejected": -1229.0201416015625, + "loss": 0.6319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44518738985061646, + "rewards/margins": 2.285684108734131, + "rewards/rejected": -1.8404967784881592, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 9.665428741279266e-08, + "logits/chosen": -3.250316858291626, + "logits/rejected": -3.1167993545532227, + "logps/chosen": -559.7205810546875, + "logps/rejected": -1235.1279296875, + "loss": 0.6836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5198700428009033, + "rewards/margins": 1.98728346824646, + "rewards/rejected": -1.4674134254455566, + "step": 113 + }, + { + "epoch": 0.15, + "learning_rate": 9.657955440256394e-08, + "logits/chosen": -3.22269606590271, + "logits/rejected": -3.228187084197998, + "logps/chosen": -506.68621826171875, + "logps/rejected": -1092.25048828125, + "loss": 0.7042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5207595825195312, + "rewards/margins": 1.9409806728363037, + "rewards/rejected": -1.420220971107483, + "step": 114 + }, + { + "epoch": 0.15, + "learning_rate": 9.650402547626786e-08, + "logits/chosen": -3.232975482940674, + "logits/rejected": -3.1460494995117188, + "logps/chosen": -618.682373046875, + "logps/rejected": -2913.62451171875, + "loss": 0.689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.53961181640625, + "rewards/margins": 4.046399116516113, + "rewards/rejected": -3.5067873001098633, + "step": 115 + }, + { + "epoch": 0.15, + "learning_rate": 9.642770192448534e-08, + "logits/chosen": -3.2882399559020996, + "logits/rejected": -3.2049880027770996, + "logps/chosen": -537.266357421875, + "logps/rejected": -982.3611450195312, + "loss": 0.6661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2264862060546875, + "rewards/margins": 1.5630645751953125, + "rewards/rejected": -1.336578369140625, + "step": 116 + }, + { + "epoch": 0.15, + "learning_rate": 9.635058505137534e-08, + "logits/chosen": -3.2652182579040527, + "logits/rejected": -3.1867566108703613, + "logps/chosen": -457.189453125, + "logps/rejected": -1047.11865234375, + "loss": 0.6268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.384939581155777, + "rewards/margins": 1.7554688453674316, + "rewards/rejected": -1.3705291748046875, + "step": 117 + }, + { + "epoch": 0.15, + "learning_rate": 9.627267617465242e-08, + "logits/chosen": -3.230767011642456, + "logits/rejected": -3.1315383911132812, + "logps/chosen": -518.771728515625, + "logps/rejected": -1050.8045654296875, + "loss": 0.7073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40052032470703125, + "rewards/margins": 1.7804275751113892, + "rewards/rejected": -1.379907250404358, + "step": 118 + }, + { + "epoch": 0.15, + "learning_rate": 9.619397662556434e-08, + "logits/chosen": -3.2514326572418213, + "logits/rejected": -3.019895076751709, + "logps/chosen": -494.4189453125, + "logps/rejected": -1963.4136962890625, + "loss": 0.5994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4802078306674957, + "rewards/margins": 3.1362686157226562, + "rewards/rejected": -2.6560606956481934, + "step": 119 + }, + { + "epoch": 0.15, + "learning_rate": 9.611448774886923e-08, + "logits/chosen": -3.272172451019287, + "logits/rejected": -3.063150644302368, + "logps/chosen": -565.9896240234375, + "logps/rejected": -1168.3243408203125, + "loss": 0.6556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4406219720840454, + "rewards/margins": 1.9622528553009033, + "rewards/rejected": -1.521630883216858, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 9.603421090281269e-08, + "logits/chosen": -3.256030321121216, + "logits/rejected": -3.1289730072021484, + "logps/chosen": -489.0845947265625, + "logps/rejected": -1823.2919921875, + "loss": 0.6157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33284300565719604, + "rewards/margins": 2.7335329055786133, + "rewards/rejected": -2.4006898403167725, + "step": 121 + }, + { + "epoch": 0.16, + "learning_rate": 9.595314745910454e-08, + "logits/chosen": -3.2637763023376465, + "logits/rejected": -3.1712989807128906, + "logps/chosen": -518.0460205078125, + "logps/rejected": -1029.336181640625, + "loss": 0.6708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.251504510641098, + "rewards/margins": 1.4262787103652954, + "rewards/rejected": -1.174774169921875, + "step": 122 + }, + { + "epoch": 0.16, + "learning_rate": 9.587129880289538e-08, + "logits/chosen": -3.330411911010742, + "logits/rejected": -3.205239772796631, + "logps/chosen": -509.08746337890625, + "logps/rejected": -1960.44384765625, + "loss": 0.6542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24859619140625, + "rewards/margins": 3.4920716285705566, + "rewards/rejected": -3.2434754371643066, + "step": 123 + }, + { + "epoch": 0.16, + "learning_rate": 9.578866633275286e-08, + "logits/chosen": -3.296259880065918, + "logits/rejected": -3.16579008102417, + "logps/chosen": -454.7918701171875, + "logps/rejected": -1106.12109375, + "loss": 0.6862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36508941650390625, + "rewards/margins": 1.765354871749878, + "rewards/rejected": -1.4002654552459717, + "step": 124 + }, + { + "epoch": 0.16, + "learning_rate": 9.570525146063798e-08, + "logits/chosen": -3.2103941440582275, + "logits/rejected": -3.1208107471466064, + "logps/chosen": -507.7249755859375, + "logps/rejected": -2120.0234375, + "loss": 0.6914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3488449156284332, + "rewards/margins": 3.6836440563201904, + "rewards/rejected": -3.33479905128479, + "step": 125 + }, + { + "epoch": 0.16, + "learning_rate": 9.562105561188067e-08, + "logits/chosen": -3.283688545227051, + "logits/rejected": -3.145331859588623, + "logps/chosen": -524.9093017578125, + "logps/rejected": -2658.310546875, + "loss": 0.6212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37607115507125854, + "rewards/margins": 3.9588470458984375, + "rewards/rejected": -3.5827760696411133, + "step": 126 + }, + { + "epoch": 0.16, + "learning_rate": 9.553608022515576e-08, + "logits/chosen": -3.291179656982422, + "logits/rejected": -3.125370502471924, + "logps/chosen": -486.4223937988281, + "logps/rejected": -1420.09912109375, + "loss": 0.6338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33403319120407104, + "rewards/margins": 2.318829298019409, + "rewards/rejected": -1.984796166419983, + "step": 127 + }, + { + "epoch": 0.16, + "learning_rate": 9.545032675245812e-08, + "logits/chosen": -3.249782085418701, + "logits/rejected": -3.0785725116729736, + "logps/chosen": -479.15191650390625, + "logps/rejected": -1400.95947265625, + "loss": 0.6856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3789764642715454, + "rewards/margins": 2.528006076812744, + "rewards/rejected": -2.149029493331909, + "step": 128 + }, + { + "epoch": 0.16, + "learning_rate": 9.536379665907798e-08, + "logits/chosen": -3.234279155731201, + "logits/rejected": -3.228203296661377, + "logps/chosen": -558.6952514648438, + "logps/rejected": -1227.717041015625, + "loss": 0.6866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48677366971969604, + "rewards/margins": 2.163439989089966, + "rewards/rejected": -1.676666259765625, + "step": 129 + }, + { + "epoch": 0.17, + "learning_rate": 9.527649142357594e-08, + "logits/chosen": -3.2159876823425293, + "logits/rejected": -3.150111198425293, + "logps/chosen": -545.93994140625, + "logps/rejected": -1414.515380859375, + "loss": 0.5824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.439483642578125, + "rewards/margins": 2.738128662109375, + "rewards/rejected": -2.29864501953125, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 9.518841253775753e-08, + "logits/chosen": -3.2929844856262207, + "logits/rejected": -3.117673873901367, + "logps/chosen": -484.3270568847656, + "logps/rejected": -1048.810791015625, + "loss": 0.6613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40743255615234375, + "rewards/margins": 1.899897813796997, + "rewards/rejected": -1.4924652576446533, + "step": 131 + }, + { + "epoch": 0.17, + "learning_rate": 9.509956150664795e-08, + "logits/chosen": -3.326503276824951, + "logits/rejected": -3.169628143310547, + "logps/chosen": -502.86346435546875, + "logps/rejected": -466.203369140625, + "loss": 0.7058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4918579161167145, + "rewards/margins": 1.0223724842071533, + "rewards/rejected": -0.5305145382881165, + "step": 132 + }, + { + "epoch": 0.17, + "learning_rate": 9.500993984846612e-08, + "logits/chosen": -3.295715808868408, + "logits/rejected": -3.0734448432922363, + "logps/chosen": -459.01617431640625, + "logps/rejected": -1487.870849609375, + "loss": 0.6101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30418699979782104, + "rewards/margins": 2.464221239089966, + "rewards/rejected": -2.1600341796875, + "step": 133 + }, + { + "epoch": 0.17, + "learning_rate": 9.491954909459894e-08, + "logits/chosen": -3.1522514820098877, + "logits/rejected": -3.167595386505127, + "logps/chosen": -524.8972778320312, + "logps/rejected": -1635.132568359375, + "loss": 0.6264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4769989252090454, + "rewards/margins": 3.0426177978515625, + "rewards/rejected": -2.5656189918518066, + "step": 134 + }, + { + "epoch": 0.17, + "learning_rate": 9.482839078957499e-08, + "logits/chosen": -3.302405834197998, + "logits/rejected": -3.1736183166503906, + "logps/chosen": -514.273681640625, + "logps/rejected": -1251.627685546875, + "loss": 0.6316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49527284502983093, + "rewards/margins": 2.450204372406006, + "rewards/rejected": -1.9549317359924316, + "step": 135 + }, + { + "epoch": 0.17, + "learning_rate": 9.473646649103817e-08, + "logits/chosen": -3.272620677947998, + "logits/rejected": -3.1081600189208984, + "logps/chosen": -477.36767578125, + "logps/rejected": -1859.049072265625, + "loss": 0.6375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47580718994140625, + "rewards/margins": 3.1876237392425537, + "rewards/rejected": -2.7118165493011475, + "step": 136 + }, + { + "epoch": 0.17, + "learning_rate": 9.464377776972114e-08, + "logits/chosen": -3.273292064666748, + "logits/rejected": -3.2090182304382324, + "logps/chosen": -499.63580322265625, + "logps/rejected": -1326.9278564453125, + "loss": 0.5966, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4522995054721832, + "rewards/margins": 2.4864702224731445, + "rewards/rejected": -2.034170627593994, + "step": 137 + }, + { + "epoch": 0.18, + "learning_rate": 9.455032620941839e-08, + "logits/chosen": -3.2650036811828613, + "logits/rejected": -3.110806941986084, + "logps/chosen": -474.84503173828125, + "logps/rejected": -1782.152099609375, + "loss": 0.5888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3407333493232727, + "rewards/margins": 2.7722091674804688, + "rewards/rejected": -2.431475877761841, + "step": 138 + }, + { + "epoch": 0.18, + "learning_rate": 9.445611340695925e-08, + "logits/chosen": -3.2661540508270264, + "logits/rejected": -3.157273769378662, + "logps/chosen": -489.1846008300781, + "logps/rejected": -1362.796142578125, + "loss": 0.6224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35815125703811646, + "rewards/margins": 2.7056427001953125, + "rewards/rejected": -2.347491502761841, + "step": 139 + }, + { + "epoch": 0.18, + "learning_rate": 9.436114097218058e-08, + "logits/chosen": -3.2492971420288086, + "logits/rejected": -3.0980517864227295, + "logps/chosen": -496.28765869140625, + "logps/rejected": -1161.236328125, + "loss": 0.6698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4787963628768921, + "rewards/margins": 2.120166063308716, + "rewards/rejected": -1.6413697004318237, + "step": 140 + }, + { + "epoch": 0.18, + "learning_rate": 9.426541052789925e-08, + "logits/chosen": -3.312145709991455, + "logits/rejected": -3.2439844608306885, + "logps/chosen": -466.27117919921875, + "logps/rejected": -1188.486083984375, + "loss": 0.6066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5373611450195312, + "rewards/margins": 2.610792636871338, + "rewards/rejected": -2.0734314918518066, + "step": 141 + }, + { + "epoch": 0.18, + "learning_rate": 9.416892370988443e-08, + "logits/chosen": -3.2796077728271484, + "logits/rejected": -3.1858201026916504, + "logps/chosen": -500.3385009765625, + "logps/rejected": -805.2297973632812, + "loss": 0.6575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4097000062465668, + "rewards/margins": 1.6410324573516846, + "rewards/rejected": -1.2313324213027954, + "step": 142 + }, + { + "epoch": 0.18, + "learning_rate": 9.40716821668296e-08, + "logits/chosen": -3.3147544860839844, + "logits/rejected": -3.1309280395507812, + "logps/chosen": -513.4326782226562, + "logps/rejected": -1759.967529296875, + "loss": 0.6308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3790481686592102, + "rewards/margins": 2.7343215942382812, + "rewards/rejected": -2.355273485183716, + "step": 143 + }, + { + "epoch": 0.18, + "learning_rate": 9.397368756032444e-08, + "logits/chosen": -3.289907455444336, + "logits/rejected": -3.1519827842712402, + "logps/chosen": -462.04718017578125, + "logps/rejected": -1872.267822265625, + "loss": 0.5678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24956512451171875, + "rewards/margins": 2.9212021827697754, + "rewards/rejected": -2.6716370582580566, + "step": 144 + }, + { + "epoch": 0.18, + "learning_rate": 9.387494156482642e-08, + "logits/chosen": -3.281632423400879, + "logits/rejected": -3.1785335540771484, + "logps/chosen": -530.9482421875, + "logps/rejected": -1373.430419921875, + "loss": 0.5789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35769808292388916, + "rewards/margins": 2.519850254058838, + "rewards/rejected": -2.162152051925659, + "step": 145 + }, + { + "epoch": 0.19, + "learning_rate": 9.377544586763214e-08, + "logits/chosen": -3.252739906311035, + "logits/rejected": -3.1595544815063477, + "logps/chosen": -463.06329345703125, + "logps/rejected": -1258.0992431640625, + "loss": 0.6724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44101107120513916, + "rewards/margins": 2.6268203258514404, + "rewards/rejected": -2.185809373855591, + "step": 146 + }, + { + "epoch": 0.19, + "learning_rate": 9.367520216884854e-08, + "logits/chosen": -3.2743146419525146, + "logits/rejected": -3.1691970825195312, + "logps/chosen": -522.58740234375, + "logps/rejected": -1042.6298828125, + "loss": 0.609, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3005782961845398, + "rewards/margins": 1.8578628301620483, + "rewards/rejected": -1.5572845935821533, + "step": 147 + }, + { + "epoch": 0.19, + "learning_rate": 9.357421218136385e-08, + "logits/chosen": -3.1814451217651367, + "logits/rejected": -3.202343463897705, + "logps/chosen": -529.1980590820312, + "logps/rejected": -1101.92578125, + "loss": 0.6039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.524340808391571, + "rewards/margins": 2.0926027297973633, + "rewards/rejected": -1.568261742591858, + "step": 148 + }, + { + "epoch": 0.19, + "learning_rate": 9.347247763081834e-08, + "logits/chosen": -3.213193416595459, + "logits/rejected": -3.1437149047851562, + "logps/chosen": -550.518798828125, + "logps/rejected": -1382.2828369140625, + "loss": 0.6515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48567813634872437, + "rewards/margins": 2.773812770843506, + "rewards/rejected": -2.288134813308716, + "step": 149 + }, + { + "epoch": 0.19, + "learning_rate": 9.337000025557476e-08, + "logits/chosen": -3.2051548957824707, + "logits/rejected": -3.1761538982391357, + "logps/chosen": -471.2747802734375, + "logps/rejected": -1687.7943115234375, + "loss": 0.572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4379211366176605, + "rewards/margins": 3.3389158248901367, + "rewards/rejected": -2.9009947776794434, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 9.32667818066887e-08, + "logits/chosen": -3.2618367671966553, + "logits/rejected": -3.16750431060791, + "logps/chosen": -511.8447265625, + "logps/rejected": -991.9730224609375, + "loss": 0.6117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44312745332717896, + "rewards/margins": 2.2851624488830566, + "rewards/rejected": -1.8420348167419434, + "step": 151 + }, + { + "epoch": 0.19, + "learning_rate": 9.316282404787869e-08, + "logits/chosen": -3.1908750534057617, + "logits/rejected": -3.0189948081970215, + "logps/chosen": -496.7626647949219, + "logps/rejected": -1336.121826171875, + "loss": 0.5828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.380300909280777, + "rewards/margins": 2.4647858142852783, + "rewards/rejected": -2.084484815597534, + "step": 152 + }, + { + "epoch": 0.2, + "learning_rate": 9.305812875549598e-08, + "logits/chosen": -3.2544772624969482, + "logits/rejected": -3.090698719024658, + "logps/chosen": -449.23968505859375, + "logps/rejected": -1231.958984375, + "loss": 0.5721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35554808378219604, + "rewards/margins": 1.94266676902771, + "rewards/rejected": -1.5871186256408691, + "step": 153 + }, + { + "epoch": 0.2, + "learning_rate": 9.295269771849425e-08, + "logits/chosen": -3.236328363418579, + "logits/rejected": -3.1900105476379395, + "logps/chosen": -531.8478393554688, + "logps/rejected": -1301.614501953125, + "loss": 0.6417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5086318850517273, + "rewards/margins": 2.289027452468872, + "rewards/rejected": -1.7803955078125, + "step": 154 + }, + { + "epoch": 0.2, + "learning_rate": 9.284653273839905e-08, + "logits/chosen": -3.2438278198242188, + "logits/rejected": -3.1808948516845703, + "logps/chosen": -503.336181640625, + "logps/rejected": -1213.2479248046875, + "loss": 0.5765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.357086181640625, + "rewards/margins": 2.2713623046875, + "rewards/rejected": -1.914276123046875, + "step": 155 + }, + { + "epoch": 0.2, + "learning_rate": 9.273963562927694e-08, + "logits/chosen": -3.270559787750244, + "logits/rejected": -3.211725950241089, + "logps/chosen": -516.6626586914062, + "logps/rejected": -1011.1287841796875, + "loss": 0.5972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4026321470737457, + "rewards/margins": 2.1865646839141846, + "rewards/rejected": -1.7839324474334717, + "step": 156 + }, + { + "epoch": 0.2, + "learning_rate": 9.26320082177046e-08, + "logits/chosen": -3.2667689323425293, + "logits/rejected": -3.1833412647247314, + "logps/chosen": -501.8060302734375, + "logps/rejected": -977.359130859375, + "loss": 0.6245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5526565313339233, + "rewards/margins": 2.0357513427734375, + "rewards/rejected": -1.4830948114395142, + "step": 157 + }, + { + "epoch": 0.2, + "learning_rate": 9.252365234273753e-08, + "logits/chosen": -3.2597713470458984, + "logits/rejected": -3.166750907897949, + "logps/chosen": -583.763671875, + "logps/rejected": -3987.635009765625, + "loss": 0.6378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.208445742726326, + "rewards/margins": 4.6996564865112305, + "rewards/rejected": -4.4912109375, + "step": 158 + }, + { + "epoch": 0.2, + "learning_rate": 9.241456985587868e-08, + "logits/chosen": -3.291053056716919, + "logits/rejected": -3.198087692260742, + "logps/chosen": -499.95672607421875, + "logps/rejected": -1006.2666015625, + "loss": 0.6088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41716307401657104, + "rewards/margins": 1.9698333740234375, + "rewards/rejected": -1.5526702404022217, + "step": 159 + }, + { + "epoch": 0.2, + "learning_rate": 9.230476262104676e-08, + "logits/chosen": -3.203233242034912, + "logits/rejected": -3.0904102325439453, + "logps/chosen": -581.9791259765625, + "logps/rejected": -899.0921630859375, + "loss": 0.7263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36889952421188354, + "rewards/margins": 1.5483124256134033, + "rewards/rejected": -1.179412841796875, + "step": 160 + }, + { + "epoch": 0.21, + "learning_rate": 9.219423251454446e-08, + "logits/chosen": -3.2327919006347656, + "logits/rejected": -3.0987966060638428, + "logps/chosen": -510.5614013671875, + "logps/rejected": -1423.88037109375, + "loss": 0.5758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49566346406936646, + "rewards/margins": 2.6385834217071533, + "rewards/rejected": -2.1429200172424316, + "step": 161 + }, + { + "epoch": 0.21, + "learning_rate": 9.208298142502635e-08, + "logits/chosen": -3.184802532196045, + "logits/rejected": -3.15671968460083, + "logps/chosen": -567.4575805664062, + "logps/rejected": -1779.60595703125, + "loss": 0.5981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32099151611328125, + "rewards/margins": 3.5147171020507812, + "rewards/rejected": -3.1937255859375, + "step": 162 + }, + { + "epoch": 0.21, + "learning_rate": 9.197101125346657e-08, + "logits/chosen": -3.238050937652588, + "logits/rejected": -3.1936120986938477, + "logps/chosen": -532.086669921875, + "logps/rejected": -1444.96533203125, + "loss": 0.6534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3022415041923523, + "rewards/margins": 2.8127334117889404, + "rewards/rejected": -2.5104918479919434, + "step": 163 + }, + { + "epoch": 0.21, + "learning_rate": 9.185832391312642e-08, + "logits/chosen": -3.250420570373535, + "logits/rejected": -3.201878070831299, + "logps/chosen": -393.8393249511719, + "logps/rejected": -1281.433837890625, + "loss": 0.563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28461915254592896, + "rewards/margins": 2.4488587379455566, + "rewards/rejected": -2.1642394065856934, + "step": 164 + }, + { + "epoch": 0.21, + "learning_rate": 9.174492132952165e-08, + "logits/chosen": -3.3122003078460693, + "logits/rejected": -3.2547566890716553, + "logps/chosen": -556.8560791015625, + "logps/rejected": -1019.1519775390625, + "loss": 0.6308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48001712560653687, + "rewards/margins": 2.489550828933716, + "rewards/rejected": -2.009533643722534, + "step": 165 + }, + { + "epoch": 0.21, + "learning_rate": 9.163080544038952e-08, + "logits/chosen": -3.2056384086608887, + "logits/rejected": -3.106821060180664, + "logps/chosen": -442.7301330566406, + "logps/rejected": -1333.0960693359375, + "loss": 0.605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43447571992874146, + "rewards/margins": 2.5944855213165283, + "rewards/rejected": -2.1600098609924316, + "step": 166 + }, + { + "epoch": 0.21, + "learning_rate": 9.15159781956557e-08, + "logits/chosen": -3.2891387939453125, + "logits/rejected": -3.186699151992798, + "logps/chosen": -547.7482299804688, + "logps/rejected": -1474.02685546875, + "loss": 0.6275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46234434843063354, + "rewards/margins": 3.0836334228515625, + "rewards/rejected": -2.6212892532348633, + "step": 167 + }, + { + "epoch": 0.21, + "learning_rate": 9.1400441557401e-08, + "logits/chosen": -3.2954680919647217, + "logits/rejected": -3.188505172729492, + "logps/chosen": -505.6473083496094, + "logps/rejected": -889.0321044921875, + "loss": 0.6178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5191787481307983, + "rewards/margins": 1.970454454421997, + "rewards/rejected": -1.4512755870819092, + "step": 168 + }, + { + "epoch": 0.22, + "learning_rate": 9.128419749982779e-08, + "logits/chosen": -3.2582433223724365, + "logits/rejected": -3.117002487182617, + "logps/chosen": -472.5812072753906, + "logps/rejected": -1153.345947265625, + "loss": 0.5683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.432403564453125, + "rewards/margins": 2.2302613258361816, + "rewards/rejected": -1.7978577613830566, + "step": 169 + }, + { + "epoch": 0.22, + "learning_rate": 9.116724800922628e-08, + "logits/chosen": -3.2195518016815186, + "logits/rejected": -3.1221837997436523, + "logps/chosen": -510.71185302734375, + "logps/rejected": -1050.032958984375, + "loss": 0.591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4822036623954773, + "rewards/margins": 2.413038730621338, + "rewards/rejected": -1.9308350086212158, + "step": 170 + }, + { + "epoch": 0.22, + "learning_rate": 9.10495950839406e-08, + "logits/chosen": -3.259490489959717, + "logits/rejected": -3.112882137298584, + "logps/chosen": -487.97198486328125, + "logps/rejected": -766.587890625, + "loss": 0.6171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3926132321357727, + "rewards/margins": 1.6191635131835938, + "rewards/rejected": -1.2265503406524658, + "step": 171 + }, + { + "epoch": 0.22, + "learning_rate": 9.093124073433462e-08, + "logits/chosen": -3.2656991481781006, + "logits/rejected": -3.190746307373047, + "logps/chosen": -505.9459533691406, + "logps/rejected": -1427.4908447265625, + "loss": 0.6232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39645692706108093, + "rewards/margins": 2.7422759532928467, + "rewards/rejected": -2.3458189964294434, + "step": 172 + }, + { + "epoch": 0.22, + "learning_rate": 9.081218698275762e-08, + "logits/chosen": -3.259418487548828, + "logits/rejected": -3.1524553298950195, + "logps/chosen": -513.114990234375, + "logps/rejected": -1066.3857421875, + "loss": 0.5889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5201660394668579, + "rewards/margins": 2.3015565872192383, + "rewards/rejected": -1.7813904285430908, + "step": 173 + }, + { + "epoch": 0.22, + "learning_rate": 9.069243586350974e-08, + "logits/chosen": -3.2156014442443848, + "logits/rejected": -3.0975708961486816, + "logps/chosen": -518.6500244140625, + "logps/rejected": -1671.347412109375, + "loss": 0.6372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3574768304824829, + "rewards/margins": 2.7340331077575684, + "rewards/rejected": -2.376556396484375, + "step": 174 + }, + { + "epoch": 0.22, + "learning_rate": 9.057198942280721e-08, + "logits/chosen": -3.2355599403381348, + "logits/rejected": -3.143240451812744, + "logps/chosen": -515.0316162109375, + "logps/rejected": -927.7183837890625, + "loss": 0.591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5593369007110596, + "rewards/margins": 2.0967087745666504, + "rewards/rejected": -1.5373718738555908, + "step": 175 + }, + { + "epoch": 0.22, + "learning_rate": 9.045084971874737e-08, + "logits/chosen": -3.276298761367798, + "logits/rejected": -3.177938461303711, + "logps/chosen": -482.74945068359375, + "logps/rejected": -1256.259033203125, + "loss": 0.6463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39513856172561646, + "rewards/margins": 2.8230438232421875, + "rewards/rejected": -2.427905321121216, + "step": 176 + }, + { + "epoch": 0.23, + "learning_rate": 9.032901882127352e-08, + "logits/chosen": -3.2902393341064453, + "logits/rejected": -2.9671740531921387, + "logps/chosen": -486.54071044921875, + "logps/rejected": -2032.0928955078125, + "loss": 0.6364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5894958972930908, + "rewards/margins": 3.4600586891174316, + "rewards/rejected": -2.870562791824341, + "step": 177 + }, + { + "epoch": 0.23, + "learning_rate": 9.020649881213957e-08, + "logits/chosen": -3.294735908508301, + "logits/rejected": -3.144044876098633, + "logps/chosen": -494.0511474609375, + "logps/rejected": -1301.8505859375, + "loss": 0.582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3856872618198395, + "rewards/margins": 2.667379856109619, + "rewards/rejected": -2.2816925048828125, + "step": 178 + }, + { + "epoch": 0.23, + "learning_rate": 9.008329178487441e-08, + "logits/chosen": -3.2333168983459473, + "logits/rejected": -3.107372999191284, + "logps/chosen": -518.5740966796875, + "logps/rejected": -1212.485107421875, + "loss": 0.6292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4114120602607727, + "rewards/margins": 2.3760576248168945, + "rewards/rejected": -1.9646453857421875, + "step": 179 + }, + { + "epoch": 0.23, + "learning_rate": 8.995939984474623e-08, + "logits/chosen": -3.2301106452941895, + "logits/rejected": -3.1585257053375244, + "logps/chosen": -559.8628540039062, + "logps/rejected": -1457.6209716796875, + "loss": 0.5998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26638489961624146, + "rewards/margins": 2.834829807281494, + "rewards/rejected": -2.5684447288513184, + "step": 180 + }, + { + "epoch": 0.23, + "learning_rate": 8.983482510872644e-08, + "logits/chosen": -3.244877338409424, + "logits/rejected": -3.1891918182373047, + "logps/chosen": -534.223876953125, + "logps/rejected": -1389.9937744140625, + "loss": 0.587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4289184808731079, + "rewards/margins": 2.904803514480591, + "rewards/rejected": -2.4758849143981934, + "step": 181 + }, + { + "epoch": 0.23, + "learning_rate": 8.970956970545355e-08, + "logits/chosen": -3.324787139892578, + "logits/rejected": -3.1635806560516357, + "logps/chosen": -538.9171752929688, + "logps/rejected": -946.9749755859375, + "loss": 0.6333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5838760733604431, + "rewards/margins": 2.0359573364257812, + "rewards/rejected": -1.452081322669983, + "step": 182 + }, + { + "epoch": 0.23, + "learning_rate": 8.958363577519683e-08, + "logits/chosen": -3.2139394283294678, + "logits/rejected": -3.093297243118286, + "logps/chosen": -475.3451843261719, + "logps/rejected": -1049.533935546875, + "loss": 0.6348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3906097710132599, + "rewards/margins": 2.4186127185821533, + "rewards/rejected": -2.028002977371216, + "step": 183 + }, + { + "epoch": 0.23, + "learning_rate": 8.945702546981968e-08, + "logits/chosen": -3.2551722526550293, + "logits/rejected": -3.147064208984375, + "logps/chosen": -470.44769287109375, + "logps/rejected": -1316.08837890625, + "loss": 0.597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3392791748046875, + "rewards/margins": 3.024850368499756, + "rewards/rejected": -2.6855711936950684, + "step": 184 + }, + { + "epoch": 0.24, + "learning_rate": 8.932974095274289e-08, + "logits/chosen": -3.2873263359069824, + "logits/rejected": -3.283311367034912, + "logps/chosen": -527.41748046875, + "logps/rejected": -977.775146484375, + "loss": 0.6213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7320083975791931, + "rewards/margins": 2.869114875793457, + "rewards/rejected": -2.137106418609619, + "step": 185 + }, + { + "epoch": 0.24, + "learning_rate": 8.920178439890764e-08, + "logits/chosen": -3.2983052730560303, + "logits/rejected": -3.252427339553833, + "logps/chosen": -513.5011596679688, + "logps/rejected": -1311.183349609375, + "loss": 0.5924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6123154163360596, + "rewards/margins": 3.314122200012207, + "rewards/rejected": -2.7018067836761475, + "step": 186 + }, + { + "epoch": 0.24, + "learning_rate": 8.907315799473844e-08, + "logits/chosen": -3.2423932552337646, + "logits/rejected": -3.1158552169799805, + "logps/chosen": -624.5608520507812, + "logps/rejected": -1377.537109375, + "loss": 0.635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5010528564453125, + "rewards/margins": 2.8038361072540283, + "rewards/rejected": -2.302783250808716, + "step": 187 + }, + { + "epoch": 0.24, + "learning_rate": 8.894386393810562e-08, + "logits/chosen": -3.299107551574707, + "logits/rejected": -3.254218578338623, + "logps/chosen": -483.2557373046875, + "logps/rejected": -1119.721923828125, + "loss": 0.5917, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5362884998321533, + "rewards/margins": 2.9709930419921875, + "rewards/rejected": -2.4347047805786133, + "step": 188 + }, + { + "epoch": 0.24, + "learning_rate": 8.881390443828787e-08, + "logits/chosen": -3.287994384765625, + "logits/rejected": -3.1567800045013428, + "logps/chosen": -503.52337646484375, + "logps/rejected": -1155.00927734375, + "loss": 0.5827, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5587875843048096, + "rewards/margins": 2.9394729137420654, + "rewards/rejected": -2.380685567855835, + "step": 189 + }, + { + "epoch": 0.24, + "learning_rate": 8.868328171593447e-08, + "logits/chosen": -3.2895660400390625, + "logits/rejected": -3.278064250946045, + "logps/chosen": -516.2635498046875, + "logps/rejected": -3588.70068359375, + "loss": 0.5625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5286957025527954, + "rewards/margins": 4.04736328125, + "rewards/rejected": -3.518667697906494, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 8.855199800302735e-08, + "logits/chosen": -3.2766165733337402, + "logits/rejected": -3.1785404682159424, + "logps/chosen": -480.2926330566406, + "logps/rejected": -1095.9056396484375, + "loss": 0.5663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4610122740268707, + "rewards/margins": 2.3556413650512695, + "rewards/rejected": -1.8946290016174316, + "step": 191 + }, + { + "epoch": 0.24, + "learning_rate": 8.842005554284295e-08, + "logits/chosen": -3.276367664337158, + "logits/rejected": -3.166576862335205, + "logps/chosen": -536.2089233398438, + "logps/rejected": -1452.141357421875, + "loss": 0.6248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6215652227401733, + "rewards/margins": 3.336543560028076, + "rewards/rejected": -2.7149782180786133, + "step": 192 + }, + { + "epoch": 0.25, + "learning_rate": 8.828745658991386e-08, + "logits/chosen": -3.261601686477661, + "logits/rejected": -3.2027950286865234, + "logps/chosen": -495.2864990234375, + "logps/rejected": -1580.72509765625, + "loss": 0.5748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5257629752159119, + "rewards/margins": 3.2897980213165283, + "rewards/rejected": -2.7640349864959717, + "step": 193 + }, + { + "epoch": 0.25, + "learning_rate": 8.815420340999033e-08, + "logits/chosen": -3.229987144470215, + "logits/rejected": -3.133579969406128, + "logps/chosen": -550.703125, + "logps/rejected": -1164.9593505859375, + "loss": 0.5879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5128387808799744, + "rewards/margins": 2.6891632080078125, + "rewards/rejected": -2.1763243675231934, + "step": 194 + }, + { + "epoch": 0.25, + "learning_rate": 8.802029828000155e-08, + "logits/chosen": -3.2344961166381836, + "logits/rejected": -3.1053647994995117, + "logps/chosen": -514.5802001953125, + "logps/rejected": -1490.42626953125, + "loss": 0.5608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5727035403251648, + "rewards/margins": 2.842393398284912, + "rewards/rejected": -2.2696900367736816, + "step": 195 + }, + { + "epoch": 0.25, + "learning_rate": 8.788574348801674e-08, + "logits/chosen": -3.233214855194092, + "logits/rejected": -3.1093082427978516, + "logps/chosen": -463.80377197265625, + "logps/rejected": -938.3575439453125, + "loss": 0.5579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46183013916015625, + "rewards/margins": 2.0851852893829346, + "rewards/rejected": -1.6233551502227783, + "step": 196 + }, + { + "epoch": 0.25, + "learning_rate": 8.775054133320602e-08, + "logits/chosen": -3.2694339752197266, + "logits/rejected": -3.05252742767334, + "logps/chosen": -493.2152099609375, + "logps/rejected": -2184.001220703125, + "loss": 0.5574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48116302490234375, + "rewards/margins": 4.362126350402832, + "rewards/rejected": -3.880963087081909, + "step": 197 + }, + { + "epoch": 0.25, + "learning_rate": 8.761469412580124e-08, + "logits/chosen": -3.310445547103882, + "logits/rejected": -3.1463561058044434, + "logps/chosen": -473.95367431640625, + "logps/rejected": -798.416015625, + "loss": 0.5995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4600845277309418, + "rewards/margins": 1.8742538690567017, + "rewards/rejected": -1.4141693115234375, + "step": 198 + }, + { + "epoch": 0.25, + "learning_rate": 8.74782041870563e-08, + "logits/chosen": -3.2489428520202637, + "logits/rejected": -3.1738734245300293, + "logps/chosen": -527.6349487304688, + "logps/rejected": -887.7821655273438, + "loss": 0.5993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4112182557582855, + "rewards/margins": 1.8973236083984375, + "rewards/rejected": -1.4861054420471191, + "step": 199 + }, + { + "epoch": 0.25, + "learning_rate": 8.734107384920769e-08, + "logits/chosen": -3.27463960647583, + "logits/rejected": -3.147984504699707, + "logps/chosen": -532.1691284179688, + "logps/rejected": -1375.1859130859375, + "loss": 0.5813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46679383516311646, + "rewards/margins": 2.8183014392852783, + "rewards/rejected": -2.3515076637268066, + "step": 200 + }, + { + "epoch": 0.26, + "learning_rate": 8.720330545543453e-08, + "logits/chosen": -3.298241138458252, + "logits/rejected": -3.1050283908843994, + "logps/chosen": -530.1170654296875, + "logps/rejected": -931.9783935546875, + "loss": 0.5936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5702041983604431, + "rewards/margins": 1.9811813831329346, + "rewards/rejected": -1.4109771251678467, + "step": 201 + }, + { + "epoch": 0.26, + "learning_rate": 8.706490135981854e-08, + "logits/chosen": -3.2612314224243164, + "logits/rejected": -3.2426791191101074, + "logps/chosen": -457.4442138671875, + "logps/rejected": -1240.09033203125, + "loss": 0.6117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5393631458282471, + "rewards/margins": 2.6704483032226562, + "rewards/rejected": -2.131085157394409, + "step": 202 + }, + { + "epoch": 0.26, + "learning_rate": 8.692586392730385e-08, + "logits/chosen": -3.1936161518096924, + "logits/rejected": -3.1699037551879883, + "logps/chosen": -532.218017578125, + "logps/rejected": -1107.15576171875, + "loss": 0.6035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7905670404434204, + "rewards/margins": 2.4957704544067383, + "rewards/rejected": -1.7052032947540283, + "step": 203 + }, + { + "epoch": 0.26, + "learning_rate": 8.678619553365658e-08, + "logits/chosen": -3.2762436866760254, + "logits/rejected": -3.149843215942383, + "logps/chosen": -547.4312744140625, + "logps/rejected": -1514.2735595703125, + "loss": 0.5801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7436874508857727, + "rewards/margins": 4.051963806152344, + "rewards/rejected": -3.308276414871216, + "step": 204 + }, + { + "epoch": 0.26, + "learning_rate": 8.664589856542419e-08, + "logits/chosen": -3.3087782859802246, + "logits/rejected": -3.1741044521331787, + "logps/chosen": -528.8719482421875, + "logps/rejected": -1979.89013671875, + "loss": 0.5917, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7088378667831421, + "rewards/margins": 4.629309177398682, + "rewards/rejected": -3.92047119140625, + "step": 205 + }, + { + "epoch": 0.26, + "learning_rate": 8.650497541989481e-08, + "logits/chosen": -3.2232394218444824, + "logits/rejected": -3.180276870727539, + "logps/chosen": -481.051025390625, + "logps/rejected": -1476.617431640625, + "loss": 0.5788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5524765253067017, + "rewards/margins": 3.609757900238037, + "rewards/rejected": -3.057281494140625, + "step": 206 + }, + { + "epoch": 0.26, + "learning_rate": 8.636342850505615e-08, + "logits/chosen": -3.2386832237243652, + "logits/rejected": -3.162121295928955, + "logps/chosen": -492.131591796875, + "logps/rejected": -1562.940673828125, + "loss": 0.5758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47967684268951416, + "rewards/margins": 3.514631748199463, + "rewards/rejected": -3.0349550247192383, + "step": 207 + }, + { + "epoch": 0.27, + "learning_rate": 8.622126023955445e-08, + "logits/chosen": -3.3231418132781982, + "logits/rejected": -3.2120745182037354, + "logps/chosen": -494.60833740234375, + "logps/rejected": -1354.919677734375, + "loss": 0.5748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6153839230537415, + "rewards/margins": 3.6037049293518066, + "rewards/rejected": -2.98832106590271, + "step": 208 + }, + { + "epoch": 0.27, + "learning_rate": 8.60784730526531e-08, + "logits/chosen": -3.2029151916503906, + "logits/rejected": -3.0896730422973633, + "logps/chosen": -498.01800537109375, + "logps/rejected": -1673.427978515625, + "loss": 0.5777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5658324956893921, + "rewards/margins": 3.695208787918091, + "rewards/rejected": -3.1293764114379883, + "step": 209 + }, + { + "epoch": 0.27, + "learning_rate": 8.593506938419119e-08, + "logits/chosen": -3.2393593788146973, + "logits/rejected": -3.1476054191589355, + "logps/chosen": -543.604248046875, + "logps/rejected": -1043.81103515625, + "loss": 0.6229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6709502935409546, + "rewards/margins": 2.487942695617676, + "rewards/rejected": -1.816992163658142, + "step": 210 + }, + { + "epoch": 0.27, + "learning_rate": 8.579105168454172e-08, + "logits/chosen": -3.231595993041992, + "logits/rejected": -3.085270881652832, + "logps/chosen": -534.76220703125, + "logps/rejected": -1813.095458984375, + "loss": 0.5781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5515716671943665, + "rewards/margins": 3.6059112548828125, + "rewards/rejected": -3.054339647293091, + "step": 211 + }, + { + "epoch": 0.27, + "learning_rate": 8.564642241456985e-08, + "logits/chosen": -3.2219929695129395, + "logits/rejected": -3.1714282035827637, + "logps/chosen": -581.8430786132812, + "logps/rejected": -1182.2918701171875, + "loss": 0.6223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4465179443359375, + "rewards/margins": 2.781353712081909, + "rewards/rejected": -2.3348357677459717, + "step": 212 + }, + { + "epoch": 0.27, + "learning_rate": 8.550118404559074e-08, + "logits/chosen": -3.240907669067383, + "logits/rejected": -3.0736093521118164, + "logps/chosen": -531.8800659179688, + "logps/rejected": -1124.99755859375, + "loss": 0.6079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5369018316268921, + "rewards/margins": 2.421896457672119, + "rewards/rejected": -1.8849945068359375, + "step": 213 + }, + { + "epoch": 0.27, + "learning_rate": 8.535533905932736e-08, + "logits/chosen": -3.236478328704834, + "logits/rejected": -3.1028923988342285, + "logps/chosen": -528.857177734375, + "logps/rejected": -1847.0869140625, + "loss": 0.5716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5917938351631165, + "rewards/margins": 4.2549591064453125, + "rewards/rejected": -3.663165330886841, + "step": 214 + }, + { + "epoch": 0.27, + "learning_rate": 8.52088899478682e-08, + "logits/chosen": -3.298976421356201, + "logits/rejected": -3.238471746444702, + "logps/chosen": -564.40283203125, + "logps/rejected": -1298.61572265625, + "loss": 0.6189, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5475234985351562, + "rewards/margins": 3.1311843395233154, + "rewards/rejected": -2.583660840988159, + "step": 215 + }, + { + "epoch": 0.28, + "learning_rate": 8.506183921362442e-08, + "logits/chosen": -3.2309536933898926, + "logits/rejected": -3.1854920387268066, + "logps/chosen": -512.53515625, + "logps/rejected": -1229.33544921875, + "loss": 0.5438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6709960699081421, + "rewards/margins": 3.2062864303588867, + "rewards/rejected": -2.535290479660034, + "step": 216 + }, + { + "epoch": 0.28, + "learning_rate": 8.491418936928741e-08, + "logits/chosen": -3.298642158508301, + "logits/rejected": -3.2063651084899902, + "logps/chosen": -437.0369873046875, + "logps/rejected": -2622.340087890625, + "loss": 0.5203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5074051022529602, + "rewards/margins": 5.460145473480225, + "rewards/rejected": -4.952740669250488, + "step": 217 + }, + { + "epoch": 0.28, + "learning_rate": 8.47659429377856e-08, + "logits/chosen": -3.1513490676879883, + "logits/rejected": -3.1192798614501953, + "logps/chosen": -476.4214782714844, + "logps/rejected": -1797.56298828125, + "loss": 0.5406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.655590832233429, + "rewards/margins": 4.513134956359863, + "rewards/rejected": -3.8575439453125, + "step": 218 + }, + { + "epoch": 0.28, + "learning_rate": 8.461710245224147e-08, + "logits/chosen": -3.2840161323547363, + "logits/rejected": -3.119493007659912, + "logps/chosen": -476.0220642089844, + "logps/rejected": -1503.15087890625, + "loss": 0.5681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5070419311523438, + "rewards/margins": 2.953923225402832, + "rewards/rejected": -2.4468812942504883, + "step": 219 + }, + { + "epoch": 0.28, + "learning_rate": 8.446767045592829e-08, + "logits/chosen": -3.2898950576782227, + "logits/rejected": -3.161590099334717, + "logps/chosen": -501.6254577636719, + "logps/rejected": -1771.05908203125, + "loss": 0.5412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7150543332099915, + "rewards/margins": 4.3085479736328125, + "rewards/rejected": -3.593493938446045, + "step": 220 + }, + { + "epoch": 0.28, + "learning_rate": 8.431764950222655e-08, + "logits/chosen": -3.260204553604126, + "logits/rejected": -3.0474276542663574, + "logps/chosen": -569.8302001953125, + "logps/rejected": -1883.5928955078125, + "loss": 0.5993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4821731746196747, + "rewards/margins": 3.965669631958008, + "rewards/rejected": -3.4834961891174316, + "step": 221 + }, + { + "epoch": 0.28, + "learning_rate": 8.416704215458041e-08, + "logits/chosen": -3.289341926574707, + "logits/rejected": -3.186781883239746, + "logps/chosen": -504.4563903808594, + "logps/rejected": -1281.5601806640625, + "loss": 0.5769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45394134521484375, + "rewards/margins": 3.0862839221954346, + "rewards/rejected": -2.632342576980591, + "step": 222 + }, + { + "epoch": 0.28, + "learning_rate": 8.401585098645395e-08, + "logits/chosen": -3.2425389289855957, + "logits/rejected": -3.012268304824829, + "logps/chosen": -504.2779541015625, + "logps/rejected": -1717.3572998046875, + "loss": 0.585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6255783438682556, + "rewards/margins": 3.5572006702423096, + "rewards/rejected": -2.931622266769409, + "step": 223 + }, + { + "epoch": 0.29, + "learning_rate": 8.386407858128706e-08, + "logits/chosen": -3.299792766571045, + "logits/rejected": -3.2039878368377686, + "logps/chosen": -456.9503173828125, + "logps/rejected": -1018.939697265625, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5219818353652954, + "rewards/margins": 2.4349732398986816, + "rewards/rejected": -1.9129914045333862, + "step": 224 + }, + { + "epoch": 0.29, + "learning_rate": 8.371172753245137e-08, + "logits/chosen": -3.320335865020752, + "logits/rejected": -3.184281349182129, + "logps/chosen": -464.588623046875, + "logps/rejected": -1467.7974853515625, + "loss": 0.5876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6811935901641846, + "rewards/margins": 3.404680013656616, + "rewards/rejected": -2.7234864234924316, + "step": 225 + }, + { + "epoch": 0.29, + "learning_rate": 8.355880044320598e-08, + "logits/chosen": -3.278989791870117, + "logits/rejected": -3.1426706314086914, + "logps/chosen": -471.309814453125, + "logps/rejected": -1727.9503173828125, + "loss": 0.5541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5236648917198181, + "rewards/margins": 3.5371294021606445, + "rewards/rejected": -3.0134644508361816, + "step": 226 + }, + { + "epoch": 0.29, + "learning_rate": 8.340529992665288e-08, + "logits/chosen": -3.2073960304260254, + "logits/rejected": -3.1157870292663574, + "logps/chosen": -535.8037719726562, + "logps/rejected": -905.8021240234375, + "loss": 0.5886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6801697015762329, + "rewards/margins": 2.406790256500244, + "rewards/rejected": -1.7266204357147217, + "step": 227 + }, + { + "epoch": 0.29, + "learning_rate": 8.32512286056924e-08, + "logits/chosen": -3.2422330379486084, + "logits/rejected": -3.155583381652832, + "logps/chosen": -483.7022705078125, + "logps/rejected": -1191.1395263671875, + "loss": 0.5535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6195907592773438, + "rewards/margins": 3.0163345336914062, + "rewards/rejected": -2.3967437744140625, + "step": 228 + }, + { + "epoch": 0.29, + "learning_rate": 8.309658911297832e-08, + "logits/chosen": -3.2421422004699707, + "logits/rejected": -3.136658191680908, + "logps/chosen": -476.7500915527344, + "logps/rejected": -1158.9990234375, + "loss": 0.5738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5865875482559204, + "rewards/margins": 2.829409599304199, + "rewards/rejected": -2.2428221702575684, + "step": 229 + }, + { + "epoch": 0.29, + "learning_rate": 8.294138409087289e-08, + "logits/chosen": -3.262112617492676, + "logits/rejected": -3.196314811706543, + "logps/chosen": -514.231689453125, + "logps/rejected": -1202.3837890625, + "loss": 0.5957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.666912853717804, + "rewards/margins": 3.0682311058044434, + "rewards/rejected": -2.401318311691284, + "step": 230 + }, + { + "epoch": 0.29, + "learning_rate": 8.278561619140171e-08, + "logits/chosen": -3.3294448852539062, + "logits/rejected": -3.219862461090088, + "logps/chosen": -510.1011962890625, + "logps/rejected": -1223.541015625, + "loss": 0.5812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.512866199016571, + "rewards/margins": 3.153939962387085, + "rewards/rejected": -2.641073703765869, + "step": 231 + }, + { + "epoch": 0.3, + "learning_rate": 8.262928807620843e-08, + "logits/chosen": -3.2414627075195312, + "logits/rejected": -3.164081573486328, + "logps/chosen": -502.11865234375, + "logps/rejected": -1346.885986328125, + "loss": 0.5565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8043915033340454, + "rewards/margins": 3.3542816638946533, + "rewards/rejected": -2.5498900413513184, + "step": 232 + }, + { + "epoch": 0.3, + "learning_rate": 8.247240241650917e-08, + "logits/chosen": -3.259662389755249, + "logits/rejected": -3.107649326324463, + "logps/chosen": -515.1135864257812, + "logps/rejected": -1518.0205078125, + "loss": 0.5707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5551956295967102, + "rewards/margins": 3.2443132400512695, + "rewards/rejected": -2.689117431640625, + "step": 233 + }, + { + "epoch": 0.3, + "learning_rate": 8.231496189304703e-08, + "logits/chosen": -3.241623878479004, + "logits/rejected": -3.1559228897094727, + "logps/chosen": -583.7993774414062, + "logps/rejected": -1435.826171875, + "loss": 0.5863, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7007964849472046, + "rewards/margins": 3.5080108642578125, + "rewards/rejected": -2.8072142601013184, + "step": 234 + }, + { + "epoch": 0.3, + "learning_rate": 8.215696919604617e-08, + "logits/chosen": -3.254565715789795, + "logits/rejected": -3.1754541397094727, + "logps/chosen": -513.672119140625, + "logps/rejected": -1295.8134765625, + "loss": 0.5742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.511157214641571, + "rewards/margins": 3.0218629837036133, + "rewards/rejected": -2.5107054710388184, + "step": 235 + }, + { + "epoch": 0.3, + "learning_rate": 8.199842702516583e-08, + "logits/chosen": -3.232907772064209, + "logits/rejected": -3.159684419631958, + "logps/chosen": -501.1788330078125, + "logps/rejected": -3670.26416015625, + "loss": 0.5448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6047775745391846, + "rewards/margins": 6.302891731262207, + "rewards/rejected": -5.698113918304443, + "step": 236 + }, + { + "epoch": 0.3, + "learning_rate": 8.18393380894543e-08, + "logits/chosen": -3.256347894668579, + "logits/rejected": -3.213303804397583, + "logps/chosen": -539.9426879882812, + "logps/rejected": -1262.4378662109375, + "loss": 0.6297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6372162103652954, + "rewards/margins": 3.520681858062744, + "rewards/rejected": -2.8834657669067383, + "step": 237 + }, + { + "epoch": 0.3, + "learning_rate": 8.167970510730252e-08, + "logits/chosen": -3.2459747791290283, + "logits/rejected": -3.0985560417175293, + "logps/chosen": -485.33209228515625, + "logps/rejected": -1092.129150390625, + "loss": 0.5681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6044372916221619, + "rewards/margins": 2.4305100440979004, + "rewards/rejected": -1.8260728120803833, + "step": 238 + }, + { + "epoch": 0.3, + "learning_rate": 8.151953080639775e-08, + "logits/chosen": -3.1987853050231934, + "logits/rejected": -3.0935535430908203, + "logps/chosen": -487.8656311035156, + "logps/rejected": -1000.177734375, + "loss": 0.5598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6649277210235596, + "rewards/margins": 2.451298713684082, + "rewards/rejected": -1.786370873451233, + "step": 239 + }, + { + "epoch": 0.31, + "learning_rate": 8.135881792367685e-08, + "logits/chosen": -3.2680625915527344, + "logits/rejected": -3.172489643096924, + "logps/chosen": -487.34954833984375, + "logps/rejected": -1044.951904296875, + "loss": 0.5941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5850127935409546, + "rewards/margins": 2.2070343494415283, + "rewards/rejected": -1.6220214366912842, + "step": 240 + }, + { + "epoch": 0.31, + "learning_rate": 8.119756920527954e-08, + "logits/chosen": -3.312333345413208, + "logits/rejected": -3.106085777282715, + "logps/chosen": -475.33807373046875, + "logps/rejected": -1715.16650390625, + "loss": 0.5798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5675399899482727, + "rewards/margins": 3.7092390060424805, + "rewards/rejected": -3.1416993141174316, + "step": 241 + }, + { + "epoch": 0.31, + "learning_rate": 8.103578740650156e-08, + "logits/chosen": -3.2439842224121094, + "logits/rejected": -3.111393928527832, + "logps/chosen": -527.5750732421875, + "logps/rejected": -1080.341064453125, + "loss": 0.6207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6675964593887329, + "rewards/margins": 2.8081908226013184, + "rewards/rejected": -2.140594482421875, + "step": 242 + }, + { + "epoch": 0.31, + "learning_rate": 8.087347529174742e-08, + "logits/chosen": -3.2897348403930664, + "logits/rejected": -3.0919251441955566, + "logps/chosen": -506.1542053222656, + "logps/rejected": -1148.8828125, + "loss": 0.5599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3767150938510895, + "rewards/margins": 2.599722385406494, + "rewards/rejected": -2.2230072021484375, + "step": 243 + }, + { + "epoch": 0.31, + "learning_rate": 8.07106356344834e-08, + "logits/chosen": -3.215665817260742, + "logits/rejected": -3.1297607421875, + "logps/chosen": -581.7115478515625, + "logps/rejected": -1049.8189697265625, + "loss": 0.6232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6299072504043579, + "rewards/margins": 2.81882643699646, + "rewards/rejected": -2.1889190673828125, + "step": 244 + }, + { + "epoch": 0.31, + "learning_rate": 8.054727121718987e-08, + "logits/chosen": -3.2837114334106445, + "logits/rejected": -3.1387274265289307, + "logps/chosen": -441.6219482421875, + "logps/rejected": -1127.4576416015625, + "loss": 0.557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6510025262832642, + "rewards/margins": 2.904362440109253, + "rewards/rejected": -2.2533600330352783, + "step": 245 + }, + { + "epoch": 0.31, + "learning_rate": 8.038338483131406e-08, + "logits/chosen": -3.2960562705993652, + "logits/rejected": -3.061459541320801, + "logps/chosen": -490.8929443359375, + "logps/rejected": -1589.646240234375, + "loss": 0.5739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.619232177734375, + "rewards/margins": 3.3689393997192383, + "rewards/rejected": -2.749706983566284, + "step": 246 + }, + { + "epoch": 0.31, + "learning_rate": 8.021897927722208e-08, + "logits/chosen": -3.2250819206237793, + "logits/rejected": -3.1617279052734375, + "logps/chosen": -539.5657958984375, + "logps/rejected": -1545.5626220703125, + "loss": 0.5879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7438690066337585, + "rewards/margins": 3.951760768890381, + "rewards/rejected": -3.2078919410705566, + "step": 247 + }, + { + "epoch": 0.32, + "learning_rate": 8.005405736415125e-08, + "logits/chosen": -3.308196544647217, + "logits/rejected": -3.21317195892334, + "logps/chosen": -467.76312255859375, + "logps/rejected": -1491.712646484375, + "loss": 0.5252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5296036005020142, + "rewards/margins": 3.7853989601135254, + "rewards/rejected": -3.255795478820801, + "step": 248 + }, + { + "epoch": 0.32, + "learning_rate": 7.988862191016203e-08, + "logits/chosen": -3.213688611984253, + "logits/rejected": -3.100299835205078, + "logps/chosen": -480.2362976074219, + "logps/rejected": -1106.79150390625, + "loss": 0.5705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.54815673828125, + "rewards/margins": 2.762014865875244, + "rewards/rejected": -2.213858127593994, + "step": 249 + }, + { + "epoch": 0.32, + "learning_rate": 7.97226757420899e-08, + "logits/chosen": -3.3147735595703125, + "logits/rejected": -3.100728750228882, + "logps/chosen": -473.9757080078125, + "logps/rejected": -3858.46728515625, + "loss": 0.5851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.720458984375, + "rewards/margins": 6.83384370803833, + "rewards/rejected": -6.113385200500488, + "step": 250 + }, + { + "epoch": 0.32, + "learning_rate": 7.955622169549696e-08, + "logits/chosen": -3.2665090560913086, + "logits/rejected": -3.144742012023926, + "logps/chosen": -525.6806030273438, + "logps/rejected": -1182.1019287109375, + "loss": 0.5842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6989593505859375, + "rewards/margins": 2.906881809234619, + "rewards/rejected": -2.2079224586486816, + "step": 251 + }, + { + "epoch": 0.32, + "learning_rate": 7.938926261462366e-08, + "logits/chosen": -3.3298933506011963, + "logits/rejected": -3.1252505779266357, + "logps/chosen": -510.69317626953125, + "logps/rejected": -1357.6005859375, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6322799921035767, + "rewards/margins": 2.921635627746582, + "rewards/rejected": -2.289355516433716, + "step": 252 + }, + { + "epoch": 0.32, + "learning_rate": 7.922180135233999e-08, + "logits/chosen": -3.2273764610290527, + "logits/rejected": -3.116478443145752, + "logps/chosen": -566.0274658203125, + "logps/rejected": -1142.0028076171875, + "loss": 0.607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6674530506134033, + "rewards/margins": 2.8641510009765625, + "rewards/rejected": -2.196697950363159, + "step": 253 + }, + { + "epoch": 0.32, + "learning_rate": 7.905384077009691e-08, + "logits/chosen": -3.2816898822784424, + "logits/rejected": -3.1196019649505615, + "logps/chosen": -495.6107177734375, + "logps/rejected": -1518.5006103515625, + "loss": 0.5528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6686127185821533, + "rewards/margins": 3.7621188163757324, + "rewards/rejected": -3.093505859375, + "step": 254 + }, + { + "epoch": 0.33, + "learning_rate": 7.888538373787734e-08, + "logits/chosen": -3.204885959625244, + "logits/rejected": -3.15920352935791, + "logps/chosen": -503.34088134765625, + "logps/rejected": -1062.9918212890625, + "loss": 0.5397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5293548703193665, + "rewards/margins": 2.3947296142578125, + "rewards/rejected": -1.8653748035430908, + "step": 255 + }, + { + "epoch": 0.33, + "learning_rate": 7.871643313414717e-08, + "logits/chosen": -3.2948415279388428, + "logits/rejected": -3.187971591949463, + "logps/chosen": -486.052734375, + "logps/rejected": -1722.787109375, + "loss": 0.5941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5782715082168579, + "rewards/margins": 3.9548068046569824, + "rewards/rejected": -3.376535177230835, + "step": 256 + }, + { + "epoch": 0.33, + "learning_rate": 7.854699184580609e-08, + "logits/chosen": -3.28963303565979, + "logits/rejected": -3.059722900390625, + "logps/chosen": -508.78167724609375, + "logps/rejected": -2869.94873046875, + "loss": 0.5806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6740570068359375, + "rewards/margins": 5.614071846008301, + "rewards/rejected": -4.940014839172363, + "step": 257 + }, + { + "epoch": 0.33, + "learning_rate": 7.837706276813818e-08, + "logits/chosen": -3.2472314834594727, + "logits/rejected": -3.1525583267211914, + "logps/chosen": -527.3955078125, + "logps/rejected": -1039.1129150390625, + "loss": 0.63, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.609509289264679, + "rewards/margins": 2.5312318801879883, + "rewards/rejected": -1.921722412109375, + "step": 258 + }, + { + "epoch": 0.33, + "learning_rate": 7.820664880476255e-08, + "logits/chosen": -3.2665722370147705, + "logits/rejected": -3.2026619911193848, + "logps/chosen": -474.0660400390625, + "logps/rejected": -1305.2611083984375, + "loss": 0.5467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.582366943359375, + "rewards/margins": 3.2511839866638184, + "rewards/rejected": -2.6688170433044434, + "step": 259 + }, + { + "epoch": 0.33, + "learning_rate": 7.803575286758363e-08, + "logits/chosen": -3.202971935272217, + "logits/rejected": -3.0784802436828613, + "logps/chosen": -475.1886291503906, + "logps/rejected": -751.862060546875, + "loss": 0.5878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4957718253135681, + "rewards/margins": 1.670881748199463, + "rewards/rejected": -1.17510986328125, + "step": 260 + }, + { + "epoch": 0.33, + "learning_rate": 7.786437787674148e-08, + "logits/chosen": -3.2821335792541504, + "logits/rejected": -3.1496925354003906, + "logps/chosen": -487.36651611328125, + "logps/rejected": -818.2418212890625, + "loss": 0.6526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.640972912311554, + "rewards/margins": 2.528064012527466, + "rewards/rejected": -1.8870911598205566, + "step": 261 + }, + { + "epoch": 0.33, + "learning_rate": 7.769252676056186e-08, + "logits/chosen": -3.249415636062622, + "logits/rejected": -3.1660802364349365, + "logps/chosen": -506.7237548828125, + "logps/rejected": -1951.71240234375, + "loss": 0.5874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6316696405410767, + "rewards/margins": 4.898576736450195, + "rewards/rejected": -4.26690673828125, + "step": 262 + }, + { + "epoch": 0.34, + "learning_rate": 7.752020245550617e-08, + "logits/chosen": -3.2691683769226074, + "logits/rejected": -3.2090511322021484, + "logps/chosen": -441.1667785644531, + "logps/rejected": -1595.957275390625, + "loss": 0.5472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5358734130859375, + "rewards/margins": 3.6093173027038574, + "rewards/rejected": -3.07344388961792, + "step": 263 + }, + { + "epoch": 0.34, + "learning_rate": 7.734740790612135e-08, + "logits/chosen": -3.2508928775787354, + "logits/rejected": -3.2049388885498047, + "logps/chosen": -527.8408203125, + "logps/rejected": -1376.172607421875, + "loss": 0.5668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6678741574287415, + "rewards/margins": 3.6515045166015625, + "rewards/rejected": -2.983630418777466, + "step": 264 + }, + { + "epoch": 0.34, + "learning_rate": 7.717414606498946e-08, + "logits/chosen": -3.270641326904297, + "logits/rejected": -3.206099033355713, + "logps/chosen": -456.8864440917969, + "logps/rejected": -1407.857421875, + "loss": 0.5482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5772796869277954, + "rewards/margins": 4.115923881530762, + "rewards/rejected": -3.538644552230835, + "step": 265 + }, + { + "epoch": 0.34, + "learning_rate": 7.700041989267735e-08, + "logits/chosen": -3.308547258377075, + "logits/rejected": -3.1880290508270264, + "logps/chosen": -494.6634826660156, + "logps/rejected": -686.8846435546875, + "loss": 0.592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.748394787311554, + "rewards/margins": 2.214956760406494, + "rewards/rejected": -1.4665619134902954, + "step": 266 + }, + { + "epoch": 0.34, + "learning_rate": 7.682623235768597e-08, + "logits/chosen": -3.2787885665893555, + "logits/rejected": -3.14959979057312, + "logps/chosen": -548.5895385742188, + "logps/rejected": -1431.584716796875, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5056091547012329, + "rewards/margins": 3.5321044921875, + "rewards/rejected": -3.0264954566955566, + "step": 267 + }, + { + "epoch": 0.34, + "learning_rate": 7.665158643639968e-08, + "logits/chosen": -3.2881364822387695, + "logits/rejected": -3.1400346755981445, + "logps/chosen": -477.5010986328125, + "logps/rejected": -1888.6328125, + "loss": 0.5591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5636780261993408, + "rewards/margins": 4.907732963562012, + "rewards/rejected": -4.34405517578125, + "step": 268 + }, + { + "epoch": 0.34, + "learning_rate": 7.647648511303544e-08, + "logits/chosen": -3.283010721206665, + "logits/rejected": -3.1931397914886475, + "logps/chosen": -493.45452880859375, + "logps/rejected": -1111.3388671875, + "loss": 0.5583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49705660343170166, + "rewards/margins": 3.054591655731201, + "rewards/rejected": -2.55753493309021, + "step": 269 + }, + { + "epoch": 0.34, + "learning_rate": 7.63009313795917e-08, + "logits/chosen": -3.3014183044433594, + "logits/rejected": -3.1402721405029297, + "logps/chosen": -542.9127197265625, + "logps/rejected": -1497.6881103515625, + "loss": 0.6119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6127411127090454, + "rewards/margins": 3.806485176086426, + "rewards/rejected": -3.193743944168091, + "step": 270 + }, + { + "epoch": 0.35, + "learning_rate": 7.612492823579743e-08, + "logits/chosen": -3.307462692260742, + "logits/rejected": -3.2054524421691895, + "logps/chosen": -460.15545654296875, + "logps/rejected": -1491.4842529296875, + "loss": 0.5414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5830551385879517, + "rewards/margins": 3.655534267425537, + "rewards/rejected": -3.072479248046875, + "step": 271 + }, + { + "epoch": 0.35, + "learning_rate": 7.594847868906076e-08, + "logits/chosen": -3.29742693901062, + "logits/rejected": -3.1156504154205322, + "logps/chosen": -419.61517333984375, + "logps/rejected": -1150.67919921875, + "loss": 0.5549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5511184930801392, + "rewards/margins": 3.134263753890991, + "rewards/rejected": -2.5831451416015625, + "step": 272 + }, + { + "epoch": 0.35, + "learning_rate": 7.577158575441756e-08, + "logits/chosen": -3.24314546585083, + "logits/rejected": -3.123805046081543, + "logps/chosen": -511.328369140625, + "logps/rejected": -1243.2158203125, + "loss": 0.5836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6896454095840454, + "rewards/margins": 3.2666354179382324, + "rewards/rejected": -2.5769898891448975, + "step": 273 + }, + { + "epoch": 0.35, + "learning_rate": 7.559425245448005e-08, + "logits/chosen": -3.2434139251708984, + "logits/rejected": -3.128997325897217, + "logps/chosen": -522.6084594726562, + "logps/rejected": -1608.6058349609375, + "loss": 0.5843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3480331301689148, + "rewards/margins": 4.1088547706604, + "rewards/rejected": -3.760821580886841, + "step": 274 + }, + { + "epoch": 0.35, + "learning_rate": 7.541648181938503e-08, + "logits/chosen": -3.202698230743408, + "logits/rejected": -3.1456456184387207, + "logps/chosen": -477.0115661621094, + "logps/rejected": -1309.760009765625, + "loss": 0.5558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8294357657432556, + "rewards/margins": 3.4896163940429688, + "rewards/rejected": -2.6601808071136475, + "step": 275 + }, + { + "epoch": 0.35, + "learning_rate": 7.523827688674219e-08, + "logits/chosen": -3.272153854370117, + "logits/rejected": -3.1193830966949463, + "logps/chosen": -447.764892578125, + "logps/rejected": -1508.7484130859375, + "loss": 0.5902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5558853149414062, + "rewards/margins": 3.307408332824707, + "rewards/rejected": -2.751523017883301, + "step": 276 + }, + { + "epoch": 0.35, + "learning_rate": 7.505964070158213e-08, + "logits/chosen": -3.279092788696289, + "logits/rejected": -3.1393537521362305, + "logps/chosen": -484.9269104003906, + "logps/rejected": -1027.2591552734375, + "loss": 0.5603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6568511724472046, + "rewards/margins": 3.432919502258301, + "rewards/rejected": -2.7760682106018066, + "step": 277 + }, + { + "epoch": 0.35, + "learning_rate": 7.488057631630437e-08, + "logits/chosen": -3.188724994659424, + "logits/rejected": -3.0750856399536133, + "logps/chosen": -493.927001953125, + "logps/rejected": -1765.1219482421875, + "loss": 0.5156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6551041007041931, + "rewards/margins": 3.7536821365356445, + "rewards/rejected": -3.0985779762268066, + "step": 278 + }, + { + "epoch": 0.36, + "learning_rate": 7.47010867906252e-08, + "logits/chosen": -3.3262779712677, + "logits/rejected": -3.1908302307128906, + "logps/chosen": -527.8447265625, + "logps/rejected": -959.5848999023438, + "loss": 0.6101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.73138427734375, + "rewards/margins": 2.6063356399536133, + "rewards/rejected": -1.8749512434005737, + "step": 279 + }, + { + "epoch": 0.36, + "learning_rate": 7.452117519152541e-08, + "logits/chosen": -3.2922773361206055, + "logits/rejected": -3.2023167610168457, + "logps/chosen": -503.45745849609375, + "logps/rejected": -900.15576171875, + "loss": 0.6087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6017990112304688, + "rewards/margins": 2.3447341918945312, + "rewards/rejected": -1.7429351806640625, + "step": 280 + }, + { + "epoch": 0.36, + "learning_rate": 7.434084459319781e-08, + "logits/chosen": -3.2553229331970215, + "logits/rejected": -3.1708853244781494, + "logps/chosen": -523.97998046875, + "logps/rejected": -1769.17529296875, + "loss": 0.5932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5876601934432983, + "rewards/margins": 3.6862869262695312, + "rewards/rejected": -3.0986266136169434, + "step": 281 + }, + { + "epoch": 0.36, + "learning_rate": 7.41600980769948e-08, + "logits/chosen": -3.253493309020996, + "logits/rejected": -3.238476276397705, + "logps/chosen": -485.91632080078125, + "logps/rejected": -1152.43505859375, + "loss": 0.5138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7503601312637329, + "rewards/margins": 2.893756151199341, + "rewards/rejected": -2.1433959007263184, + "step": 282 + }, + { + "epoch": 0.36, + "learning_rate": 7.397893873137563e-08, + "logits/chosen": -3.286243200302124, + "logits/rejected": -3.192316770553589, + "logps/chosen": -515.83154296875, + "logps/rejected": -1453.166015625, + "loss": 0.5637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5717346668243408, + "rewards/margins": 3.79085111618042, + "rewards/rejected": -3.2191162109375, + "step": 283 + }, + { + "epoch": 0.36, + "learning_rate": 7.379736965185368e-08, + "logits/chosen": -3.2562406063079834, + "logits/rejected": -3.207949161529541, + "logps/chosen": -505.8721923828125, + "logps/rejected": -1058.2449951171875, + "loss": 0.6213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6599823236465454, + "rewards/margins": 2.964709520339966, + "rewards/rejected": -2.30472731590271, + "step": 284 + }, + { + "epoch": 0.36, + "learning_rate": 7.361539394094355e-08, + "logits/chosen": -3.307211399078369, + "logits/rejected": -3.1344971656799316, + "logps/chosen": -553.2413330078125, + "logps/rejected": -1288.6865234375, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7006805539131165, + "rewards/margins": 3.6877808570861816, + "rewards/rejected": -2.987100124359131, + "step": 285 + }, + { + "epoch": 0.36, + "learning_rate": 7.343301470810807e-08, + "logits/chosen": -3.2022414207458496, + "logits/rejected": -3.245528221130371, + "logps/chosen": -625.6607055664062, + "logps/rejected": -1775.7685546875, + "loss": 0.58, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6999053955078125, + "rewards/margins": 4.744937419891357, + "rewards/rejected": -4.045032024383545, + "step": 286 + }, + { + "epoch": 0.37, + "learning_rate": 7.325023506970511e-08, + "logits/chosen": -3.2525782585144043, + "logits/rejected": -3.1666812896728516, + "logps/chosen": -509.12310791015625, + "logps/rejected": -1876.164794921875, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6422058343887329, + "rewards/margins": 4.717877388000488, + "rewards/rejected": -4.075671195983887, + "step": 287 + }, + { + "epoch": 0.37, + "learning_rate": 7.306705814893439e-08, + "logits/chosen": -3.2751858234405518, + "logits/rejected": -3.1567835807800293, + "logps/chosen": -504.23712158203125, + "logps/rejected": -1110.948486328125, + "loss": 0.6119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.612231433391571, + "rewards/margins": 2.923596143722534, + "rewards/rejected": -2.3113646507263184, + "step": 288 + }, + { + "epoch": 0.37, + "learning_rate": 7.288348707578408e-08, + "logits/chosen": -3.295417308807373, + "logits/rejected": -3.1814780235290527, + "logps/chosen": -497.0800476074219, + "logps/rejected": -1070.525390625, + "loss": 0.5412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.597930908203125, + "rewards/margins": 2.890856981277466, + "rewards/rejected": -2.292926073074341, + "step": 289 + }, + { + "epoch": 0.37, + "learning_rate": 7.269952498697734e-08, + "logits/chosen": -3.3059465885162354, + "logits/rejected": -3.135413408279419, + "logps/chosen": -502.43133544921875, + "logps/rejected": -2134.931640625, + "loss": 0.5554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6502563953399658, + "rewards/margins": 4.941577434539795, + "rewards/rejected": -4.29132080078125, + "step": 290 + }, + { + "epoch": 0.37, + "learning_rate": 7.251517502591869e-08, + "logits/chosen": -3.3251962661743164, + "logits/rejected": -3.118803024291992, + "logps/chosen": -493.92962646484375, + "logps/rejected": -1599.7274169921875, + "loss": 0.5683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.601666271686554, + "rewards/margins": 3.8695740699768066, + "rewards/rejected": -3.2679078578948975, + "step": 291 + }, + { + "epoch": 0.37, + "learning_rate": 7.233044034264033e-08, + "logits/chosen": -3.2398576736450195, + "logits/rejected": -3.215186595916748, + "logps/chosen": -537.4606323242188, + "logps/rejected": -1330.919921875, + "loss": 0.561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6178802251815796, + "rewards/margins": 4.216500759124756, + "rewards/rejected": -3.598620653152466, + "step": 292 + }, + { + "epoch": 0.37, + "learning_rate": 7.214532409374828e-08, + "logits/chosen": -3.3020131587982178, + "logits/rejected": -3.130143165588379, + "logps/chosen": -499.3557434082031, + "logps/rejected": -1582.3828125, + "loss": 0.5705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8810043334960938, + "rewards/margins": 4.277226448059082, + "rewards/rejected": -3.3962221145629883, + "step": 293 + }, + { + "epoch": 0.37, + "learning_rate": 7.195982944236851e-08, + "logits/chosen": -3.2537097930908203, + "logits/rejected": -3.1641321182250977, + "logps/chosen": -539.5465087890625, + "logps/rejected": -1301.560791015625, + "loss": 0.5869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.660430908203125, + "rewards/margins": 3.2617125511169434, + "rewards/rejected": -2.6012816429138184, + "step": 294 + }, + { + "epoch": 0.38, + "learning_rate": 7.17739595580928e-08, + "logits/chosen": -3.289416551589966, + "logits/rejected": -3.145017623901367, + "logps/chosen": -480.9229736328125, + "logps/rejected": -1964.491455078125, + "loss": 0.572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.868865966796875, + "rewards/margins": 4.826873779296875, + "rewards/rejected": -3.9580078125, + "step": 295 + }, + { + "epoch": 0.38, + "learning_rate": 7.158771761692464e-08, + "logits/chosen": -3.234396457672119, + "logits/rejected": -3.164405107498169, + "logps/chosen": -518.9449462890625, + "logps/rejected": -1060.22314453125, + "loss": 0.5402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6243377923965454, + "rewards/margins": 3.3655519485473633, + "rewards/rejected": -2.7412140369415283, + "step": 296 + }, + { + "epoch": 0.38, + "learning_rate": 7.140110680122495e-08, + "logits/chosen": -3.2662405967712402, + "logits/rejected": -3.202692985534668, + "logps/chosen": -540.21484375, + "logps/rejected": -1011.5703125, + "loss": 0.5573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6342239379882812, + "rewards/margins": 3.0427536964416504, + "rewards/rejected": -2.408529758453369, + "step": 297 + }, + { + "epoch": 0.38, + "learning_rate": 7.121413029965768e-08, + "logits/chosen": -3.2783737182617188, + "logits/rejected": -3.1664915084838867, + "logps/chosen": -515.596923828125, + "logps/rejected": -1425.23388671875, + "loss": 0.5568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.560589611530304, + "rewards/margins": 3.3839111328125, + "rewards/rejected": -2.823321580886841, + "step": 298 + }, + { + "epoch": 0.38, + "learning_rate": 7.102679130713537e-08, + "logits/chosen": -3.2752506732940674, + "logits/rejected": -3.1551549434661865, + "logps/chosen": -466.5396423339844, + "logps/rejected": -1465.9080810546875, + "loss": 0.5604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6105941534042358, + "rewards/margins": 3.8941664695739746, + "rewards/rejected": -3.2835724353790283, + "step": 299 + }, + { + "epoch": 0.38, + "learning_rate": 7.083909302476451e-08, + "logits/chosen": -3.292320728302002, + "logits/rejected": -3.0964300632476807, + "logps/chosen": -503.4748229980469, + "logps/rejected": -1146.38916015625, + "loss": 0.5635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7370712161064148, + "rewards/margins": 2.8991057872772217, + "rewards/rejected": -2.162034511566162, + "step": 300 + }, + { + "epoch": 0.38, + "learning_rate": 7.065103865979087e-08, + "logits/chosen": -3.2383155822753906, + "logits/rejected": -3.2083048820495605, + "logps/chosen": -486.32281494140625, + "logps/rejected": -1527.363037109375, + "loss": 0.5358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6574615240097046, + "rewards/margins": 4.015384197235107, + "rewards/rejected": -3.3579225540161133, + "step": 301 + }, + { + "epoch": 0.38, + "learning_rate": 7.046263142554469e-08, + "logits/chosen": -3.322004795074463, + "logits/rejected": -3.1994824409484863, + "logps/chosen": -534.7395629882812, + "logps/rejected": -1448.30615234375, + "loss": 0.6173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.734039306640625, + "rewards/margins": 3.9365234375, + "rewards/rejected": -3.202484130859375, + "step": 302 + }, + { + "epoch": 0.39, + "learning_rate": 7.027387454138578e-08, + "logits/chosen": -3.2648839950561523, + "logits/rejected": -3.189502716064453, + "logps/chosen": -505.83050537109375, + "logps/rejected": -1107.259521484375, + "loss": 0.5677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5624130368232727, + "rewards/margins": 3.063206672668457, + "rewards/rejected": -2.50079345703125, + "step": 303 + }, + { + "epoch": 0.39, + "learning_rate": 7.008477123264848e-08, + "logits/chosen": -3.282665729522705, + "logits/rejected": -3.0923571586608887, + "logps/chosen": -548.96484375, + "logps/rejected": -1447.387451171875, + "loss": 0.6044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6794418096542358, + "rewards/margins": 4.304771423339844, + "rewards/rejected": -3.6253297328948975, + "step": 304 + }, + { + "epoch": 0.39, + "learning_rate": 6.989532473058657e-08, + "logits/chosen": -3.29681396484375, + "logits/rejected": -3.177894353866577, + "logps/chosen": -518.0464477539062, + "logps/rejected": -1070.163818359375, + "loss": 0.5892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8184051513671875, + "rewards/margins": 2.93841552734375, + "rewards/rejected": -2.1200103759765625, + "step": 305 + }, + { + "epoch": 0.39, + "learning_rate": 6.970553827231808e-08, + "logits/chosen": -3.2772445678710938, + "logits/rejected": -3.1804494857788086, + "logps/chosen": -496.0229187011719, + "logps/rejected": -1441.908203125, + "loss": 0.532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6707611083984375, + "rewards/margins": 4.001278877258301, + "rewards/rejected": -3.330517530441284, + "step": 306 + }, + { + "epoch": 0.39, + "learning_rate": 6.951541510076994e-08, + "logits/chosen": -3.3040475845336914, + "logits/rejected": -3.098172187805176, + "logps/chosen": -525.9420776367188, + "logps/rejected": -1441.2646484375, + "loss": 0.5436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5691483020782471, + "rewards/margins": 3.7233595848083496, + "rewards/rejected": -3.1542115211486816, + "step": 307 + }, + { + "epoch": 0.39, + "learning_rate": 6.932495846462261e-08, + "logits/chosen": -3.241611957550049, + "logits/rejected": -3.1214675903320312, + "logps/chosen": -511.149169921875, + "logps/rejected": -1201.37353515625, + "loss": 0.5625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8703582882881165, + "rewards/margins": 3.636740207672119, + "rewards/rejected": -2.7663819789886475, + "step": 308 + }, + { + "epoch": 0.39, + "learning_rate": 6.913417161825448e-08, + "logits/chosen": -3.269946813583374, + "logits/rejected": -3.1245169639587402, + "logps/chosen": -537.40966796875, + "logps/rejected": -3897.479736328125, + "loss": 0.558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6811691522598267, + "rewards/margins": 4.951909065246582, + "rewards/rejected": -4.270740032196045, + "step": 309 + }, + { + "epoch": 0.4, + "learning_rate": 6.894305782168638e-08, + "logits/chosen": -3.2908859252929688, + "logits/rejected": -3.17708158493042, + "logps/chosen": -509.6315612792969, + "logps/rejected": -770.4804077148438, + "loss": 0.6083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8173401355743408, + "rewards/margins": 2.6736176013946533, + "rewards/rejected": -1.8562774658203125, + "step": 310 + }, + { + "epoch": 0.4, + "learning_rate": 6.875162034052578e-08, + "logits/chosen": -3.2483882904052734, + "logits/rejected": -3.178328037261963, + "logps/chosen": -481.593505859375, + "logps/rejected": -1139.06005859375, + "loss": 0.5224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7558258175849915, + "rewards/margins": 3.2546935081481934, + "rewards/rejected": -2.498867988586426, + "step": 311 + }, + { + "epoch": 0.4, + "learning_rate": 6.855986244591103e-08, + "logits/chosen": -3.2259602546691895, + "logits/rejected": -3.0397605895996094, + "logps/chosen": -508.4966735839844, + "logps/rejected": -1821.389404296875, + "loss": 0.5417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7778457403182983, + "rewards/margins": 4.704231262207031, + "rewards/rejected": -3.9263856410980225, + "step": 312 + }, + { + "epoch": 0.4, + "learning_rate": 6.836778741445549e-08, + "logits/chosen": -3.2468202114105225, + "logits/rejected": -3.1686525344848633, + "logps/chosen": -512.1435546875, + "logps/rejected": -1393.37890625, + "loss": 0.5893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8363479375839233, + "rewards/margins": 3.4575424194335938, + "rewards/rejected": -2.62119460105896, + "step": 313 + }, + { + "epoch": 0.4, + "learning_rate": 6.817539852819148e-08, + "logits/chosen": -3.2737936973571777, + "logits/rejected": -3.162179946899414, + "logps/chosen": -461.51605224609375, + "logps/rejected": -1107.6375732421875, + "loss": 0.5334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5938583612442017, + "rewards/margins": 3.6253769397735596, + "rewards/rejected": -3.0315184593200684, + "step": 314 + }, + { + "epoch": 0.4, + "learning_rate": 6.798269907451427e-08, + "logits/chosen": -3.261972427368164, + "logits/rejected": -3.180837392807007, + "logps/chosen": -475.5566711425781, + "logps/rejected": -2021.9117431640625, + "loss": 0.5127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7342026233673096, + "rewards/margins": 5.461790561676025, + "rewards/rejected": -4.727587699890137, + "step": 315 + }, + { + "epoch": 0.4, + "learning_rate": 6.778969234612582e-08, + "logits/chosen": -3.3123340606689453, + "logits/rejected": -3.174067497253418, + "logps/chosen": -534.5098876953125, + "logps/rejected": -832.287109375, + "loss": 0.55, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6043014526367188, + "rewards/margins": 2.4084181785583496, + "rewards/rejected": -1.8041167259216309, + "step": 316 + }, + { + "epoch": 0.4, + "learning_rate": 6.759638164097861e-08, + "logits/chosen": -3.220551013946533, + "logits/rejected": -3.133720874786377, + "logps/chosen": -557.38134765625, + "logps/rejected": -1374.9034423828125, + "loss": 0.6111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0582489967346191, + "rewards/margins": 4.112362861633301, + "rewards/rejected": -3.0541138648986816, + "step": 317 + }, + { + "epoch": 0.41, + "learning_rate": 6.740277026221922e-08, + "logits/chosen": -3.3078248500823975, + "logits/rejected": -3.122800827026367, + "logps/chosen": -527.6830444335938, + "logps/rejected": -1960.87060546875, + "loss": 0.5716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8015701770782471, + "rewards/margins": 5.3774614334106445, + "rewards/rejected": -4.575891017913818, + "step": 318 + }, + { + "epoch": 0.41, + "learning_rate": 6.720886151813194e-08, + "logits/chosen": -3.240435838699341, + "logits/rejected": -3.195396900177002, + "logps/chosen": -535.3953247070312, + "logps/rejected": -1088.4425048828125, + "loss": 0.5811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5798858404159546, + "rewards/margins": 2.9278078079223633, + "rewards/rejected": -2.347921848297119, + "step": 319 + }, + { + "epoch": 0.41, + "learning_rate": 6.701465872208215e-08, + "logits/chosen": -3.2881977558135986, + "logits/rejected": -3.1472926139831543, + "logps/chosen": -534.6431274414062, + "logps/rejected": -1400.3291015625, + "loss": 0.557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7195464968681335, + "rewards/margins": 3.892965793609619, + "rewards/rejected": -3.173419237136841, + "step": 320 + }, + { + "epoch": 0.41, + "learning_rate": 6.682016519245985e-08, + "logits/chosen": -3.225154399871826, + "logits/rejected": -3.1536521911621094, + "logps/chosen": -575.81640625, + "logps/rejected": -707.070068359375, + "loss": 0.5957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7504852414131165, + "rewards/margins": 2.1566193103790283, + "rewards/rejected": -1.406134009361267, + "step": 321 + }, + { + "epoch": 0.41, + "learning_rate": 6.662538425262284e-08, + "logits/chosen": -3.284726619720459, + "logits/rejected": -3.0842342376708984, + "logps/chosen": -502.68658447265625, + "logps/rejected": -3000.85546875, + "loss": 0.5431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5409119129180908, + "rewards/margins": 6.676049709320068, + "rewards/rejected": -6.135138034820557, + "step": 322 + }, + { + "epoch": 0.41, + "learning_rate": 6.643031923083994e-08, + "logits/chosen": -3.3026373386383057, + "logits/rejected": -3.188828945159912, + "logps/chosen": -454.2982177734375, + "logps/rejected": -1142.6494140625, + "loss": 0.5483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6398712396621704, + "rewards/margins": 3.309060573577881, + "rewards/rejected": -2.669189453125, + "step": 323 + }, + { + "epoch": 0.41, + "learning_rate": 6.623497346023418e-08, + "logits/chosen": -3.2985692024230957, + "logits/rejected": -3.2497453689575195, + "logps/chosen": -571.13134765625, + "logps/rejected": -1113.40234375, + "loss": 0.5777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8630096316337585, + "rewards/margins": 3.0893402099609375, + "rewards/rejected": -2.2263307571411133, + "step": 324 + }, + { + "epoch": 0.41, + "learning_rate": 6.603935027872579e-08, + "logits/chosen": -3.276716709136963, + "logits/rejected": -3.108443021774292, + "logps/chosen": -505.02435302734375, + "logps/rejected": -1223.531005859375, + "loss": 0.5306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6769027709960938, + "rewards/margins": 3.0828170776367188, + "rewards/rejected": -2.405914306640625, + "step": 325 + }, + { + "epoch": 0.42, + "learning_rate": 6.584345302897522e-08, + "logits/chosen": -3.3212075233459473, + "logits/rejected": -3.2655811309814453, + "logps/chosen": -483.2991027832031, + "logps/rejected": -1536.78125, + "loss": 0.5432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6567718982696533, + "rewards/margins": 3.681027412414551, + "rewards/rejected": -3.0242552757263184, + "step": 326 + }, + { + "epoch": 0.42, + "learning_rate": 6.564728505832595e-08, + "logits/chosen": -3.2335782051086426, + "logits/rejected": -3.062272548675537, + "logps/chosen": -538.1387939453125, + "logps/rejected": -1269.25390625, + "loss": 0.5794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8663971424102783, + "rewards/margins": 3.536648750305176, + "rewards/rejected": -2.6702513694763184, + "step": 327 + }, + { + "epoch": 0.42, + "learning_rate": 6.545084971874738e-08, + "logits/chosen": -3.2806639671325684, + "logits/rejected": -3.191408634185791, + "logps/chosen": -546.9816284179688, + "logps/rejected": -1693.43212890625, + "loss": 0.5927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7994720935821533, + "rewards/margins": 4.7996063232421875, + "rewards/rejected": -4.000134468078613, + "step": 328 + }, + { + "epoch": 0.42, + "learning_rate": 6.525415036677744e-08, + "logits/chosen": -3.2197868824005127, + "logits/rejected": -3.1377735137939453, + "logps/chosen": -472.61395263671875, + "logps/rejected": -2656.40478515625, + "loss": 0.5824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6798187494277954, + "rewards/margins": 5.656064033508301, + "rewards/rejected": -4.976245403289795, + "step": 329 + }, + { + "epoch": 0.42, + "learning_rate": 6.505719036346537e-08, + "logits/chosen": -3.220374822616577, + "logits/rejected": -3.149477958679199, + "logps/chosen": -513.1549682617188, + "logps/rejected": -1700.8990478515625, + "loss": 0.5561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.75836181640625, + "rewards/margins": 4.622533798217773, + "rewards/rejected": -3.8641724586486816, + "step": 330 + }, + { + "epoch": 0.42, + "learning_rate": 6.485997307431419e-08, + "logits/chosen": -3.2876381874084473, + "logits/rejected": -3.210109233856201, + "logps/chosen": -500.8257141113281, + "logps/rejected": -1259.3458251953125, + "loss": 0.5395, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8053314089775085, + "rewards/margins": 3.8824706077575684, + "rewards/rejected": -3.077139377593994, + "step": 331 + }, + { + "epoch": 0.42, + "learning_rate": 6.466250186922324e-08, + "logits/chosen": -3.230431318283081, + "logits/rejected": -3.0889358520507812, + "logps/chosen": -508.26019287109375, + "logps/rejected": -1416.9833984375, + "loss": 0.513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7266479730606079, + "rewards/margins": 3.8278136253356934, + "rewards/rejected": -3.101165771484375, + "step": 332 + }, + { + "epoch": 0.42, + "learning_rate": 6.446478012243055e-08, + "logits/chosen": -3.284820079803467, + "logits/rejected": -3.176270008087158, + "logps/chosen": -484.19183349609375, + "logps/rejected": -1164.4117431640625, + "loss": 0.5378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8000564575195312, + "rewards/margins": 3.1949996948242188, + "rewards/rejected": -2.3949432373046875, + "step": 333 + }, + { + "epoch": 0.43, + "learning_rate": 6.426681121245526e-08, + "logits/chosen": -3.1875717639923096, + "logits/rejected": -3.103623390197754, + "logps/chosen": -521.1002807617188, + "logps/rejected": -832.9805908203125, + "loss": 0.5766, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8059494495391846, + "rewards/margins": 2.8037309646606445, + "rewards/rejected": -1.9977813959121704, + "step": 334 + }, + { + "epoch": 0.43, + "learning_rate": 6.406859852203981e-08, + "logits/chosen": -3.287693738937378, + "logits/rejected": -3.1650633811950684, + "logps/chosen": -486.66473388671875, + "logps/rejected": -1219.488037109375, + "loss": 0.5741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6724945306777954, + "rewards/margins": 3.4315032958984375, + "rewards/rejected": -2.7590088844299316, + "step": 335 + }, + { + "epoch": 0.43, + "learning_rate": 6.387014543809223e-08, + "logits/chosen": -3.302924871444702, + "logits/rejected": -3.2094759941101074, + "logps/chosen": -508.908203125, + "logps/rejected": -1132.064453125, + "loss": 0.5392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6480133533477783, + "rewards/margins": 3.4104278087615967, + "rewards/rejected": -2.7624144554138184, + "step": 336 + }, + { + "epoch": 0.43, + "learning_rate": 6.367145535162812e-08, + "logits/chosen": -3.2779791355133057, + "logits/rejected": -3.101553201675415, + "logps/chosen": -488.5245361328125, + "logps/rejected": -952.9678344726562, + "loss": 0.5491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.741912841796875, + "rewards/margins": 2.539346218109131, + "rewards/rejected": -1.797433614730835, + "step": 337 + }, + { + "epoch": 0.43, + "learning_rate": 6.347253165771289e-08, + "logits/chosen": -3.312553882598877, + "logits/rejected": -3.2179059982299805, + "logps/chosen": -488.763427734375, + "logps/rejected": -874.90185546875, + "loss": 0.5342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7298752069473267, + "rewards/margins": 2.498683214187622, + "rewards/rejected": -1.7688080072402954, + "step": 338 + }, + { + "epoch": 0.43, + "learning_rate": 6.327337775540361e-08, + "logits/chosen": -3.3398189544677734, + "logits/rejected": -3.269469738006592, + "logps/chosen": -473.88787841796875, + "logps/rejected": -1221.177978515625, + "loss": 0.5528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5913742184638977, + "rewards/margins": 3.547722101211548, + "rewards/rejected": -2.956347703933716, + "step": 339 + }, + { + "epoch": 0.43, + "learning_rate": 6.307399704769099e-08, + "logits/chosen": -3.2465853691101074, + "logits/rejected": -3.18654465675354, + "logps/chosen": -451.9352111816406, + "logps/rejected": -2076.99609375, + "loss": 0.543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5801117420196533, + "rewards/margins": 5.222287178039551, + "rewards/rejected": -4.642175674438477, + "step": 340 + }, + { + "epoch": 0.43, + "learning_rate": 6.287439294144119e-08, + "logits/chosen": -3.2874622344970703, + "logits/rejected": -3.163883686065674, + "logps/chosen": -473.98211669921875, + "logps/rejected": -1173.72265625, + "loss": 0.5479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7335205078125, + "rewards/margins": 3.4779083728790283, + "rewards/rejected": -2.7443878650665283, + "step": 341 + }, + { + "epoch": 0.44, + "learning_rate": 6.26745688473377e-08, + "logits/chosen": -3.259183406829834, + "logits/rejected": -3.1068978309631348, + "logps/chosen": -504.97412109375, + "logps/rejected": -2045.420166015625, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9358733892440796, + "rewards/margins": 5.0012054443359375, + "rewards/rejected": -4.065332412719727, + "step": 342 + }, + { + "epoch": 0.44, + "learning_rate": 6.247452817982293e-08, + "logits/chosen": -3.1833407878875732, + "logits/rejected": -3.092222213745117, + "logps/chosen": -426.7772216796875, + "logps/rejected": -1000.989501953125, + "loss": 0.5543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6968567371368408, + "rewards/margins": 3.3136627674102783, + "rewards/rejected": -2.6168060302734375, + "step": 343 + }, + { + "epoch": 0.44, + "learning_rate": 6.227427435703996e-08, + "logits/chosen": -3.178605079650879, + "logits/rejected": -3.2027812004089355, + "logps/chosen": -537.3505859375, + "logps/rejected": -1221.7049560546875, + "loss": 0.5764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9452240467071533, + "rewards/margins": 4.205264091491699, + "rewards/rejected": -3.260040283203125, + "step": 344 + }, + { + "epoch": 0.44, + "learning_rate": 6.20738108007741e-08, + "logits/chosen": -3.2754361629486084, + "logits/rejected": -3.0789332389831543, + "logps/chosen": -483.54046630859375, + "logps/rejected": -2023.93408203125, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7785934209823608, + "rewards/margins": 5.880858421325684, + "rewards/rejected": -5.102264404296875, + "step": 345 + }, + { + "epoch": 0.44, + "learning_rate": 6.187314093639443e-08, + "logits/chosen": -3.3390636444091797, + "logits/rejected": -3.228574275970459, + "logps/chosen": -514.1868896484375, + "logps/rejected": -1500.512939453125, + "loss": 0.5454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6766525506973267, + "rewards/margins": 4.0555219650268555, + "rewards/rejected": -3.3788695335388184, + "step": 346 + }, + { + "epoch": 0.44, + "learning_rate": 6.167226819279528e-08, + "logits/chosen": -3.276970624923706, + "logits/rejected": -3.1952552795410156, + "logps/chosen": -532.3362426757812, + "logps/rejected": -1180.7923583984375, + "loss": 0.5894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9185745716094971, + "rewards/margins": 3.6910386085510254, + "rewards/rejected": -2.7724640369415283, + "step": 347 + }, + { + "epoch": 0.44, + "learning_rate": 6.147119600233758e-08, + "logits/chosen": -3.264439582824707, + "logits/rejected": -3.1260576248168945, + "logps/chosen": -555.0325317382812, + "logps/rejected": -4363.5966796875, + "loss": 0.5846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6645752191543579, + "rewards/margins": 6.0228271484375, + "rewards/rejected": -5.358252048492432, + "step": 348 + }, + { + "epoch": 0.44, + "learning_rate": 6.126992780079031e-08, + "logits/chosen": -3.2219090461730957, + "logits/rejected": -3.1914572715759277, + "logps/chosen": -519.6248779296875, + "logps/rejected": -741.7815551757812, + "loss": 0.5449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9599533081054688, + "rewards/margins": 2.527357578277588, + "rewards/rejected": -1.5674042701721191, + "step": 349 + }, + { + "epoch": 0.45, + "learning_rate": 6.106846702727172e-08, + "logits/chosen": -3.1874279975891113, + "logits/rejected": -3.132251262664795, + "logps/chosen": -472.0279235839844, + "logps/rejected": -1425.93212890625, + "loss": 0.5471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8173096179962158, + "rewards/margins": 4.48012113571167, + "rewards/rejected": -3.662811279296875, + "step": 350 + }, + { + "epoch": 0.45, + "learning_rate": 6.086681712419058e-08, + "logits/chosen": -3.293400287628174, + "logits/rejected": -3.2048516273498535, + "logps/chosen": -489.92974853515625, + "logps/rejected": -1298.0435791015625, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.726391613483429, + "rewards/margins": 3.9505434036254883, + "rewards/rejected": -3.224151611328125, + "step": 351 + }, + { + "epoch": 0.45, + "learning_rate": 6.066498153718735e-08, + "logits/chosen": -3.2669925689697266, + "logits/rejected": -3.253699779510498, + "logps/chosen": -569.7796020507812, + "logps/rejected": -1589.841064453125, + "loss": 0.5577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7352157831192017, + "rewards/margins": 4.301682949066162, + "rewards/rejected": -3.56646728515625, + "step": 352 + }, + { + "epoch": 0.45, + "learning_rate": 6.046296371507533e-08, + "logits/chosen": -3.310945749282837, + "logits/rejected": -3.1832571029663086, + "logps/chosen": -474.72503662109375, + "logps/rejected": -1959.502685546875, + "loss": 0.5463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7763855457305908, + "rewards/margins": 5.560998916625977, + "rewards/rejected": -4.784613132476807, + "step": 353 + }, + { + "epoch": 0.45, + "learning_rate": 6.02607671097817e-08, + "logits/chosen": -3.314014196395874, + "logits/rejected": -3.160698413848877, + "logps/chosen": -507.52581787109375, + "logps/rejected": -1108.5723876953125, + "loss": 0.57, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7367401123046875, + "rewards/margins": 3.256613254547119, + "rewards/rejected": -2.5198731422424316, + "step": 354 + }, + { + "epoch": 0.45, + "learning_rate": 6.005839517628861e-08, + "logits/chosen": -3.2997961044311523, + "logits/rejected": -3.1454195976257324, + "logps/chosen": -537.9642333984375, + "logps/rejected": -1148.164794921875, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.791485607624054, + "rewards/margins": 3.4946107864379883, + "rewards/rejected": -2.703125, + "step": 355 + }, + { + "epoch": 0.45, + "learning_rate": 5.985585137257401e-08, + "logits/chosen": -3.232290029525757, + "logits/rejected": -3.1478629112243652, + "logps/chosen": -523.9434814453125, + "logps/rejected": -998.603271484375, + "loss": 0.5918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4738296866416931, + "rewards/margins": 2.874983310699463, + "rewards/rejected": -2.401153564453125, + "step": 356 + }, + { + "epoch": 0.46, + "learning_rate": 5.965313915955268e-08, + "logits/chosen": -3.275529384613037, + "logits/rejected": -3.1203413009643555, + "logps/chosen": -472.4722595214844, + "logps/rejected": -1410.44189453125, + "loss": 0.5358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.507519543170929, + "rewards/margins": 3.572235107421875, + "rewards/rejected": -3.064715623855591, + "step": 357 + }, + { + "epoch": 0.46, + "learning_rate": 5.945026200101701e-08, + "logits/chosen": -3.3139142990112305, + "logits/rejected": -3.205871105194092, + "logps/chosen": -470.4710998535156, + "logps/rejected": -2318.08642578125, + "loss": 0.5207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6717498898506165, + "rewards/margins": 5.848849296569824, + "rewards/rejected": -5.177099704742432, + "step": 358 + }, + { + "epoch": 0.46, + "learning_rate": 5.9247223363577924e-08, + "logits/chosen": -3.23366117477417, + "logits/rejected": -3.2452340126037598, + "logps/chosen": -518.3046264648438, + "logps/rejected": -1400.648681640625, + "loss": 0.5621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9113662242889404, + "rewards/margins": 4.4918532371521, + "rewards/rejected": -3.5804872512817383, + "step": 359 + }, + { + "epoch": 0.46, + "learning_rate": 5.90440267166055e-08, + "logits/chosen": -3.25108003616333, + "logits/rejected": -3.1760551929473877, + "logps/chosen": -477.0731201171875, + "logps/rejected": -1159.895263671875, + "loss": 0.5463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5612594485282898, + "rewards/margins": 2.873631477355957, + "rewards/rejected": -2.3123717308044434, + "step": 360 + }, + { + "epoch": 0.46, + "learning_rate": 5.8840675532169806e-08, + "logits/chosen": -3.259692668914795, + "logits/rejected": -3.2096657752990723, + "logps/chosen": -484.2942810058594, + "logps/rejected": -1264.406982421875, + "loss": 0.5182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8355072736740112, + "rewards/margins": 3.874325752258301, + "rewards/rejected": -3.038818359375, + "step": 361 + }, + { + "epoch": 0.46, + "learning_rate": 5.8637173284981525e-08, + "logits/chosen": -3.343686103820801, + "logits/rejected": -3.242115020751953, + "logps/chosen": -478.250732421875, + "logps/rejected": -832.8880615234375, + "loss": 0.5939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7983200550079346, + "rewards/margins": 2.6013383865356445, + "rewards/rejected": -1.8030180931091309, + "step": 362 + }, + { + "epoch": 0.46, + "learning_rate": 5.843352345233257e-08, + "logits/chosen": -3.334282398223877, + "logits/rejected": -3.2646007537841797, + "logps/chosen": -464.56768798828125, + "logps/rejected": -1519.530517578125, + "loss": 0.5454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8550094366073608, + "rewards/margins": 5.320268154144287, + "rewards/rejected": -4.465258598327637, + "step": 363 + }, + { + "epoch": 0.46, + "learning_rate": 5.8229729514036697e-08, + "logits/chosen": -3.3561151027679443, + "logits/rejected": -3.1337854862213135, + "logps/chosen": -436.8844909667969, + "logps/rejected": -1532.0130615234375, + "loss": 0.5215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7772018909454346, + "rewards/margins": 3.6809403896331787, + "rewards/rejected": -2.903738498687744, + "step": 364 + }, + { + "epoch": 0.47, + "learning_rate": 5.802579495237003e-08, + "logits/chosen": -3.357820987701416, + "logits/rejected": -3.1438779830932617, + "logps/chosen": -529.4112548828125, + "logps/rejected": -1645.3294677734375, + "loss": 0.6011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7872207760810852, + "rewards/margins": 4.1994123458862305, + "rewards/rejected": -3.412191867828369, + "step": 365 + }, + { + "epoch": 0.47, + "learning_rate": 5.7821723252011546e-08, + "logits/chosen": -3.2753217220306396, + "logits/rejected": -3.191965341567993, + "logps/chosen": -489.88519287109375, + "logps/rejected": -1129.312744140625, + "loss": 0.5463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6377350091934204, + "rewards/margins": 3.2044036388397217, + "rewards/rejected": -2.566668748855591, + "step": 366 + }, + { + "epoch": 0.47, + "learning_rate": 5.7617517899983546e-08, + "logits/chosen": -3.321348190307617, + "logits/rejected": -3.1970291137695312, + "logps/chosen": -504.01824951171875, + "logps/rejected": -757.8255615234375, + "loss": 0.566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8347412347793579, + "rewards/margins": 2.590733528137207, + "rewards/rejected": -1.7559921741485596, + "step": 367 + }, + { + "epoch": 0.47, + "learning_rate": 5.741318238559209e-08, + "logits/chosen": -3.2528076171875, + "logits/rejected": -3.178328275680542, + "logps/chosen": -505.56048583984375, + "logps/rejected": -913.0062255859375, + "loss": 0.5718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7079681158065796, + "rewards/margins": 3.005337715148926, + "rewards/rejected": -2.2973694801330566, + "step": 368 + }, + { + "epoch": 0.47, + "learning_rate": 5.7208720200367334e-08, + "logits/chosen": -3.2704625129699707, + "logits/rejected": -3.233672618865967, + "logps/chosen": -506.3554992675781, + "logps/rejected": -1487.369140625, + "loss": 0.5486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5404205322265625, + "rewards/margins": 3.922384738922119, + "rewards/rejected": -3.3819642066955566, + "step": 369 + }, + { + "epoch": 0.47, + "learning_rate": 5.7004134838003895e-08, + "logits/chosen": -3.2704122066497803, + "logits/rejected": -3.1330466270446777, + "logps/chosen": -479.4857177734375, + "logps/rejected": -990.1763916015625, + "loss": 0.5701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7066742181777954, + "rewards/margins": 3.020150661468506, + "rewards/rejected": -2.3134765625, + "step": 370 + }, + { + "epoch": 0.47, + "learning_rate": 5.6799429794301135e-08, + "logits/chosen": -3.2975475788116455, + "logits/rejected": -3.1758503913879395, + "logps/chosen": -502.6893310546875, + "logps/rejected": -1186.4747314453125, + "loss": 0.5297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7026581168174744, + "rewards/margins": 3.5154175758361816, + "rewards/rejected": -2.8127593994140625, + "step": 371 + }, + { + "epoch": 0.47, + "learning_rate": 5.659460856710345e-08, + "logits/chosen": -3.3360040187835693, + "logits/rejected": -3.189126491546631, + "logps/chosen": -490.61767578125, + "logps/rejected": -1676.1728515625, + "loss": 0.5794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8694976568222046, + "rewards/margins": 4.949041843414307, + "rewards/rejected": -4.0795440673828125, + "step": 372 + }, + { + "epoch": 0.48, + "learning_rate": 5.63896746562405e-08, + "logits/chosen": -3.3216824531555176, + "logits/rejected": -3.0868818759918213, + "logps/chosen": -469.3122253417969, + "logps/rejected": -1677.80322265625, + "loss": 0.5354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7199646234512329, + "rewards/margins": 4.010339260101318, + "rewards/rejected": -3.290374755859375, + "step": 373 + }, + { + "epoch": 0.48, + "learning_rate": 5.618463156346739e-08, + "logits/chosen": -3.2430615425109863, + "logits/rejected": -3.028045177459717, + "logps/chosen": -546.699951171875, + "logps/rejected": -2276.915283203125, + "loss": 0.566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7388702630996704, + "rewards/margins": 4.871335029602051, + "rewards/rejected": -4.132464408874512, + "step": 374 + }, + { + "epoch": 0.48, + "learning_rate": 5.597948279240483e-08, + "logits/chosen": -3.303199052810669, + "logits/rejected": -3.167431592941284, + "logps/chosen": -508.6291809082031, + "logps/rejected": -931.2562255859375, + "loss": 0.5755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6494796872138977, + "rewards/margins": 2.8954665660858154, + "rewards/rejected": -2.2459869384765625, + "step": 375 + }, + { + "epoch": 0.48, + "learning_rate": 5.5774231848479313e-08, + "logits/chosen": -3.3543949127197266, + "logits/rejected": -3.170283317565918, + "logps/chosen": -460.4706726074219, + "logps/rejected": -1194.115966796875, + "loss": 0.5539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7671051025390625, + "rewards/margins": 3.641079902648926, + "rewards/rejected": -2.8739748001098633, + "step": 376 + }, + { + "epoch": 0.48, + "learning_rate": 5.556888223886315e-08, + "logits/chosen": -3.2624309062957764, + "logits/rejected": -3.2154622077941895, + "logps/chosen": -549.3995361328125, + "logps/rejected": -1026.098876953125, + "loss": 0.6262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6712554693222046, + "rewards/margins": 3.047811985015869, + "rewards/rejected": -2.376556396484375, + "step": 377 + }, + { + "epoch": 0.48, + "learning_rate": 5.536343747241459e-08, + "logits/chosen": -3.2207298278808594, + "logits/rejected": -3.0934665203094482, + "logps/chosen": -540.969482421875, + "logps/rejected": -1067.127685546875, + "loss": 0.5732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9861084222793579, + "rewards/margins": 3.2693848609924316, + "rewards/rejected": -2.2832765579223633, + "step": 378 + }, + { + "epoch": 0.48, + "learning_rate": 5.515790105961785e-08, + "logits/chosen": -3.3036904335021973, + "logits/rejected": -3.1555428504943848, + "logps/chosen": -489.5745544433594, + "logps/rejected": -1456.939697265625, + "loss": 0.541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7330887317657471, + "rewards/margins": 4.051997661590576, + "rewards/rejected": -3.31890869140625, + "step": 379 + }, + { + "epoch": 0.48, + "learning_rate": 5.495227651252314e-08, + "logits/chosen": -3.289823293685913, + "logits/rejected": -3.213009834289551, + "logps/chosen": -542.5499267578125, + "logps/rejected": -1108.21826171875, + "loss": 0.6154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6020752191543579, + "rewards/margins": 3.4272704124450684, + "rewards/rejected": -2.8251953125, + "step": 380 + }, + { + "epoch": 0.49, + "learning_rate": 5.474656734468662e-08, + "logits/chosen": -3.2583866119384766, + "logits/rejected": -3.1245312690734863, + "logps/chosen": -541.963134765625, + "logps/rejected": -2106.08984375, + "loss": 0.539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8055038452148438, + "rewards/margins": 4.844438076019287, + "rewards/rejected": -4.038934230804443, + "step": 381 + }, + { + "epoch": 0.49, + "learning_rate": 5.454077707111041e-08, + "logits/chosen": -3.315122127532959, + "logits/rejected": -3.064659595489502, + "logps/chosen": -499.416259765625, + "logps/rejected": -1309.0782470703125, + "loss": 0.5736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7751495838165283, + "rewards/margins": 3.28192138671875, + "rewards/rejected": -2.5067718029022217, + "step": 382 + }, + { + "epoch": 0.49, + "learning_rate": 5.433490920818249e-08, + "logits/chosen": -3.3006796836853027, + "logits/rejected": -3.208481788635254, + "logps/chosen": -524.7070922851562, + "logps/rejected": -1188.89599609375, + "loss": 0.5633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9193115234375, + "rewards/margins": 4.286383152008057, + "rewards/rejected": -3.3670716285705566, + "step": 383 + }, + { + "epoch": 0.49, + "learning_rate": 5.4128967273616623e-08, + "logits/chosen": -3.2542777061462402, + "logits/rejected": -3.0545029640197754, + "logps/chosen": -483.8607177734375, + "logps/rejected": -873.5701293945312, + "loss": 0.552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7175369262695312, + "rewards/margins": 2.3263320922851562, + "rewards/rejected": -1.608795166015625, + "step": 384 + }, + { + "epoch": 0.49, + "learning_rate": 5.392295478639225e-08, + "logits/chosen": -3.2456183433532715, + "logits/rejected": -3.167344331741333, + "logps/chosen": -449.5637512207031, + "logps/rejected": -2446.816650390625, + "loss": 0.5607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7231155633926392, + "rewards/margins": 6.57064962387085, + "rewards/rejected": -5.8475341796875, + "step": 385 + }, + { + "epoch": 0.49, + "learning_rate": 5.3716875266694385e-08, + "logits/chosen": -3.2641959190368652, + "logits/rejected": -3.131826400756836, + "logps/chosen": -482.5624694824219, + "logps/rejected": -1530.8052978515625, + "loss": 0.5846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.80108642578125, + "rewards/margins": 4.216827392578125, + "rewards/rejected": -3.415740966796875, + "step": 386 + }, + { + "epoch": 0.49, + "learning_rate": 5.351073223585341e-08, + "logits/chosen": -3.2300844192504883, + "logits/rejected": -3.141963481903076, + "logps/chosen": -555.1974487304688, + "logps/rejected": -2165.93896484375, + "loss": 0.5256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7572387456893921, + "rewards/margins": 5.12060546875, + "rewards/rejected": -4.363366603851318, + "step": 387 + }, + { + "epoch": 0.49, + "learning_rate": 5.3304529216284965e-08, + "logits/chosen": -3.329439163208008, + "logits/rejected": -3.276338577270508, + "logps/chosen": -525.1246337890625, + "logps/rejected": -1394.213134765625, + "loss": 0.553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8294677734375, + "rewards/margins": 4.190350532531738, + "rewards/rejected": -3.3608827590942383, + "step": 388 + }, + { + "epoch": 0.5, + "learning_rate": 5.309826973142973e-08, + "logits/chosen": -3.339341640472412, + "logits/rejected": -3.1334586143493652, + "logps/chosen": -508.73455810546875, + "logps/rejected": -1116.4482421875, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7305816411972046, + "rewards/margins": 3.1060423851013184, + "rewards/rejected": -2.3754608631134033, + "step": 389 + }, + { + "epoch": 0.5, + "learning_rate": 5.28919573056932e-08, + "logits/chosen": -3.275097131729126, + "logits/rejected": -3.182854652404785, + "logps/chosen": -447.2318115234375, + "logps/rejected": -1114.140380859375, + "loss": 0.5208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7710784673690796, + "rewards/margins": 3.741507053375244, + "rewards/rejected": -2.970428466796875, + "step": 390 + }, + { + "epoch": 0.5, + "learning_rate": 5.268559546438549e-08, + "logits/chosen": -3.2305402755737305, + "logits/rejected": -3.198777198791504, + "logps/chosen": -478.4827575683594, + "logps/rejected": -4191.4677734375, + "loss": 0.5281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8015182614326477, + "rewards/margins": 6.0830488204956055, + "rewards/rejected": -5.281530857086182, + "step": 391 + }, + { + "epoch": 0.5, + "learning_rate": 5.2479187733661114e-08, + "logits/chosen": -3.257903575897217, + "logits/rejected": -3.2118325233459473, + "logps/chosen": -453.9065246582031, + "logps/rejected": -1270.6595458984375, + "loss": 0.5394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7810226678848267, + "rewards/margins": 3.8165664672851562, + "rewards/rejected": -3.035543918609619, + "step": 392 + }, + { + "epoch": 0.5, + "learning_rate": 5.227273764045868e-08, + "logits/chosen": -3.188377618789673, + "logits/rejected": -3.166889190673828, + "logps/chosen": -463.5711669921875, + "logps/rejected": -1352.3515625, + "loss": 0.4996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7903091907501221, + "rewards/margins": 4.0863237380981445, + "rewards/rejected": -3.2960143089294434, + "step": 393 + }, + { + "epoch": 0.5, + "learning_rate": 5.2066248712440654e-08, + "logits/chosen": -3.2622177600860596, + "logits/rejected": -3.1544198989868164, + "logps/chosen": -526.42724609375, + "logps/rejected": -977.880126953125, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.78338623046875, + "rewards/margins": 2.894509792327881, + "rewards/rejected": -2.111123561859131, + "step": 394 + }, + { + "epoch": 0.5, + "learning_rate": 5.185972447793312e-08, + "logits/chosen": -3.2708816528320312, + "logits/rejected": -3.0893335342407227, + "logps/chosen": -520.3009033203125, + "logps/rejected": -2109.151611328125, + "loss": 0.5532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8595306277275085, + "rewards/margins": 5.863623142242432, + "rewards/rejected": -5.004092216491699, + "step": 395 + }, + { + "epoch": 0.5, + "learning_rate": 5.16531684658654e-08, + "logits/chosen": -3.322185754776001, + "logits/rejected": -3.1788413524627686, + "logps/chosen": -498.4941101074219, + "logps/rejected": -987.5025634765625, + "loss": 0.607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6714935302734375, + "rewards/margins": 2.8516478538513184, + "rewards/rejected": -2.180154323577881, + "step": 396 + }, + { + "epoch": 0.51, + "learning_rate": 5.1446584205709856e-08, + "logits/chosen": -3.298698902130127, + "logits/rejected": -3.1866073608398438, + "logps/chosen": -487.543212890625, + "logps/rejected": -1241.435546875, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6389496326446533, + "rewards/margins": 3.648303508758545, + "rewards/rejected": -3.0093536376953125, + "step": 397 + }, + { + "epoch": 0.51, + "learning_rate": 5.123997522742151e-08, + "logits/chosen": -3.3021395206451416, + "logits/rejected": -3.226907730102539, + "logps/chosen": -438.7533874511719, + "logps/rejected": -1249.17041015625, + "loss": 0.5206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7893127202987671, + "rewards/margins": 4.234247207641602, + "rewards/rejected": -3.444934129714966, + "step": 398 + }, + { + "epoch": 0.51, + "learning_rate": 5.103334506137772e-08, + "logits/chosen": -3.2073516845703125, + "logits/rejected": -3.085909843444824, + "logps/chosen": -519.4733276367188, + "logps/rejected": -1362.2303466796875, + "loss": 0.5707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.890869140625, + "rewards/margins": 3.9750123023986816, + "rewards/rejected": -3.0841431617736816, + "step": 399 + }, + { + "epoch": 0.51, + "learning_rate": 5.082669723831793e-08, + "logits/chosen": -3.2338547706604004, + "logits/rejected": -3.016464948654175, + "logps/chosen": -529.8164672851562, + "logps/rejected": -1963.080078125, + "loss": 0.537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8087142705917358, + "rewards/margins": 5.407933235168457, + "rewards/rejected": -4.599218845367432, + "step": 400 + }, + { + "epoch": 0.51, + "learning_rate": 5.062003528928327e-08, + "logits/chosen": -3.2518911361694336, + "logits/rejected": -3.087702751159668, + "logps/chosen": -525.1804809570312, + "logps/rejected": -2082.611083984375, + "loss": 0.5711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7724365592002869, + "rewards/margins": 5.479861259460449, + "rewards/rejected": -4.707425117492676, + "step": 401 + }, + { + "epoch": 0.51, + "learning_rate": 5.041336274555624e-08, + "logits/chosen": -3.2790093421936035, + "logits/rejected": -3.1709649562835693, + "logps/chosen": -508.2475891113281, + "logps/rejected": -1544.721923828125, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8135955929756165, + "rewards/margins": 4.935275554656982, + "rewards/rejected": -4.121679782867432, + "step": 402 + }, + { + "epoch": 0.51, + "learning_rate": 5.0206683138600414e-08, + "logits/chosen": -3.209660530090332, + "logits/rejected": -3.1547181606292725, + "logps/chosen": -545.2484130859375, + "logps/rejected": -1061.9981689453125, + "loss": 0.6003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.96837317943573, + "rewards/margins": 3.3264665603637695, + "rewards/rejected": -2.35809326171875, + "step": 403 + }, + { + "epoch": 0.51, + "learning_rate": 5e-08, + "logits/chosen": -3.253588914871216, + "logits/rejected": -3.142024517059326, + "logps/chosen": -518.955322265625, + "logps/rejected": -702.0631103515625, + "loss": 0.6113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9420044422149658, + "rewards/margins": 2.8002443313598633, + "rewards/rejected": -1.8582396507263184, + "step": 404 + }, + { + "epoch": 0.52, + "learning_rate": 4.9793316861399595e-08, + "logits/chosen": -3.226717472076416, + "logits/rejected": -3.1314632892608643, + "logps/chosen": -501.78839111328125, + "logps/rejected": -2450.813720703125, + "loss": 0.5243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6937515735626221, + "rewards/margins": 6.886000633239746, + "rewards/rejected": -6.192248821258545, + "step": 405 + }, + { + "epoch": 0.52, + "learning_rate": 4.9586637254443753e-08, + "logits/chosen": -3.331528425216675, + "logits/rejected": -3.2903802394866943, + "logps/chosen": -501.75140380859375, + "logps/rejected": -1033.873046875, + "loss": 0.5599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7759307622909546, + "rewards/margins": 3.230865478515625, + "rewards/rejected": -2.45493483543396, + "step": 406 + }, + { + "epoch": 0.52, + "learning_rate": 4.937996471071675e-08, + "logits/chosen": -3.272279739379883, + "logits/rejected": -3.1669209003448486, + "logps/chosen": -583.1241455078125, + "logps/rejected": -1420.98681640625, + "loss": 0.5712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.731719970703125, + "rewards/margins": 4.053491115570068, + "rewards/rejected": -3.3217711448669434, + "step": 407 + }, + { + "epoch": 0.52, + "learning_rate": 4.917330276168207e-08, + "logits/chosen": -3.2611231803894043, + "logits/rejected": -3.0977938175201416, + "logps/chosen": -511.4303894042969, + "logps/rejected": -1334.5330810546875, + "loss": 0.5523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9417572021484375, + "rewards/margins": 4.4424591064453125, + "rewards/rejected": -3.500701904296875, + "step": 408 + }, + { + "epoch": 0.52, + "learning_rate": 4.8966654938622295e-08, + "logits/chosen": -3.2372748851776123, + "logits/rejected": -3.0310862064361572, + "logps/chosen": -536.0106201171875, + "logps/rejected": -3671.80419921875, + "loss": 0.5877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9387726187705994, + "rewards/margins": 7.9192047119140625, + "rewards/rejected": -6.980432033538818, + "step": 409 + }, + { + "epoch": 0.52, + "learning_rate": 4.8760024772578495e-08, + "logits/chosen": -3.250983715057373, + "logits/rejected": -3.1515111923217773, + "logps/chosen": -558.0955200195312, + "logps/rejected": -1549.90283203125, + "loss": 0.5495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9260162711143494, + "rewards/margins": 4.654562473297119, + "rewards/rejected": -3.728546142578125, + "step": 410 + }, + { + "epoch": 0.52, + "learning_rate": 4.855341579429014e-08, + "logits/chosen": -3.2875542640686035, + "logits/rejected": -3.1975202560424805, + "logps/chosen": -525.0045166015625, + "logps/rejected": -1538.3507080078125, + "loss": 0.5608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8718109130859375, + "rewards/margins": 4.473978042602539, + "rewards/rejected": -3.6021666526794434, + "step": 411 + }, + { + "epoch": 0.53, + "learning_rate": 4.834683153413459e-08, + "logits/chosen": -3.313408851623535, + "logits/rejected": -3.1769893169403076, + "logps/chosen": -526.3875732421875, + "logps/rejected": -1277.9354248046875, + "loss": 0.6026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7539047002792358, + "rewards/margins": 3.911778450012207, + "rewards/rejected": -3.1578736305236816, + "step": 412 + }, + { + "epoch": 0.53, + "learning_rate": 4.814027552206689e-08, + "logits/chosen": -3.263566493988037, + "logits/rejected": -3.1279783248901367, + "logps/chosen": -474.3828125, + "logps/rejected": -1153.514892578125, + "loss": 0.5474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7441024780273438, + "rewards/margins": 3.3538529872894287, + "rewards/rejected": -2.609750509262085, + "step": 413 + }, + { + "epoch": 0.53, + "learning_rate": 4.793375128755933e-08, + "logits/chosen": -3.226339817047119, + "logits/rejected": -3.110654830932617, + "logps/chosen": -574.3088989257812, + "logps/rejected": -2142.31982421875, + "loss": 0.594, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0597107410430908, + "rewards/margins": 5.717516899108887, + "rewards/rejected": -4.657806396484375, + "step": 414 + }, + { + "epoch": 0.53, + "learning_rate": 4.7727262359541324e-08, + "logits/chosen": -3.2998404502868652, + "logits/rejected": -3.155292510986328, + "logps/chosen": -517.1890869140625, + "logps/rejected": -894.0174560546875, + "loss": 0.5346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8177475333213806, + "rewards/margins": 2.5435287952423096, + "rewards/rejected": -1.7257813215255737, + "step": 415 + }, + { + "epoch": 0.53, + "learning_rate": 4.7520812266338875e-08, + "logits/chosen": -3.2651491165161133, + "logits/rejected": -3.0860886573791504, + "logps/chosen": -434.2261047363281, + "logps/rejected": -1392.79052734375, + "loss": 0.5332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8321518301963806, + "rewards/margins": 4.299989223480225, + "rewards/rejected": -3.467837333679199, + "step": 416 + }, + { + "epoch": 0.53, + "learning_rate": 4.7314404535614514e-08, + "logits/chosen": -3.257119655609131, + "logits/rejected": -3.194373607635498, + "logps/chosen": -536.0596923828125, + "logps/rejected": -1360.5789794921875, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8962463140487671, + "rewards/margins": 3.9956116676330566, + "rewards/rejected": -3.099365234375, + "step": 417 + }, + { + "epoch": 0.53, + "learning_rate": 4.7108042694306806e-08, + "logits/chosen": -3.2513985633850098, + "logits/rejected": -3.176677703857422, + "logps/chosen": -506.5772705078125, + "logps/rejected": -1120.0186767578125, + "loss": 0.6008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6529983282089233, + "rewards/margins": 3.299748420715332, + "rewards/rejected": -2.646749973297119, + "step": 418 + }, + { + "epoch": 0.53, + "learning_rate": 4.690173026857027e-08, + "logits/chosen": -3.1804425716400146, + "logits/rejected": -3.038257122039795, + "logps/chosen": -491.39984130859375, + "logps/rejected": -2584.3828125, + "loss": 0.5701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9195098876953125, + "rewards/margins": 5.689971923828125, + "rewards/rejected": -4.7704620361328125, + "step": 419 + }, + { + "epoch": 0.54, + "learning_rate": 4.669547078371503e-08, + "logits/chosen": -3.2569029331207275, + "logits/rejected": -3.060784339904785, + "logps/chosen": -500.71417236328125, + "logps/rejected": -999.80029296875, + "loss": 0.5695, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.041864037513733, + "rewards/margins": 3.788888692855835, + "rewards/rejected": -2.7470245361328125, + "step": 420 + }, + { + "epoch": 0.54, + "learning_rate": 4.648926776414659e-08, + "logits/chosen": -3.304374933242798, + "logits/rejected": -3.21266508102417, + "logps/chosen": -488.4495849609375, + "logps/rejected": -1280.0389404296875, + "loss": 0.5509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9674332141876221, + "rewards/margins": 4.448378086090088, + "rewards/rejected": -3.480944871902466, + "step": 421 + }, + { + "epoch": 0.54, + "learning_rate": 4.6283124733305623e-08, + "logits/chosen": -3.2459185123443604, + "logits/rejected": -3.055206775665283, + "logps/chosen": -572.7269287109375, + "logps/rejected": -2822.139892578125, + "loss": 0.6, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.95550537109375, + "rewards/margins": 6.08709716796875, + "rewards/rejected": -5.131591796875, + "step": 422 + }, + { + "epoch": 0.54, + "learning_rate": 4.6077045213607754e-08, + "logits/chosen": -3.284116268157959, + "logits/rejected": -3.100625514984131, + "logps/chosen": -477.7874755859375, + "logps/rejected": -1888.757568359375, + "loss": 0.5283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.879101574420929, + "rewards/margins": 4.867459297180176, + "rewards/rejected": -3.9883575439453125, + "step": 423 + }, + { + "epoch": 0.54, + "learning_rate": 4.5871032726383385e-08, + "logits/chosen": -3.259796142578125, + "logits/rejected": -3.1783206462860107, + "logps/chosen": -480.91375732421875, + "logps/rejected": -1456.26171875, + "loss": 0.5153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7921936511993408, + "rewards/margins": 4.298956394195557, + "rewards/rejected": -3.506762742996216, + "step": 424 + }, + { + "epoch": 0.54, + "learning_rate": 4.566509079181751e-08, + "logits/chosen": -3.2577202320098877, + "logits/rejected": -3.1663568019866943, + "logps/chosen": -478.090087890625, + "logps/rejected": -1231.5863037109375, + "loss": 0.5488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8962936401367188, + "rewards/margins": 3.9565842151641846, + "rewards/rejected": -3.060290575027466, + "step": 425 + }, + { + "epoch": 0.54, + "learning_rate": 4.5459222928889584e-08, + "logits/chosen": -3.2588114738464355, + "logits/rejected": -3.1720972061157227, + "logps/chosen": -487.3298645019531, + "logps/rejected": -1294.3941650390625, + "loss": 0.5663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8414748907089233, + "rewards/margins": 3.9226458072662354, + "rewards/rejected": -3.0811707973480225, + "step": 426 + }, + { + "epoch": 0.54, + "learning_rate": 4.525343265531338e-08, + "logits/chosen": -3.2940611839294434, + "logits/rejected": -3.1932029724121094, + "logps/chosen": -564.9061279296875, + "logps/rejected": -1290.346923828125, + "loss": 0.5523, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0330567359924316, + "rewards/margins": 4.234790325164795, + "rewards/rejected": -3.2017335891723633, + "step": 427 + }, + { + "epoch": 0.55, + "learning_rate": 4.504772348747686e-08, + "logits/chosen": -3.289931297302246, + "logits/rejected": -3.126323699951172, + "logps/chosen": -500.18377685546875, + "logps/rejected": -761.504150390625, + "loss": 0.5697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7872451543807983, + "rewards/margins": 2.3052139282226562, + "rewards/rejected": -1.5179688930511475, + "step": 428 + }, + { + "epoch": 0.55, + "learning_rate": 4.484209894038215e-08, + "logits/chosen": -3.275205612182617, + "logits/rejected": -3.0982513427734375, + "logps/chosen": -444.5372009277344, + "logps/rejected": -1329.6719970703125, + "loss": 0.5223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.95538330078125, + "rewards/margins": 3.6394410133361816, + "rewards/rejected": -2.6840577125549316, + "step": 429 + }, + { + "epoch": 0.55, + "learning_rate": 4.463656252758542e-08, + "logits/chosen": -3.298576831817627, + "logits/rejected": -3.167227268218994, + "logps/chosen": -473.994384765625, + "logps/rejected": -1479.6513671875, + "loss": 0.5348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0022156238555908, + "rewards/margins": 4.377306938171387, + "rewards/rejected": -3.375091552734375, + "step": 430 + }, + { + "epoch": 0.55, + "learning_rate": 4.443111776113686e-08, + "logits/chosen": -3.193350076675415, + "logits/rejected": -3.091433048248291, + "logps/chosen": -541.4193115234375, + "logps/rejected": -1236.602783203125, + "loss": 0.5776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7242599725723267, + "rewards/margins": 4.065752029418945, + "rewards/rejected": -3.34149169921875, + "step": 431 + }, + { + "epoch": 0.55, + "learning_rate": 4.4225768151520695e-08, + "logits/chosen": -3.2708849906921387, + "logits/rejected": -3.24082088470459, + "logps/chosen": -527.395751953125, + "logps/rejected": -1612.8192138671875, + "loss": 0.5655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7062774896621704, + "rewards/margins": 4.861056327819824, + "rewards/rejected": -4.154779434204102, + "step": 432 + }, + { + "epoch": 0.55, + "learning_rate": 4.402051720759518e-08, + "logits/chosen": -3.2586121559143066, + "logits/rejected": -3.166844606399536, + "logps/chosen": -492.591552734375, + "logps/rejected": -1872.53369140625, + "loss": 0.5375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7433944940567017, + "rewards/margins": 5.091856479644775, + "rewards/rejected": -4.348462104797363, + "step": 433 + }, + { + "epoch": 0.55, + "learning_rate": 4.3815368436532614e-08, + "logits/chosen": -3.2569241523742676, + "logits/rejected": -3.0658843517303467, + "logps/chosen": -497.9145812988281, + "logps/rejected": -1166.024169921875, + "loss": 0.5799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9569061398506165, + "rewards/margins": 3.5201079845428467, + "rewards/rejected": -2.563201904296875, + "step": 434 + }, + { + "epoch": 0.55, + "learning_rate": 4.361032534375951e-08, + "logits/chosen": -3.20474910736084, + "logits/rejected": -3.127595901489258, + "logps/chosen": -493.52935791015625, + "logps/rejected": -1514.281005859375, + "loss": 0.5263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8627029657363892, + "rewards/margins": 4.445887565612793, + "rewards/rejected": -3.5831847190856934, + "step": 435 + }, + { + "epoch": 0.56, + "learning_rate": 4.340539143289655e-08, + "logits/chosen": -3.210862636566162, + "logits/rejected": -3.1659746170043945, + "logps/chosen": -539.3200073242188, + "logps/rejected": -2042.026611328125, + "loss": 0.594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8773987293243408, + "rewards/margins": 5.801355361938477, + "rewards/rejected": -4.923956394195557, + "step": 436 + }, + { + "epoch": 0.56, + "learning_rate": 4.320057020569888e-08, + "logits/chosen": -3.253671169281006, + "logits/rejected": -3.1351053714752197, + "logps/chosen": -542.253173828125, + "logps/rejected": -1783.2301025390625, + "loss": 0.5631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9128631353378296, + "rewards/margins": 5.280911445617676, + "rewards/rejected": -4.368048191070557, + "step": 437 + }, + { + "epoch": 0.56, + "learning_rate": 4.29958651619961e-08, + "logits/chosen": -3.2961487770080566, + "logits/rejected": -3.1621086597442627, + "logps/chosen": -469.3222961425781, + "logps/rejected": -1355.228271484375, + "loss": 0.5216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7197158932685852, + "rewards/margins": 4.414302349090576, + "rewards/rejected": -3.6945862770080566, + "step": 438 + }, + { + "epoch": 0.56, + "learning_rate": 4.279127979963266e-08, + "logits/chosen": -3.2199552059173584, + "logits/rejected": -3.169954776763916, + "logps/chosen": -537.8057861328125, + "logps/rejected": -1394.458251953125, + "loss": 0.5567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.927075207233429, + "rewards/margins": 4.035147190093994, + "rewards/rejected": -3.108071804046631, + "step": 439 + }, + { + "epoch": 0.56, + "learning_rate": 4.2586817614407896e-08, + "logits/chosen": -3.277587890625, + "logits/rejected": -3.2632737159729004, + "logps/chosen": -559.5659790039062, + "logps/rejected": -985.3599243164062, + "loss": 0.5724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8304840326309204, + "rewards/margins": 3.461212158203125, + "rewards/rejected": -2.630728244781494, + "step": 440 + }, + { + "epoch": 0.56, + "learning_rate": 4.238248210001645e-08, + "logits/chosen": -3.337428569793701, + "logits/rejected": -3.147611379623413, + "logps/chosen": -496.21905517578125, + "logps/rejected": -1182.839111328125, + "loss": 0.5576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9234085083007812, + "rewards/margins": 3.4032485485076904, + "rewards/rejected": -2.479840040206909, + "step": 441 + }, + { + "epoch": 0.56, + "learning_rate": 4.217827674798844e-08, + "logits/chosen": -3.2665767669677734, + "logits/rejected": -3.1515655517578125, + "logps/chosen": -503.57183837890625, + "logps/rejected": -1761.1605224609375, + "loss": 0.5294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8025482296943665, + "rewards/margins": 4.647677421569824, + "rewards/rejected": -3.8451294898986816, + "step": 442 + }, + { + "epoch": 0.56, + "learning_rate": 4.197420504762997e-08, + "logits/chosen": -3.3112616539001465, + "logits/rejected": -3.1498444080352783, + "logps/chosen": -485.48858642578125, + "logps/rejected": -1055.8360595703125, + "loss": 0.5466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.838226318359375, + "rewards/margins": 3.6602964401245117, + "rewards/rejected": -2.822070360183716, + "step": 443 + }, + { + "epoch": 0.57, + "learning_rate": 4.177027048596329e-08, + "logits/chosen": -3.211991310119629, + "logits/rejected": -3.1814608573913574, + "logps/chosen": -608.3141479492188, + "logps/rejected": -983.6510620117188, + "loss": 0.5659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9082580804824829, + "rewards/margins": 3.1717376708984375, + "rewards/rejected": -2.263479709625244, + "step": 444 + }, + { + "epoch": 0.57, + "learning_rate": 4.156647654766743e-08, + "logits/chosen": -3.2023768424987793, + "logits/rejected": -3.113314151763916, + "logps/chosen": -513.4708251953125, + "logps/rejected": -1074.89208984375, + "loss": 0.5566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9659698605537415, + "rewards/margins": 3.3989503383636475, + "rewards/rejected": -2.432980537414551, + "step": 445 + }, + { + "epoch": 0.57, + "learning_rate": 4.13628267150185e-08, + "logits/chosen": -3.2579216957092285, + "logits/rejected": -3.161987781524658, + "logps/chosen": -463.6068420410156, + "logps/rejected": -1637.453125, + "loss": 0.5499, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.992443859577179, + "rewards/margins": 4.826208591461182, + "rewards/rejected": -3.8337645530700684, + "step": 446 + }, + { + "epoch": 0.57, + "learning_rate": 4.1159324467830196e-08, + "logits/chosen": -3.2482635974884033, + "logits/rejected": -3.119536876678467, + "logps/chosen": -471.5355529785156, + "logps/rejected": -1169.01318359375, + "loss": 0.5332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7519516348838806, + "rewards/margins": 3.903437614440918, + "rewards/rejected": -3.1514861583709717, + "step": 447 + }, + { + "epoch": 0.57, + "learning_rate": 4.095597328339452e-08, + "logits/chosen": -3.2599844932556152, + "logits/rejected": -3.1398143768310547, + "logps/chosen": -533.0091552734375, + "logps/rejected": -1340.2763671875, + "loss": 0.5411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5409119129180908, + "rewards/margins": 3.7788147926330566, + "rewards/rejected": -3.237902879714966, + "step": 448 + }, + { + "epoch": 0.57, + "learning_rate": 4.075277663642208e-08, + "logits/chosen": -3.304736375808716, + "logits/rejected": -3.2132511138916016, + "logps/chosen": -484.34454345703125, + "logps/rejected": -1191.693115234375, + "loss": 0.5621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8618271350860596, + "rewards/margins": 3.8652329444885254, + "rewards/rejected": -3.003405809402466, + "step": 449 + }, + { + "epoch": 0.57, + "learning_rate": 4.054973799898299e-08, + "logits/chosen": -3.2318007946014404, + "logits/rejected": -3.14064884185791, + "logps/chosen": -508.1822509765625, + "logps/rejected": -1322.78271484375, + "loss": 0.5339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7176010608673096, + "rewards/margins": 3.5872483253479004, + "rewards/rejected": -2.869647264480591, + "step": 450 + }, + { + "epoch": 0.57, + "learning_rate": 4.0346860840447325e-08, + "logits/chosen": -3.20090913772583, + "logits/rejected": -3.0947389602661133, + "logps/chosen": -546.9422607421875, + "logps/rejected": -2439.0087890625, + "loss": 0.5516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8046401739120483, + "rewards/margins": 5.927464485168457, + "rewards/rejected": -5.122824192047119, + "step": 451 + }, + { + "epoch": 0.58, + "learning_rate": 4.014414862742599e-08, + "logits/chosen": -3.28363037109375, + "logits/rejected": -3.223215103149414, + "logps/chosen": -522.6209106445312, + "logps/rejected": -1210.915283203125, + "loss": 0.5776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6077712774276733, + "rewards/margins": 3.9902939796447754, + "rewards/rejected": -3.3825225830078125, + "step": 452 + }, + { + "epoch": 0.58, + "learning_rate": 3.994160482371138e-08, + "logits/chosen": -3.2909440994262695, + "logits/rejected": -3.184887409210205, + "logps/chosen": -490.07501220703125, + "logps/rejected": -976.4556884765625, + "loss": 0.5463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9054611325263977, + "rewards/margins": 4.168425559997559, + "rewards/rejected": -3.2629640102386475, + "step": 453 + }, + { + "epoch": 0.58, + "learning_rate": 3.973923289021829e-08, + "logits/chosen": -3.273343801498413, + "logits/rejected": -3.2041518688201904, + "logps/chosen": -456.79290771484375, + "logps/rejected": -1925.927490234375, + "loss": 0.5379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.988423228263855, + "rewards/margins": 5.381667137145996, + "rewards/rejected": -4.393243789672852, + "step": 454 + }, + { + "epoch": 0.58, + "learning_rate": 3.953703628492467e-08, + "logits/chosen": -3.244431257247925, + "logits/rejected": -3.0988690853118896, + "logps/chosen": -519.2131958007812, + "logps/rejected": -950.0682373046875, + "loss": 0.5534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.92523193359375, + "rewards/margins": 2.915627956390381, + "rewards/rejected": -1.9903960227966309, + "step": 455 + }, + { + "epoch": 0.58, + "learning_rate": 3.933501846281266e-08, + "logits/chosen": -3.3139848709106445, + "logits/rejected": -3.2039096355438232, + "logps/chosen": -468.0716247558594, + "logps/rejected": -1104.976806640625, + "loss": 0.554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8128921985626221, + "rewards/margins": 2.9293198585510254, + "rewards/rejected": -2.1164276599884033, + "step": 456 + }, + { + "epoch": 0.58, + "learning_rate": 3.913318287580942e-08, + "logits/chosen": -3.2535958290100098, + "logits/rejected": -3.0942137241363525, + "logps/chosen": -496.7640686035156, + "logps/rejected": -4196.8359375, + "loss": 0.555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.818402111530304, + "rewards/margins": 5.246115207672119, + "rewards/rejected": -4.427712917327881, + "step": 457 + }, + { + "epoch": 0.58, + "learning_rate": 3.893153297272828e-08, + "logits/chosen": -3.231919050216675, + "logits/rejected": -3.061084747314453, + "logps/chosen": -533.5147094726562, + "logps/rejected": -1943.5281982421875, + "loss": 0.5843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8131988644599915, + "rewards/margins": 5.093011856079102, + "rewards/rejected": -4.279812812805176, + "step": 458 + }, + { + "epoch": 0.59, + "learning_rate": 3.87300721992097e-08, + "logits/chosen": -3.271249294281006, + "logits/rejected": -3.0708720684051514, + "logps/chosen": -516.4366455078125, + "logps/rejected": -2319.46875, + "loss": 0.5753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8913757801055908, + "rewards/margins": 5.62286376953125, + "rewards/rejected": -4.731488227844238, + "step": 459 + }, + { + "epoch": 0.59, + "learning_rate": 3.8528803997662425e-08, + "logits/chosen": -3.2132339477539062, + "logits/rejected": -3.1862716674804688, + "logps/chosen": -527.208740234375, + "logps/rejected": -1106.8990478515625, + "loss": 0.5603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9781677722930908, + "rewards/margins": 4.0356597900390625, + "rewards/rejected": -3.057492256164551, + "step": 460 + }, + { + "epoch": 0.59, + "learning_rate": 3.8327731807204744e-08, + "logits/chosen": -3.2432260513305664, + "logits/rejected": -3.172245979309082, + "logps/chosen": -499.11236572265625, + "logps/rejected": -732.991943359375, + "loss": 0.5591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8172820806503296, + "rewards/margins": 2.4544618129730225, + "rewards/rejected": -1.6371796131134033, + "step": 461 + }, + { + "epoch": 0.59, + "learning_rate": 3.812685906360557e-08, + "logits/chosen": -3.2799110412597656, + "logits/rejected": -3.145247220993042, + "logps/chosen": -505.023681640625, + "logps/rejected": -1410.3447265625, + "loss": 0.5235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.875152587890625, + "rewards/margins": 4.560003280639648, + "rewards/rejected": -3.6848511695861816, + "step": 462 + }, + { + "epoch": 0.59, + "learning_rate": 3.792618919922591e-08, + "logits/chosen": -3.186194896697998, + "logits/rejected": -3.166506290435791, + "logps/chosen": -484.14520263671875, + "logps/rejected": -1416.083740234375, + "loss": 0.5462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8006637692451477, + "rewards/margins": 4.931767463684082, + "rewards/rejected": -4.131103515625, + "step": 463 + }, + { + "epoch": 0.59, + "learning_rate": 3.7725725642960044e-08, + "logits/chosen": -3.2346463203430176, + "logits/rejected": -3.125354766845703, + "logps/chosen": -533.220703125, + "logps/rejected": -1659.70751953125, + "loss": 0.5586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8950256109237671, + "rewards/margins": 5.056384086608887, + "rewards/rejected": -4.161358833312988, + "step": 464 + }, + { + "epoch": 0.59, + "learning_rate": 3.752547182017708e-08, + "logits/chosen": -3.272177219390869, + "logits/rejected": -3.125403881072998, + "logps/chosen": -536.0398559570312, + "logps/rejected": -2659.68017578125, + "loss": 0.5404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9813293218612671, + "rewards/margins": 6.552423000335693, + "rewards/rejected": -5.571093559265137, + "step": 465 + }, + { + "epoch": 0.59, + "learning_rate": 3.7325431152662297e-08, + "logits/chosen": -3.269078016281128, + "logits/rejected": -3.225847005844116, + "logps/chosen": -520.267822265625, + "logps/rejected": -1406.13916015625, + "loss": 0.5398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9320328235626221, + "rewards/margins": 4.361900329589844, + "rewards/rejected": -3.429867744445801, + "step": 466 + }, + { + "epoch": 0.6, + "learning_rate": 3.7125607058558804e-08, + "logits/chosen": -3.3079004287719727, + "logits/rejected": -3.1034064292907715, + "logps/chosen": -503.2004089355469, + "logps/rejected": -1019.311767578125, + "loss": 0.5553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.562786877155304, + "rewards/margins": 2.6971497535705566, + "rewards/rejected": -2.1343626976013184, + "step": 467 + }, + { + "epoch": 0.6, + "learning_rate": 3.692600295230901e-08, + "logits/chosen": -3.3206984996795654, + "logits/rejected": -3.1833112239837646, + "logps/chosen": -509.1126403808594, + "logps/rejected": -1592.413330078125, + "loss": 0.5606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8381637334823608, + "rewards/margins": 4.668281555175781, + "rewards/rejected": -3.83011794090271, + "step": 468 + }, + { + "epoch": 0.6, + "learning_rate": 3.6726622244596394e-08, + "logits/chosen": -3.259021282196045, + "logits/rejected": -3.158496856689453, + "logps/chosen": -504.3584289550781, + "logps/rejected": -1283.048095703125, + "loss": 0.5309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9954468011856079, + "rewards/margins": 4.002734184265137, + "rewards/rejected": -3.0072875022888184, + "step": 469 + }, + { + "epoch": 0.6, + "learning_rate": 3.6527468342287096e-08, + "logits/chosen": -3.2926740646362305, + "logits/rejected": -3.1435322761535645, + "logps/chosen": -515.225830078125, + "logps/rejected": -909.7846069335938, + "loss": 0.5608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9130325317382812, + "rewards/margins": 2.9476394653320312, + "rewards/rejected": -2.03460693359375, + "step": 470 + }, + { + "epoch": 0.6, + "learning_rate": 3.632854464837188e-08, + "logits/chosen": -3.226196765899658, + "logits/rejected": -3.177750825881958, + "logps/chosen": -522.8284912109375, + "logps/rejected": -2395.28173828125, + "loss": 0.5349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9152892827987671, + "rewards/margins": 6.49346923828125, + "rewards/rejected": -5.578180313110352, + "step": 471 + }, + { + "epoch": 0.6, + "learning_rate": 3.612985456190778e-08, + "logits/chosen": -3.301316738128662, + "logits/rejected": -3.1681766510009766, + "logps/chosen": -486.4922790527344, + "logps/rejected": -1385.76708984375, + "loss": 0.5502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9601227045059204, + "rewards/margins": 4.3193817138671875, + "rewards/rejected": -3.3592591285705566, + "step": 472 + }, + { + "epoch": 0.6, + "learning_rate": 3.5931401477960176e-08, + "logits/chosen": -3.271075963973999, + "logits/rejected": -3.1461048126220703, + "logps/chosen": -491.6304931640625, + "logps/rejected": -1010.310791015625, + "loss": 0.5523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8447540402412415, + "rewards/margins": 2.757006883621216, + "rewards/rejected": -1.9122529029846191, + "step": 473 + }, + { + "epoch": 0.6, + "learning_rate": 3.5733188787544745e-08, + "logits/chosen": -3.2966833114624023, + "logits/rejected": -3.150221586227417, + "logps/chosen": -514.6214599609375, + "logps/rejected": -2354.04833984375, + "loss": 0.5437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9936081171035767, + "rewards/margins": 6.315367221832275, + "rewards/rejected": -5.321759223937988, + "step": 474 + }, + { + "epoch": 0.61, + "learning_rate": 3.553521987756945e-08, + "logits/chosen": -3.2411136627197266, + "logits/rejected": -3.1038570404052734, + "logps/chosen": -450.5810546875, + "logps/rejected": -1206.63134765625, + "loss": 0.5644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6687088012695312, + "rewards/margins": 3.596065044403076, + "rewards/rejected": -2.927356004714966, + "step": 475 + }, + { + "epoch": 0.61, + "learning_rate": 3.5337498130776766e-08, + "logits/chosen": -3.2386794090270996, + "logits/rejected": -3.1369881629943848, + "logps/chosen": -445.12994384765625, + "logps/rejected": -1781.9139404296875, + "loss": 0.5114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8160949945449829, + "rewards/margins": 5.648101806640625, + "rewards/rejected": -4.832006931304932, + "step": 476 + }, + { + "epoch": 0.61, + "learning_rate": 3.5140026925685804e-08, + "logits/chosen": -3.2362141609191895, + "logits/rejected": -3.0880560874938965, + "logps/chosen": -483.1852722167969, + "logps/rejected": -1349.63916015625, + "loss": 0.5468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8498016595840454, + "rewards/margins": 4.757607936859131, + "rewards/rejected": -3.907806396484375, + "step": 477 + }, + { + "epoch": 0.61, + "learning_rate": 3.494280963653463e-08, + "logits/chosen": -3.32125186920166, + "logits/rejected": -3.1975340843200684, + "logps/chosen": -480.2544860839844, + "logps/rejected": -1239.076904296875, + "loss": 0.5489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8819351196289062, + "rewards/margins": 3.7230119705200195, + "rewards/rejected": -2.8410768508911133, + "step": 478 + }, + { + "epoch": 0.61, + "learning_rate": 3.474584963322256e-08, + "logits/chosen": -3.2891433238983154, + "logits/rejected": -3.2097654342651367, + "logps/chosen": -509.1554260253906, + "logps/rejected": -542.6946411132812, + "loss": 0.5738, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0463577508926392, + "rewards/margins": 2.42995023727417, + "rewards/rejected": -1.3835922479629517, + "step": 479 + }, + { + "epoch": 0.61, + "learning_rate": 3.4549150281252633e-08, + "logits/chosen": -3.185882329940796, + "logits/rejected": -3.146146297454834, + "logps/chosen": -565.9609375, + "logps/rejected": -1313.558837890625, + "loss": 0.573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7622970342636108, + "rewards/margins": 4.422374248504639, + "rewards/rejected": -3.6600770950317383, + "step": 480 + }, + { + "epoch": 0.61, + "learning_rate": 3.435271494167404e-08, + "logits/chosen": -3.270577907562256, + "logits/rejected": -3.0544257164001465, + "logps/chosen": -451.62939453125, + "logps/rejected": -1608.140869140625, + "loss": 0.5137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9048492908477783, + "rewards/margins": 5.322565078735352, + "rewards/rejected": -4.417715549468994, + "step": 481 + }, + { + "epoch": 0.61, + "learning_rate": 3.415654697102478e-08, + "logits/chosen": -3.2363386154174805, + "logits/rejected": -3.13822603225708, + "logps/chosen": -530.2109985351562, + "logps/rejected": -1801.3101806640625, + "loss": 0.576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9653961658477783, + "rewards/margins": 5.3656158447265625, + "rewards/rejected": -4.400219917297363, + "step": 482 + }, + { + "epoch": 0.62, + "learning_rate": 3.396064972127421e-08, + "logits/chosen": -3.307049036026001, + "logits/rejected": -3.1932950019836426, + "logps/chosen": -576.7259521484375, + "logps/rejected": -1479.3291015625, + "loss": 0.5627, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1741806268692017, + "rewards/margins": 5.2777509689331055, + "rewards/rejected": -4.103570461273193, + "step": 483 + }, + { + "epoch": 0.62, + "learning_rate": 3.376502653976583e-08, + "logits/chosen": -3.318411111831665, + "logits/rejected": -3.1726787090301514, + "logps/chosen": -481.02813720703125, + "logps/rejected": -1146.5479736328125, + "loss": 0.5591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6550629138946533, + "rewards/margins": 3.0057101249694824, + "rewards/rejected": -2.35064697265625, + "step": 484 + }, + { + "epoch": 0.62, + "learning_rate": 3.356968076916006e-08, + "logits/chosen": -3.323089838027954, + "logits/rejected": -3.138510227203369, + "logps/chosen": -517.1570434570312, + "logps/rejected": -1814.26416015625, + "loss": 0.5808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7787719964981079, + "rewards/margins": 4.5832624435424805, + "rewards/rejected": -3.804490566253662, + "step": 485 + }, + { + "epoch": 0.62, + "learning_rate": 3.337461574737716e-08, + "logits/chosen": -3.192356586456299, + "logits/rejected": -3.1332874298095703, + "logps/chosen": -523.3148193359375, + "logps/rejected": -860.64794921875, + "loss": 0.5501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8992522954940796, + "rewards/margins": 2.9847898483276367, + "rewards/rejected": -2.0855376720428467, + "step": 486 + }, + { + "epoch": 0.62, + "learning_rate": 3.317983480754015e-08, + "logits/chosen": -3.286533832550049, + "logits/rejected": -3.16586971282959, + "logps/chosen": -513.792236328125, + "logps/rejected": -1185.9315185546875, + "loss": 0.5633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8015625476837158, + "rewards/margins": 3.542257785797119, + "rewards/rejected": -2.7406952381134033, + "step": 487 + }, + { + "epoch": 0.62, + "learning_rate": 3.298534127791784e-08, + "logits/chosen": -3.2776873111724854, + "logits/rejected": -3.1799676418304443, + "logps/chosen": -491.42547607421875, + "logps/rejected": -1059.334716796875, + "loss": 0.513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8789581656455994, + "rewards/margins": 3.4896302223205566, + "rewards/rejected": -2.6106719970703125, + "step": 488 + }, + { + "epoch": 0.62, + "learning_rate": 3.279113848186808e-08, + "logits/chosen": -3.2850050926208496, + "logits/rejected": -3.107992649078369, + "logps/chosen": -497.4919128417969, + "logps/rejected": -1275.5811767578125, + "loss": 0.6031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8299102783203125, + "rewards/margins": 3.5129456520080566, + "rewards/rejected": -2.683035373687744, + "step": 489 + }, + { + "epoch": 0.62, + "learning_rate": 3.259722973778077e-08, + "logits/chosen": -3.205706834793091, + "logits/rejected": -3.1334261894226074, + "logps/chosen": -474.6579284667969, + "logps/rejected": -1482.4947509765625, + "loss": 0.5637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0422486066818237, + "rewards/margins": 5.301428318023682, + "rewards/rejected": -4.259179592132568, + "step": 490 + }, + { + "epoch": 0.63, + "learning_rate": 3.24036183590214e-08, + "logits/chosen": -3.2103633880615234, + "logits/rejected": -3.1249797344207764, + "logps/chosen": -538.3001708984375, + "logps/rejected": -1642.535888671875, + "loss": 0.5458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8971084952354431, + "rewards/margins": 4.965883255004883, + "rewards/rejected": -4.068774223327637, + "step": 491 + }, + { + "epoch": 0.63, + "learning_rate": 3.221030765387417e-08, + "logits/chosen": -3.3040552139282227, + "logits/rejected": -3.1499414443969727, + "logps/chosen": -490.07098388671875, + "logps/rejected": -1148.6827392578125, + "loss": 0.5613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7923462390899658, + "rewards/margins": 4.415926933288574, + "rewards/rejected": -3.6235809326171875, + "step": 492 + }, + { + "epoch": 0.63, + "learning_rate": 3.201730092548573e-08, + "logits/chosen": -3.2446584701538086, + "logits/rejected": -3.2466821670532227, + "logps/chosen": -488.1404724121094, + "logps/rejected": -1534.698486328125, + "loss": 0.5173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8857483267784119, + "rewards/margins": 5.277997016906738, + "rewards/rejected": -4.392248630523682, + "step": 493 + }, + { + "epoch": 0.63, + "learning_rate": 3.18246014718085e-08, + "logits/chosen": -3.2457187175750732, + "logits/rejected": -3.1179747581481934, + "logps/chosen": -454.25872802734375, + "logps/rejected": -1018.7896118164062, + "loss": 0.5185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7396743893623352, + "rewards/margins": 3.1274001598358154, + "rewards/rejected": -2.387725830078125, + "step": 494 + }, + { + "epoch": 0.63, + "learning_rate": 3.16322125855445e-08, + "logits/chosen": -3.2759056091308594, + "logits/rejected": -3.163102149963379, + "logps/chosen": -484.62725830078125, + "logps/rejected": -1137.947265625, + "loss": 0.5627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8981033563613892, + "rewards/margins": 3.3117966651916504, + "rewards/rejected": -2.413693428039551, + "step": 495 + }, + { + "epoch": 0.63, + "learning_rate": 3.1440137554088955e-08, + "logits/chosen": -3.3068900108337402, + "logits/rejected": -3.225459575653076, + "logps/chosen": -486.17034912109375, + "logps/rejected": -1330.664794921875, + "loss": 0.5542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9101775884628296, + "rewards/margins": 4.315561294555664, + "rewards/rejected": -3.405383348464966, + "step": 496 + }, + { + "epoch": 0.63, + "learning_rate": 3.1248379659474225e-08, + "logits/chosen": -3.2838680744171143, + "logits/rejected": -3.179647922515869, + "logps/chosen": -538.3533935546875, + "logps/rejected": -1450.966552734375, + "loss": 0.5392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9407624006271362, + "rewards/margins": 4.5906524658203125, + "rewards/rejected": -3.649890184402466, + "step": 497 + }, + { + "epoch": 0.63, + "learning_rate": 3.1056942178313604e-08, + "logits/chosen": -3.3236327171325684, + "logits/rejected": -3.0883188247680664, + "logps/chosen": -502.60406494140625, + "logps/rejected": -2377.287841796875, + "loss": 0.5501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9077774286270142, + "rewards/margins": 5.7036452293396, + "rewards/rejected": -4.795867919921875, + "step": 498 + }, + { + "epoch": 0.64, + "learning_rate": 3.086582838174551e-08, + "logits/chosen": -3.3181629180908203, + "logits/rejected": -3.15497088432312, + "logps/chosen": -494.78668212890625, + "logps/rejected": -2214.68212890625, + "loss": 0.5543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7361053228378296, + "rewards/margins": 6.318307399749756, + "rewards/rejected": -5.582201957702637, + "step": 499 + }, + { + "epoch": 0.64, + "learning_rate": 3.0675041535377396e-08, + "logits/chosen": -3.2899513244628906, + "logits/rejected": -3.144716739654541, + "logps/chosen": -496.66302490234375, + "logps/rejected": -709.6910400390625, + "loss": 0.5479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8565292358398438, + "rewards/margins": 2.6038713455200195, + "rewards/rejected": -1.7473418712615967, + "step": 500 + }, + { + "epoch": 0.64, + "learning_rate": 3.048458489923005e-08, + "logits/chosen": -3.200637102127075, + "logits/rejected": -3.128419876098633, + "logps/chosen": -549.2447509765625, + "logps/rejected": -931.1070556640625, + "loss": 0.551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8357208371162415, + "rewards/margins": 2.9704620838165283, + "rewards/rejected": -2.1347413063049316, + "step": 501 + }, + { + "epoch": 0.64, + "learning_rate": 3.029446172768193e-08, + "logits/chosen": -3.2949719429016113, + "logits/rejected": -3.1720499992370605, + "logps/chosen": -538.5635986328125, + "logps/rejected": -1977.1734619140625, + "loss": 0.5762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9505600333213806, + "rewards/margins": 5.824473857879639, + "rewards/rejected": -4.873913764953613, + "step": 502 + }, + { + "epoch": 0.64, + "learning_rate": 3.0104675269413436e-08, + "logits/chosen": -3.332315444946289, + "logits/rejected": -3.2078475952148438, + "logps/chosen": -505.5064697265625, + "logps/rejected": -1625.999755859375, + "loss": 0.5442, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0528533458709717, + "rewards/margins": 5.245638847351074, + "rewards/rejected": -4.192785739898682, + "step": 503 + }, + { + "epoch": 0.64, + "learning_rate": 2.991522876735154e-08, + "logits/chosen": -3.3161754608154297, + "logits/rejected": -3.1710739135742188, + "logps/chosen": -499.0768737792969, + "logps/rejected": -1260.60107421875, + "loss": 0.584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6688766479492188, + "rewards/margins": 3.3476943969726562, + "rewards/rejected": -2.6788177490234375, + "step": 504 + }, + { + "epoch": 0.64, + "learning_rate": 2.9726125458614215e-08, + "logits/chosen": -3.2855653762817383, + "logits/rejected": -3.1966323852539062, + "logps/chosen": -558.8660278320312, + "logps/rejected": -1318.2232666015625, + "loss": 0.5898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8564178943634033, + "rewards/margins": 3.977755546569824, + "rewards/rejected": -3.121337890625, + "step": 505 + }, + { + "epoch": 0.64, + "learning_rate": 2.9537368574455303e-08, + "logits/chosen": -3.2340104579925537, + "logits/rejected": -3.134324550628662, + "logps/chosen": -525.7532958984375, + "logps/rejected": -987.2208862304688, + "loss": 0.5537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.890393078327179, + "rewards/margins": 3.2280547618865967, + "rewards/rejected": -2.3376617431640625, + "step": 506 + }, + { + "epoch": 0.65, + "learning_rate": 2.9348961340209117e-08, + "logits/chosen": -3.224386215209961, + "logits/rejected": -3.062349796295166, + "logps/chosen": -499.90911865234375, + "logps/rejected": -1035.1923828125, + "loss": 0.5462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.702008068561554, + "rewards/margins": 3.1669678688049316, + "rewards/rejected": -2.4649596214294434, + "step": 507 + }, + { + "epoch": 0.65, + "learning_rate": 2.916090697523549e-08, + "logits/chosen": -3.267133951187134, + "logits/rejected": -3.017885684967041, + "logps/chosen": -579.9769287109375, + "logps/rejected": -3158.091796875, + "loss": 0.5585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0068756341934204, + "rewards/margins": 7.673147678375244, + "rewards/rejected": -6.666272163391113, + "step": 508 + }, + { + "epoch": 0.65, + "learning_rate": 2.897320869286462e-08, + "logits/chosen": -3.266256809234619, + "logits/rejected": -3.2155585289001465, + "logps/chosen": -477.8050842285156, + "logps/rejected": -1134.6146240234375, + "loss": 0.5292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.769061267375946, + "rewards/margins": 3.1923158168792725, + "rewards/rejected": -2.4232544898986816, + "step": 509 + }, + { + "epoch": 0.65, + "learning_rate": 2.8785869700342317e-08, + "logits/chosen": -3.3355679512023926, + "logits/rejected": -3.230647563934326, + "logps/chosen": -491.60540771484375, + "logps/rejected": -805.3585815429688, + "loss": 0.5462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7465484738349915, + "rewards/margins": 2.939082145690918, + "rewards/rejected": -2.1925339698791504, + "step": 510 + }, + { + "epoch": 0.65, + "learning_rate": 2.8598893198775044e-08, + "logits/chosen": -3.285409688949585, + "logits/rejected": -3.106088399887085, + "logps/chosen": -500.78582763671875, + "logps/rejected": -3209.60546875, + "loss": 0.5557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0344161987304688, + "rewards/margins": 8.860172271728516, + "rewards/rejected": -7.825757026672363, + "step": 511 + }, + { + "epoch": 0.65, + "learning_rate": 2.841228238307536e-08, + "logits/chosen": -3.2574031352996826, + "logits/rejected": -3.2415056228637695, + "logps/chosen": -529.001953125, + "logps/rejected": -1196.213623046875, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9774261713027954, + "rewards/margins": 4.2907562255859375, + "rewards/rejected": -3.3133301734924316, + "step": 512 + }, + { + "epoch": 0.65, + "learning_rate": 2.8226040441907207e-08, + "logits/chosen": -3.258190631866455, + "logits/rejected": -3.1210532188415527, + "logps/chosen": -494.2944641113281, + "logps/rejected": -1794.0140380859375, + "loss": 0.5349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7813599109649658, + "rewards/margins": 4.763684272766113, + "rewards/rejected": -3.9823241233825684, + "step": 513 + }, + { + "epoch": 0.66, + "learning_rate": 2.8040170557631488e-08, + "logits/chosen": -3.2921142578125, + "logits/rejected": -3.1454508304595947, + "logps/chosen": -504.47528076171875, + "logps/rejected": -2080.66162109375, + "loss": 0.5687, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1726486682891846, + "rewards/margins": 6.410029411315918, + "rewards/rejected": -5.2373809814453125, + "step": 514 + }, + { + "epoch": 0.66, + "learning_rate": 2.7854675906251723e-08, + "logits/chosen": -3.2926697731018066, + "logits/rejected": -3.1604301929473877, + "logps/chosen": -501.97869873046875, + "logps/rejected": -1823.5614013671875, + "loss": 0.5383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7891266345977783, + "rewards/margins": 5.015353202819824, + "rewards/rejected": -4.226226806640625, + "step": 515 + }, + { + "epoch": 0.66, + "learning_rate": 2.7669559657359676e-08, + "logits/chosen": -3.2876546382904053, + "logits/rejected": -3.1496524810791016, + "logps/chosen": -546.7208251953125, + "logps/rejected": -1326.244140625, + "loss": 0.5544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8718322515487671, + "rewards/margins": 4.31715726852417, + "rewards/rejected": -3.4453248977661133, + "step": 516 + }, + { + "epoch": 0.66, + "learning_rate": 2.7484824974081323e-08, + "logits/chosen": -3.2680788040161133, + "logits/rejected": -3.1057240962982178, + "logps/chosen": -499.856201171875, + "logps/rejected": -1481.234619140625, + "loss": 0.5736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8898559808731079, + "rewards/margins": 4.498141765594482, + "rewards/rejected": -3.608285665512085, + "step": 517 + }, + { + "epoch": 0.66, + "learning_rate": 2.730047501302266e-08, + "logits/chosen": -3.2831130027770996, + "logits/rejected": -3.2360353469848633, + "logps/chosen": -531.6681518554688, + "logps/rejected": -1843.2774658203125, + "loss": 0.5193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9364669919013977, + "rewards/margins": 5.23900032043457, + "rewards/rejected": -4.302533149719238, + "step": 518 + }, + { + "epoch": 0.66, + "learning_rate": 2.711651292421593e-08, + "logits/chosen": -3.3279433250427246, + "logits/rejected": -3.1668801307678223, + "logps/chosen": -497.1512451171875, + "logps/rejected": -940.541259765625, + "loss": 0.5241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9467499256134033, + "rewards/margins": 3.435473918914795, + "rewards/rejected": -2.4887237548828125, + "step": 519 + }, + { + "epoch": 0.66, + "learning_rate": 2.6932941851065616e-08, + "logits/chosen": -3.271801233291626, + "logits/rejected": -3.056286573410034, + "logps/chosen": -516.3665771484375, + "logps/rejected": -1291.4041748046875, + "loss": 0.5721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8172760009765625, + "rewards/margins": 4.02656888961792, + "rewards/rejected": -3.2092928886413574, + "step": 520 + }, + { + "epoch": 0.66, + "learning_rate": 2.6749764930294905e-08, + "logits/chosen": -3.2757744789123535, + "logits/rejected": -3.2053580284118652, + "logps/chosen": -559.0560913085938, + "logps/rejected": -1104.52685546875, + "loss": 0.5642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6331726312637329, + "rewards/margins": 3.32232666015625, + "rewards/rejected": -2.6891541481018066, + "step": 521 + }, + { + "epoch": 0.67, + "learning_rate": 2.656698529189193e-08, + "logits/chosen": -3.3288352489471436, + "logits/rejected": -3.2301511764526367, + "logps/chosen": -478.8522033691406, + "logps/rejected": -1185.4735107421875, + "loss": 0.5171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9718719720840454, + "rewards/margins": 3.9949676990509033, + "rewards/rejected": -3.0230956077575684, + "step": 522 + }, + { + "epoch": 0.67, + "learning_rate": 2.638460605905646e-08, + "logits/chosen": -3.284107208251953, + "logits/rejected": -3.2036948204040527, + "logps/chosen": -492.54150390625, + "logps/rejected": -2117.4501953125, + "loss": 0.555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9375473260879517, + "rewards/margins": 6.3798322677612305, + "rewards/rejected": -5.442285537719727, + "step": 523 + }, + { + "epoch": 0.67, + "learning_rate": 2.620263034814632e-08, + "logits/chosen": -3.2878360748291016, + "logits/rejected": -3.0872349739074707, + "logps/chosen": -529.2218017578125, + "logps/rejected": -1094.71826171875, + "loss": 0.5288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9651641845703125, + "rewards/margins": 3.204672336578369, + "rewards/rejected": -2.2395081520080566, + "step": 524 + }, + { + "epoch": 0.67, + "learning_rate": 2.6021061268624378e-08, + "logits/chosen": -3.3734192848205566, + "logits/rejected": -3.272207736968994, + "logps/chosen": -499.404541015625, + "logps/rejected": -991.6915283203125, + "loss": 0.5498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9078598022460938, + "rewards/margins": 3.345057725906372, + "rewards/rejected": -2.4371979236602783, + "step": 525 + }, + { + "epoch": 0.67, + "learning_rate": 2.5839901923005202e-08, + "logits/chosen": -3.2724125385284424, + "logits/rejected": -3.2033772468566895, + "logps/chosen": -500.8013000488281, + "logps/rejected": -1800.0924072265625, + "loss": 0.5504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9099777340888977, + "rewards/margins": 5.734422206878662, + "rewards/rejected": -4.824444770812988, + "step": 526 + }, + { + "epoch": 0.67, + "learning_rate": 2.5659155406802192e-08, + "logits/chosen": -3.224398612976074, + "logits/rejected": -3.137464761734009, + "logps/chosen": -516.0178833007812, + "logps/rejected": -2334.95361328125, + "loss": 0.5351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.807904064655304, + "rewards/margins": 6.616449356079102, + "rewards/rejected": -5.808545112609863, + "step": 527 + }, + { + "epoch": 0.67, + "learning_rate": 2.5478824808474607e-08, + "logits/chosen": -3.3133997917175293, + "logits/rejected": -3.1030893325805664, + "logps/chosen": -479.4396057128906, + "logps/rejected": -1492.703857421875, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9556839466094971, + "rewards/margins": 4.9656267166137695, + "rewards/rejected": -4.009943008422852, + "step": 528 + }, + { + "epoch": 0.67, + "learning_rate": 2.5298913209374804e-08, + "logits/chosen": -3.246680736541748, + "logits/rejected": -3.1375417709350586, + "logps/chosen": -517.14697265625, + "logps/rejected": -1381.69140625, + "loss": 0.5702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.854766845703125, + "rewards/margins": 3.829336643218994, + "rewards/rejected": -2.974569797515869, + "step": 529 + }, + { + "epoch": 0.68, + "learning_rate": 2.5119423683695657e-08, + "logits/chosen": -3.2140164375305176, + "logits/rejected": -3.1486892700195312, + "logps/chosen": -472.52935791015625, + "logps/rejected": -1297.09912109375, + "loss": 0.5562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8393020629882812, + "rewards/margins": 4.385896682739258, + "rewards/rejected": -3.5465943813323975, + "step": 530 + }, + { + "epoch": 0.68, + "learning_rate": 2.494035929841789e-08, + "logits/chosen": -3.266085624694824, + "logits/rejected": -3.1220054626464844, + "logps/chosen": -494.1023864746094, + "logps/rejected": -1118.7181396484375, + "loss": 0.588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7671082019805908, + "rewards/margins": 2.902750015258789, + "rewards/rejected": -2.135641574859619, + "step": 531 + }, + { + "epoch": 0.68, + "learning_rate": 2.4761723113257826e-08, + "logits/chosen": -3.245163679122925, + "logits/rejected": -3.1677820682525635, + "logps/chosen": -523.79736328125, + "logps/rejected": -1265.143310546875, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7837463617324829, + "rewards/margins": 4.375360488891602, + "rewards/rejected": -3.59161376953125, + "step": 532 + }, + { + "epoch": 0.68, + "learning_rate": 2.458351818061497e-08, + "logits/chosen": -3.2220916748046875, + "logits/rejected": -3.171330213546753, + "logps/chosen": -545.365966796875, + "logps/rejected": -1539.40234375, + "loss": 0.5574, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0317474603652954, + "rewards/margins": 4.597622871398926, + "rewards/rejected": -3.565875291824341, + "step": 533 + }, + { + "epoch": 0.68, + "learning_rate": 2.4405747545519962e-08, + "logits/chosen": -3.219789981842041, + "logits/rejected": -3.1855878829956055, + "logps/chosen": -560.1619873046875, + "logps/rejected": -846.3948364257812, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8310486078262329, + "rewards/margins": 2.8713135719299316, + "rewards/rejected": -2.0402650833129883, + "step": 534 + }, + { + "epoch": 0.68, + "learning_rate": 2.422841424558244e-08, + "logits/chosen": -3.2806906700134277, + "logits/rejected": -3.1795220375061035, + "logps/chosen": -506.02642822265625, + "logps/rejected": -1521.6407470703125, + "loss": 0.5328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0771805047988892, + "rewards/margins": 4.817359924316406, + "rewards/rejected": -3.7401795387268066, + "step": 535 + }, + { + "epoch": 0.68, + "learning_rate": 2.4051521310939256e-08, + "logits/chosen": -3.2768828868865967, + "logits/rejected": -3.137404441833496, + "logps/chosen": -449.6060791015625, + "logps/rejected": -1601.598876953125, + "loss": 0.515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8086868524551392, + "rewards/margins": 5.1655988693237305, + "rewards/rejected": -4.356912136077881, + "step": 536 + }, + { + "epoch": 0.68, + "learning_rate": 2.3875071764202558e-08, + "logits/chosen": -3.1918461322784424, + "logits/rejected": -3.192209482192993, + "logps/chosen": -503.2213134765625, + "logps/rejected": -1250.10595703125, + "loss": 0.5247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9166793823242188, + "rewards/margins": 4.447203159332275, + "rewards/rejected": -3.5305237770080566, + "step": 537 + }, + { + "epoch": 0.69, + "learning_rate": 2.3699068620408304e-08, + "logits/chosen": -3.26702880859375, + "logits/rejected": -3.2236111164093018, + "logps/chosen": -496.1551818847656, + "logps/rejected": -961.1436767578125, + "loss": 0.5565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.81414794921875, + "rewards/margins": 3.487780809402466, + "rewards/rejected": -2.673632860183716, + "step": 538 + }, + { + "epoch": 0.69, + "learning_rate": 2.352351488696457e-08, + "logits/chosen": -3.270305871963501, + "logits/rejected": -3.149235248565674, + "logps/chosen": -495.1312255859375, + "logps/rejected": -1452.4599609375, + "loss": 0.5412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8206405639648438, + "rewards/margins": 4.501530647277832, + "rewards/rejected": -3.6808900833129883, + "step": 539 + }, + { + "epoch": 0.69, + "learning_rate": 2.3348413563600322e-08, + "logits/chosen": -3.2771310806274414, + "logits/rejected": -3.1217708587646484, + "logps/chosen": -470.9763488769531, + "logps/rejected": -1043.2138671875, + "loss": 0.5328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7715988159179688, + "rewards/margins": 3.2312850952148438, + "rewards/rejected": -2.459686279296875, + "step": 540 + }, + { + "epoch": 0.69, + "learning_rate": 2.317376764231403e-08, + "logits/chosen": -3.322974681854248, + "logits/rejected": -3.165584087371826, + "logps/chosen": -485.7454833984375, + "logps/rejected": -1083.45068359375, + "loss": 0.5428, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0993865728378296, + "rewards/margins": 4.076147556304932, + "rewards/rejected": -2.9767608642578125, + "step": 541 + }, + { + "epoch": 0.69, + "learning_rate": 2.2999580107322654e-08, + "logits/chosen": -3.2605652809143066, + "logits/rejected": -3.16916561126709, + "logps/chosen": -520.25244140625, + "logps/rejected": -1795.2515869140625, + "loss": 0.5619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9184356927871704, + "rewards/margins": 5.906045913696289, + "rewards/rejected": -4.98760986328125, + "step": 542 + }, + { + "epoch": 0.69, + "learning_rate": 2.2825853935010535e-08, + "logits/chosen": -3.246682643890381, + "logits/rejected": -3.083446502685547, + "logps/chosen": -530.9531860351562, + "logps/rejected": -1063.272705078125, + "loss": 0.5789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8891464471817017, + "rewards/margins": 3.178025960922241, + "rewards/rejected": -2.28887939453125, + "step": 543 + }, + { + "epoch": 0.69, + "learning_rate": 2.2652592093878663e-08, + "logits/chosen": -3.252861499786377, + "logits/rejected": -3.1045494079589844, + "logps/chosen": -528.0704956054688, + "logps/rejected": -1241.9468994140625, + "loss": 0.5119, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.142053246498108, + "rewards/margins": 4.500921726226807, + "rewards/rejected": -3.3588685989379883, + "step": 544 + }, + { + "epoch": 0.69, + "learning_rate": 2.2479797544493827e-08, + "logits/chosen": -3.2186708450317383, + "logits/rejected": -3.138657808303833, + "logps/chosen": -499.095458984375, + "logps/rejected": -1803.463623046875, + "loss": 0.5251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7712295651435852, + "rewards/margins": 5.060554504394531, + "rewards/rejected": -4.289324760437012, + "step": 545 + }, + { + "epoch": 0.7, + "learning_rate": 2.2307473239438153e-08, + "logits/chosen": -3.2676544189453125, + "logits/rejected": -3.030117988586426, + "logps/chosen": -432.79132080078125, + "logps/rejected": -1534.377197265625, + "loss": 0.5053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.84002685546875, + "rewards/margins": 3.945263624191284, + "rewards/rejected": -3.105236768722534, + "step": 546 + }, + { + "epoch": 0.7, + "learning_rate": 2.2135622123258513e-08, + "logits/chosen": -3.290472984313965, + "logits/rejected": -3.215170383453369, + "logps/chosen": -530.1010131835938, + "logps/rejected": -4982.5771484375, + "loss": 0.5442, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0792557001113892, + "rewards/margins": 6.137672424316406, + "rewards/rejected": -5.058416843414307, + "step": 547 + }, + { + "epoch": 0.7, + "learning_rate": 2.196424713241637e-08, + "logits/chosen": -3.2276227474212646, + "logits/rejected": -3.0632028579711914, + "logps/chosen": -537.387451171875, + "logps/rejected": -1065.83154296875, + "loss": 0.5554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9848358631134033, + "rewards/margins": 3.322744846343994, + "rewards/rejected": -2.337908983230591, + "step": 548 + }, + { + "epoch": 0.7, + "learning_rate": 2.1793351195237446e-08, + "logits/chosen": -3.3336005210876465, + "logits/rejected": -3.216538906097412, + "logps/chosen": -513.178466796875, + "logps/rejected": -910.9251098632812, + "loss": 0.5465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9194549322128296, + "rewards/margins": 2.958737373352051, + "rewards/rejected": -2.0392823219299316, + "step": 549 + }, + { + "epoch": 0.7, + "learning_rate": 2.162293723186182e-08, + "logits/chosen": -3.2610511779785156, + "logits/rejected": -3.1958627700805664, + "logps/chosen": -506.4007263183594, + "logps/rejected": -1825.3572998046875, + "loss": 0.5312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7476882934570312, + "rewards/margins": 5.687587261199951, + "rewards/rejected": -4.93989896774292, + "step": 550 + }, + { + "epoch": 0.7, + "learning_rate": 2.1453008154193904e-08, + "logits/chosen": -3.2778303623199463, + "logits/rejected": -3.178175449371338, + "logps/chosen": -464.20245361328125, + "logps/rejected": -2415.3818359375, + "loss": 0.5181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0109329223632812, + "rewards/margins": 6.7156081199646, + "rewards/rejected": -5.704675674438477, + "step": 551 + }, + { + "epoch": 0.7, + "learning_rate": 2.128356686585282e-08, + "logits/chosen": -3.2930946350097656, + "logits/rejected": -3.089268684387207, + "logps/chosen": -573.373779296875, + "logps/rejected": -1824.6397705078125, + "loss": 0.5861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6884827017784119, + "rewards/margins": 4.562292575836182, + "rewards/rejected": -3.873809814453125, + "step": 552 + }, + { + "epoch": 0.7, + "learning_rate": 2.1114616262122648e-08, + "logits/chosen": -3.2644119262695312, + "logits/rejected": -3.2030720710754395, + "logps/chosen": -560.570556640625, + "logps/rejected": -1110.71337890625, + "loss": 0.5546, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.007093906402588, + "rewards/margins": 3.8939714431762695, + "rewards/rejected": -2.8868775367736816, + "step": 553 + }, + { + "epoch": 0.71, + "learning_rate": 2.0946159229903088e-08, + "logits/chosen": -3.2679026126861572, + "logits/rejected": -3.1286392211914062, + "logps/chosen": -566.8541870117188, + "logps/rejected": -1904.6856689453125, + "loss": 0.576, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0476776361465454, + "rewards/margins": 6.282681465148926, + "rewards/rejected": -5.23500394821167, + "step": 554 + }, + { + "epoch": 0.71, + "learning_rate": 2.077819864766e-08, + "logits/chosen": -3.3025963306427, + "logits/rejected": -3.215341329574585, + "logps/chosen": -491.45794677734375, + "logps/rejected": -819.1390380859375, + "loss": 0.5709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5529541373252869, + "rewards/margins": 2.881063938140869, + "rewards/rejected": -2.3281097412109375, + "step": 555 + }, + { + "epoch": 0.71, + "learning_rate": 2.0610737385376347e-08, + "logits/chosen": -3.2655301094055176, + "logits/rejected": -3.1780314445495605, + "logps/chosen": -475.7291259765625, + "logps/rejected": -1276.0263671875, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6448074579238892, + "rewards/margins": 3.9494431018829346, + "rewards/rejected": -3.304635763168335, + "step": 556 + }, + { + "epoch": 0.71, + "learning_rate": 2.0443778304503024e-08, + "logits/chosen": -3.227609634399414, + "logits/rejected": -3.135899066925049, + "logps/chosen": -551.808837890625, + "logps/rejected": -2073.402587890625, + "loss": 0.5578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8510925769805908, + "rewards/margins": 5.875482559204102, + "rewards/rejected": -5.024389743804932, + "step": 557 + }, + { + "epoch": 0.71, + "learning_rate": 2.0277324257910105e-08, + "logits/chosen": -3.253573417663574, + "logits/rejected": -3.1504926681518555, + "logps/chosen": -551.0051879882812, + "logps/rejected": -1301.76904296875, + "loss": 0.5596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9715942144393921, + "rewards/margins": 4.536444187164307, + "rewards/rejected": -3.564849853515625, + "step": 558 + }, + { + "epoch": 0.71, + "learning_rate": 2.0111378089837954e-08, + "logits/chosen": -3.2932891845703125, + "logits/rejected": -3.141566038131714, + "logps/chosen": -490.77099609375, + "logps/rejected": -1060.8128662109375, + "loss": 0.5143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9470856189727783, + "rewards/margins": 2.658529758453369, + "rewards/rejected": -1.7114441394805908, + "step": 559 + }, + { + "epoch": 0.71, + "learning_rate": 1.9945942635848744e-08, + "logits/chosen": -3.2797091007232666, + "logits/rejected": -3.174903631210327, + "logps/chosen": -498.0359191894531, + "logps/rejected": -985.9560546875, + "loss": 0.5682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9299468994140625, + "rewards/margins": 3.6362671852111816, + "rewards/rejected": -2.706320285797119, + "step": 560 + }, + { + "epoch": 0.72, + "learning_rate": 1.978102072277791e-08, + "logits/chosen": -3.3038535118103027, + "logits/rejected": -3.2029380798339844, + "logps/chosen": -564.786865234375, + "logps/rejected": -1008.978271484375, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9523559808731079, + "rewards/margins": 3.533444404602051, + "rewards/rejected": -2.5810883045196533, + "step": 561 + }, + { + "epoch": 0.72, + "learning_rate": 1.961661516868594e-08, + "logits/chosen": -3.254497528076172, + "logits/rejected": -3.13930082321167, + "logps/chosen": -496.1990051269531, + "logps/rejected": -778.8037109375, + "loss": 0.5431, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0087127685546875, + "rewards/margins": 2.8258728981018066, + "rewards/rejected": -1.8171601295471191, + "step": 562 + }, + { + "epoch": 0.72, + "learning_rate": 1.9452728782810107e-08, + "logits/chosen": -3.278909683227539, + "logits/rejected": -3.1413214206695557, + "logps/chosen": -563.8138427734375, + "logps/rejected": -1420.7572021484375, + "loss": 0.5714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7620452642440796, + "rewards/margins": 4.826098442077637, + "rewards/rejected": -4.064053535461426, + "step": 563 + }, + { + "epoch": 0.72, + "learning_rate": 1.928936436551661e-08, + "logits/chosen": -3.2535388469696045, + "logits/rejected": -3.2385048866271973, + "logps/chosen": -560.4786376953125, + "logps/rejected": -1571.0474853515625, + "loss": 0.5398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7893127202987671, + "rewards/margins": 4.7694549560546875, + "rewards/rejected": -3.98014235496521, + "step": 564 + }, + { + "epoch": 0.72, + "learning_rate": 1.9126524708252555e-08, + "logits/chosen": -3.2668652534484863, + "logits/rejected": -3.1626393795013428, + "logps/chosen": -422.2543029785156, + "logps/rejected": -988.6322631835938, + "loss": 0.4889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8543503284454346, + "rewards/margins": 3.560206651687622, + "rewards/rejected": -2.7058563232421875, + "step": 565 + }, + { + "epoch": 0.72, + "learning_rate": 1.8964212593498442e-08, + "logits/chosen": -3.258924961090088, + "logits/rejected": -3.1658058166503906, + "logps/chosen": -521.295166015625, + "logps/rejected": -1540.6131591796875, + "loss": 0.5219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.982403576374054, + "rewards/margins": 4.883447170257568, + "rewards/rejected": -3.901043653488159, + "step": 566 + }, + { + "epoch": 0.72, + "learning_rate": 1.8802430794720454e-08, + "logits/chosen": -3.2887516021728516, + "logits/rejected": -3.0820469856262207, + "logps/chosen": -472.582763671875, + "logps/rejected": -1930.1298828125, + "loss": 0.5369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7001190185546875, + "rewards/margins": 5.482205390930176, + "rewards/rejected": -4.782086372375488, + "step": 567 + }, + { + "epoch": 0.72, + "learning_rate": 1.8641182076323148e-08, + "logits/chosen": -3.250495433807373, + "logits/rejected": -3.2006185054779053, + "logps/chosen": -471.3146667480469, + "logps/rejected": -953.2779541015625, + "loss": 0.5409, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.004919409751892, + "rewards/margins": 3.8475587368011475, + "rewards/rejected": -2.842639207839966, + "step": 568 + }, + { + "epoch": 0.73, + "learning_rate": 1.848046919360225e-08, + "logits/chosen": -3.2906408309936523, + "logits/rejected": -3.140500545501709, + "logps/chosen": -427.2916564941406, + "logps/rejected": -1506.3935546875, + "loss": 0.5022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6466644406318665, + "rewards/margins": 4.257104873657227, + "rewards/rejected": -3.610440254211426, + "step": 569 + }, + { + "epoch": 0.73, + "learning_rate": 1.8320294892697475e-08, + "logits/chosen": -3.2876157760620117, + "logits/rejected": -3.1893420219421387, + "logps/chosen": -478.33001708984375, + "logps/rejected": -1826.8287353515625, + "loss": 0.5102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8764374256134033, + "rewards/margins": 5.1429595947265625, + "rewards/rejected": -4.266522407531738, + "step": 570 + }, + { + "epoch": 0.73, + "learning_rate": 1.8160661910545715e-08, + "logits/chosen": -3.2547197341918945, + "logits/rejected": -3.2008512020111084, + "logps/chosen": -557.116943359375, + "logps/rejected": -1584.36279296875, + "loss": 0.5875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8199402093887329, + "rewards/margins": 4.461053371429443, + "rewards/rejected": -3.64111328125, + "step": 571 + }, + { + "epoch": 0.73, + "learning_rate": 1.8001572974834166e-08, + "logits/chosen": -3.2416815757751465, + "logits/rejected": -3.188992977142334, + "logps/chosen": -443.98455810546875, + "logps/rejected": -1163.37255859375, + "loss": 0.5182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6969283819198608, + "rewards/margins": 3.883043050765991, + "rewards/rejected": -3.186114549636841, + "step": 572 + }, + { + "epoch": 0.73, + "learning_rate": 1.7843030803953834e-08, + "logits/chosen": -3.283024311065674, + "logits/rejected": -3.213104486465454, + "logps/chosen": -523.5455322265625, + "logps/rejected": -1045.101318359375, + "loss": 0.5473, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1586517095565796, + "rewards/margins": 3.7397522926330566, + "rewards/rejected": -2.5811004638671875, + "step": 573 + }, + { + "epoch": 0.73, + "learning_rate": 1.768503810695295e-08, + "logits/chosen": -3.236114263534546, + "logits/rejected": -3.11175537109375, + "logps/chosen": -548.007080078125, + "logps/rejected": -1014.7822265625, + "loss": 0.5459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8372207880020142, + "rewards/margins": 2.9879226684570312, + "rewards/rejected": -2.1507019996643066, + "step": 574 + }, + { + "epoch": 0.73, + "learning_rate": 1.7527597583490822e-08, + "logits/chosen": -3.273951768875122, + "logits/rejected": -3.1915106773376465, + "logps/chosen": -516.33837890625, + "logps/rejected": -1560.651123046875, + "loss": 0.5443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7030685544013977, + "rewards/margins": 4.286314487457275, + "rewards/rejected": -3.5832457542419434, + "step": 575 + }, + { + "epoch": 0.73, + "learning_rate": 1.7370711923791564e-08, + "logits/chosen": -3.228973865509033, + "logits/rejected": -3.050318717956543, + "logps/chosen": -542.5596923828125, + "logps/rejected": -1573.891357421875, + "loss": 0.5095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9608551263809204, + "rewards/margins": 4.594943046569824, + "rewards/rejected": -3.6340880393981934, + "step": 576 + }, + { + "epoch": 0.74, + "learning_rate": 1.7214383808598282e-08, + "logits/chosen": -3.2220959663391113, + "logits/rejected": -3.180255889892578, + "logps/chosen": -490.1199035644531, + "logps/rejected": -1055.039306640625, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9420517683029175, + "rewards/margins": 3.344377279281616, + "rewards/rejected": -2.4023256301879883, + "step": 577 + }, + { + "epoch": 0.74, + "learning_rate": 1.70586159091271e-08, + "logits/chosen": -3.2542967796325684, + "logits/rejected": -3.1679575443267822, + "logps/chosen": -527.9521484375, + "logps/rejected": -1235.9561767578125, + "loss": 0.5745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9730942249298096, + "rewards/margins": 4.826292514801025, + "rewards/rejected": -3.853198289871216, + "step": 578 + }, + { + "epoch": 0.74, + "learning_rate": 1.6903410887021675e-08, + "logits/chosen": -3.27130389213562, + "logits/rejected": -3.1413440704345703, + "logps/chosen": -464.9495849609375, + "logps/rejected": -1652.559326171875, + "loss": 0.5142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9582198858261108, + "rewards/margins": 5.000144958496094, + "rewards/rejected": -4.041924953460693, + "step": 579 + }, + { + "epoch": 0.74, + "learning_rate": 1.674877139430758e-08, + "logits/chosen": -3.3081440925598145, + "logits/rejected": -3.222533702850342, + "logps/chosen": -470.09332275390625, + "logps/rejected": -1679.1368408203125, + "loss": 0.4845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8594955205917358, + "rewards/margins": 5.182127475738525, + "rewards/rejected": -4.3226318359375, + "step": 580 + }, + { + "epoch": 0.74, + "learning_rate": 1.6594700073347112e-08, + "logits/chosen": -3.3050801753997803, + "logits/rejected": -3.1436996459960938, + "logps/chosen": -532.3234252929688, + "logps/rejected": -880.31396484375, + "loss": 0.548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8529541492462158, + "rewards/margins": 3.108551025390625, + "rewards/rejected": -2.255596876144409, + "step": 581 + }, + { + "epoch": 0.74, + "learning_rate": 1.6441199556794033e-08, + "logits/chosen": -3.2554519176483154, + "logits/rejected": -3.1056418418884277, + "logps/chosen": -521.5010986328125, + "logps/rejected": -785.8870849609375, + "loss": 0.5671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.892681896686554, + "rewards/margins": 2.98638916015625, + "rewards/rejected": -2.093707323074341, + "step": 582 + }, + { + "epoch": 0.74, + "learning_rate": 1.6288272467548632e-08, + "logits/chosen": -3.265867233276367, + "logits/rejected": -3.199401617050171, + "logps/chosen": -503.39178466796875, + "logps/rejected": -1781.214111328125, + "loss": 0.5192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7352707386016846, + "rewards/margins": 5.821104526519775, + "rewards/rejected": -5.085833549499512, + "step": 583 + }, + { + "epoch": 0.74, + "learning_rate": 1.6135921418712954e-08, + "logits/chosen": -3.2462944984436035, + "logits/rejected": -3.1886630058288574, + "logps/chosen": -553.0913696289062, + "logps/rejected": -2340.687744140625, + "loss": 0.5452, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0005004405975342, + "rewards/margins": 7.178991317749023, + "rewards/rejected": -6.178491115570068, + "step": 584 + }, + { + "epoch": 0.75, + "learning_rate": 1.5984149013546046e-08, + "logits/chosen": -3.226921558380127, + "logits/rejected": -3.107053279876709, + "logps/chosen": -466.3231201171875, + "logps/rejected": -1181.6063232421875, + "loss": 0.5564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9020553827285767, + "rewards/margins": 3.462303400039673, + "rewards/rejected": -2.5602478981018066, + "step": 585 + }, + { + "epoch": 0.75, + "learning_rate": 1.583295784541958e-08, + "logits/chosen": -3.2665891647338867, + "logits/rejected": -3.2414889335632324, + "logps/chosen": -502.5987548828125, + "logps/rejected": -1626.38623046875, + "loss": 0.5273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1327178478240967, + "rewards/margins": 6.0815887451171875, + "rewards/rejected": -4.94887113571167, + "step": 586 + }, + { + "epoch": 0.75, + "learning_rate": 1.568235049777345e-08, + "logits/chosen": -3.2849860191345215, + "logits/rejected": -3.136348009109497, + "logps/chosen": -482.8161315917969, + "logps/rejected": -1285.14794921875, + "loss": 0.5613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9248917102813721, + "rewards/margins": 4.056623935699463, + "rewards/rejected": -3.1317319869995117, + "step": 587 + }, + { + "epoch": 0.75, + "learning_rate": 1.553232954407171e-08, + "logits/chosen": -3.3003969192504883, + "logits/rejected": -3.147477149963379, + "logps/chosen": -518.7779541015625, + "logps/rejected": -1944.8837890625, + "loss": 0.5369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6639007329940796, + "rewards/margins": 6.083993434906006, + "rewards/rejected": -5.420092582702637, + "step": 588 + }, + { + "epoch": 0.75, + "learning_rate": 1.5382897547758512e-08, + "logits/chosen": -3.2766730785369873, + "logits/rejected": -3.208791971206665, + "logps/chosen": -493.43646240234375, + "logps/rejected": -1040.6142578125, + "loss": 0.5681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8958572149276733, + "rewards/margins": 3.3139266967773438, + "rewards/rejected": -2.418069362640381, + "step": 589 + }, + { + "epoch": 0.75, + "learning_rate": 1.52340570622144e-08, + "logits/chosen": -3.2803478240966797, + "logits/rejected": -3.141474723815918, + "logps/chosen": -530.053466796875, + "logps/rejected": -1981.6461181640625, + "loss": 0.554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8116852045059204, + "rewards/margins": 5.325564384460449, + "rewards/rejected": -4.513879299163818, + "step": 590 + }, + { + "epoch": 0.75, + "learning_rate": 1.508581063071258e-08, + "logits/chosen": -3.237386465072632, + "logits/rejected": -3.187905788421631, + "logps/chosen": -574.0392456054688, + "logps/rejected": -1631.4453125, + "loss": 0.6234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8567230701446533, + "rewards/margins": 4.902676582336426, + "rewards/rejected": -4.045953750610352, + "step": 591 + }, + { + "epoch": 0.75, + "learning_rate": 1.493816078637557e-08, + "logits/chosen": -3.2787463665008545, + "logits/rejected": -3.188897132873535, + "logps/chosen": -475.8215026855469, + "logps/rejected": -1429.17626953125, + "loss": 0.512, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0504134893417358, + "rewards/margins": 4.680158615112305, + "rewards/rejected": -3.6297454833984375, + "step": 592 + }, + { + "epoch": 0.76, + "learning_rate": 1.47911100521318e-08, + "logits/chosen": -3.3396472930908203, + "logits/rejected": -3.1402735710144043, + "logps/chosen": -492.396484375, + "logps/rejected": -971.4962158203125, + "loss": 0.5051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8902664184570312, + "rewards/margins": 3.768376350402832, + "rewards/rejected": -2.878109931945801, + "step": 593 + }, + { + "epoch": 0.76, + "learning_rate": 1.4644660940672625e-08, + "logits/chosen": -3.283643960952759, + "logits/rejected": -3.192488193511963, + "logps/chosen": -443.92333984375, + "logps/rejected": -1136.778076171875, + "loss": 0.525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8101059198379517, + "rewards/margins": 4.401628494262695, + "rewards/rejected": -3.591522216796875, + "step": 594 + }, + { + "epoch": 0.76, + "learning_rate": 1.4498815954409278e-08, + "logits/chosen": -3.2841365337371826, + "logits/rejected": -3.196065902709961, + "logps/chosen": -492.9687194824219, + "logps/rejected": -1416.149658203125, + "loss": 0.5488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9335113763809204, + "rewards/margins": 4.778027534484863, + "rewards/rejected": -3.8445162773132324, + "step": 595 + }, + { + "epoch": 0.76, + "learning_rate": 1.4353577585430148e-08, + "logits/chosen": -3.275959014892578, + "logits/rejected": -3.1772756576538086, + "logps/chosen": -427.780029296875, + "logps/rejected": -1096.131591796875, + "loss": 0.5273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6605056524276733, + "rewards/margins": 3.3248672485351562, + "rewards/rejected": -2.6643614768981934, + "step": 596 + }, + { + "epoch": 0.76, + "learning_rate": 1.4208948315458275e-08, + "logits/chosen": -3.28204607963562, + "logits/rejected": -3.093967914581299, + "logps/chosen": -469.8770751953125, + "logps/rejected": -1084.998046875, + "loss": 0.566, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.010554552078247, + "rewards/margins": 3.9270217418670654, + "rewards/rejected": -2.9164671897888184, + "step": 597 + }, + { + "epoch": 0.76, + "learning_rate": 1.4064930615808806e-08, + "logits/chosen": -3.234708786010742, + "logits/rejected": -3.1319332122802734, + "logps/chosen": -502.17755126953125, + "logps/rejected": -1328.61328125, + "loss": 0.5233, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0982024669647217, + "rewards/margins": 4.135836601257324, + "rewards/rejected": -3.0376343727111816, + "step": 598 + }, + { + "epoch": 0.76, + "learning_rate": 1.39215269473469e-08, + "logits/chosen": -3.2624077796936035, + "logits/rejected": -3.0035481452941895, + "logps/chosen": -500.248291015625, + "logps/rejected": -1951.5072021484375, + "loss": 0.5739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8983627557754517, + "rewards/margins": 4.934138774871826, + "rewards/rejected": -4.035775661468506, + "step": 599 + }, + { + "epoch": 0.76, + "learning_rate": 1.3778739760445552e-08, + "logits/chosen": -3.206061363220215, + "logits/rejected": -3.1276235580444336, + "logps/chosen": -447.5180358886719, + "logps/rejected": -894.3846435546875, + "loss": 0.5136, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9446197748184204, + "rewards/margins": 3.2378387451171875, + "rewards/rejected": -2.2932190895080566, + "step": 600 + }, + { + "epoch": 0.77, + "learning_rate": 1.3636571494943861e-08, + "logits/chosen": -3.2515861988067627, + "logits/rejected": -3.2166242599487305, + "logps/chosen": -491.25860595703125, + "logps/rejected": -1492.406005859375, + "loss": 0.5117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8956466913223267, + "rewards/margins": 4.977055549621582, + "rewards/rejected": -4.081408977508545, + "step": 601 + }, + { + "epoch": 0.77, + "learning_rate": 1.349502458010519e-08, + "logits/chosen": -3.2762112617492676, + "logits/rejected": -3.2132956981658936, + "logps/chosen": -475.12176513671875, + "logps/rejected": -1399.455810546875, + "loss": 0.5296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8266647458076477, + "rewards/margins": 4.317222595214844, + "rewards/rejected": -3.4905576705932617, + "step": 602 + }, + { + "epoch": 0.77, + "learning_rate": 1.3354101434575805e-08, + "logits/chosen": -3.218088150024414, + "logits/rejected": -3.1616110801696777, + "logps/chosen": -483.9275817871094, + "logps/rejected": -1253.013427734375, + "loss": 0.5415, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2044708728790283, + "rewards/margins": 4.52554988861084, + "rewards/rejected": -3.3210787773132324, + "step": 603 + }, + { + "epoch": 0.77, + "learning_rate": 1.321380446634342e-08, + "logits/chosen": -3.2632791996002197, + "logits/rejected": -3.060584545135498, + "logps/chosen": -575.7451782226562, + "logps/rejected": -970.8612060546875, + "loss": 0.5606, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0925660133361816, + "rewards/margins": 3.1974029541015625, + "rewards/rejected": -2.10483717918396, + "step": 604 + }, + { + "epoch": 0.77, + "learning_rate": 1.3074136072696147e-08, + "logits/chosen": -3.280522584915161, + "logits/rejected": -3.1792373657226562, + "logps/chosen": -508.2633972167969, + "logps/rejected": -1730.4144287109375, + "loss": 0.5386, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.151635766029358, + "rewards/margins": 5.8191986083984375, + "rewards/rejected": -4.667562961578369, + "step": 605 + }, + { + "epoch": 0.77, + "learning_rate": 1.2935098640181458e-08, + "logits/chosen": -3.298741340637207, + "logits/rejected": -3.119478225708008, + "logps/chosen": -495.79815673828125, + "logps/rejected": -1654.6796875, + "loss": 0.557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7630584836006165, + "rewards/margins": 5.141489028930664, + "rewards/rejected": -4.378430366516113, + "step": 606 + }, + { + "epoch": 0.77, + "learning_rate": 1.2796694544565478e-08, + "logits/chosen": -3.259613513946533, + "logits/rejected": -3.1690309047698975, + "logps/chosen": -536.1697998046875, + "logps/rejected": -1689.89306640625, + "loss": 0.5759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9803253412246704, + "rewards/margins": 5.329751968383789, + "rewards/rejected": -4.34942626953125, + "step": 607 + }, + { + "epoch": 0.78, + "learning_rate": 1.2658926150792321e-08, + "logits/chosen": -3.2450332641601562, + "logits/rejected": -3.1413888931274414, + "logps/chosen": -494.7772216796875, + "logps/rejected": -1603.0555419921875, + "loss": 0.5408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9026199579238892, + "rewards/margins": 5.131349563598633, + "rewards/rejected": -4.228729248046875, + "step": 608 + }, + { + "epoch": 0.78, + "learning_rate": 1.2521795812943703e-08, + "logits/chosen": -3.2927680015563965, + "logits/rejected": -3.137226104736328, + "logps/chosen": -469.3790283203125, + "logps/rejected": -1343.9912109375, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.019934058189392, + "rewards/margins": 3.523329257965088, + "rewards/rejected": -2.5033950805664062, + "step": 609 + }, + { + "epoch": 0.78, + "learning_rate": 1.2385305874198776e-08, + "logits/chosen": -3.2736258506774902, + "logits/rejected": -3.2017273902893066, + "logps/chosen": -525.499755859375, + "logps/rejected": -2217.780029296875, + "loss": 0.5678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8976624011993408, + "rewards/margins": 6.097998142242432, + "rewards/rejected": -5.200335502624512, + "step": 610 + }, + { + "epoch": 0.78, + "learning_rate": 1.2249458666793966e-08, + "logits/chosen": -3.2263126373291016, + "logits/rejected": -3.140733242034912, + "logps/chosen": -558.6810302734375, + "logps/rejected": -1327.4666748046875, + "loss": 0.5531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8927948474884033, + "rewards/margins": 4.329565525054932, + "rewards/rejected": -3.4367709159851074, + "step": 611 + }, + { + "epoch": 0.78, + "learning_rate": 1.2114256511983274e-08, + "logits/chosen": -3.266214609146118, + "logits/rejected": -3.183483600616455, + "logps/chosen": -550.9781494140625, + "logps/rejected": -1126.8109130859375, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8137847781181335, + "rewards/margins": 3.623687744140625, + "rewards/rejected": -2.8099029064178467, + "step": 612 + }, + { + "epoch": 0.78, + "learning_rate": 1.1979701719998452e-08, + "logits/chosen": -3.263979434967041, + "logits/rejected": -3.1895852088928223, + "logps/chosen": -539.4984130859375, + "logps/rejected": -1284.59521484375, + "loss": 0.5834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9558823108673096, + "rewards/margins": 4.209660530090332, + "rewards/rejected": -3.2537779808044434, + "step": 613 + }, + { + "epoch": 0.78, + "learning_rate": 1.1845796590009682e-08, + "logits/chosen": -3.158144235610962, + "logits/rejected": -3.1240835189819336, + "logps/chosen": -594.01806640625, + "logps/rejected": -1798.33203125, + "loss": 0.5885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9604095816612244, + "rewards/margins": 5.588553428649902, + "rewards/rejected": -4.628143310546875, + "step": 614 + }, + { + "epoch": 0.78, + "learning_rate": 1.1712543410086145e-08, + "logits/chosen": -3.2187581062316895, + "logits/rejected": -3.1517980098724365, + "logps/chosen": -503.3832702636719, + "logps/rejected": -1325.786376953125, + "loss": 0.5035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8536621332168579, + "rewards/margins": 4.82594633102417, + "rewards/rejected": -3.9722840785980225, + "step": 615 + }, + { + "epoch": 0.79, + "learning_rate": 1.157994445715706e-08, + "logits/chosen": -3.201582908630371, + "logits/rejected": -3.0708694458007812, + "logps/chosen": -510.9700927734375, + "logps/rejected": -1859.587646484375, + "loss": 0.5802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8468735218048096, + "rewards/margins": 6.183253765106201, + "rewards/rejected": -5.3363800048828125, + "step": 616 + }, + { + "epoch": 0.79, + "learning_rate": 1.1448001996972645e-08, + "logits/chosen": -3.2241358757019043, + "logits/rejected": -3.1930160522460938, + "logps/chosen": -527.26904296875, + "logps/rejected": -2686.52392578125, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.939526379108429, + "rewards/margins": 7.548620700836182, + "rewards/rejected": -6.609094619750977, + "step": 617 + }, + { + "epoch": 0.79, + "learning_rate": 1.1316718284065534e-08, + "logits/chosen": -3.257094383239746, + "logits/rejected": -3.193955898284912, + "logps/chosen": -539.5301513671875, + "logps/rejected": -2030.007568359375, + "loss": 0.5308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7547546625137329, + "rewards/margins": 5.918988227844238, + "rewards/rejected": -5.164233207702637, + "step": 618 + }, + { + "epoch": 0.79, + "learning_rate": 1.1186095561712128e-08, + "logits/chosen": -3.2897109985351562, + "logits/rejected": -3.2238056659698486, + "logps/chosen": -553.4278564453125, + "logps/rejected": -715.8270263671875, + "loss": 0.5912, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9236053824424744, + "rewards/margins": 3.173123359680176, + "rewards/rejected": -2.2495179176330566, + "step": 619 + }, + { + "epoch": 0.79, + "learning_rate": 1.1056136061894383e-08, + "logits/chosen": -3.2667815685272217, + "logits/rejected": -3.2253613471984863, + "logps/chosen": -535.6884765625, + "logps/rejected": -1350.611572265625, + "loss": 0.5728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.816741943359375, + "rewards/margins": 4.787072658538818, + "rewards/rejected": -3.9703307151794434, + "step": 620 + }, + { + "epoch": 0.79, + "learning_rate": 1.0926842005261549e-08, + "logits/chosen": -3.236727714538574, + "logits/rejected": -3.23606538772583, + "logps/chosen": -552.8233032226562, + "logps/rejected": -1342.654296875, + "loss": 0.5687, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.020100474357605, + "rewards/margins": 5.1702165603637695, + "rewards/rejected": -4.150115966796875, + "step": 621 + }, + { + "epoch": 0.79, + "learning_rate": 1.0798215601092353e-08, + "logits/chosen": -3.3441004753112793, + "logits/rejected": -3.1972224712371826, + "logps/chosen": -516.3216552734375, + "logps/rejected": -1337.761474609375, + "loss": 0.5384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8255096673965454, + "rewards/margins": 4.040060520172119, + "rewards/rejected": -3.2145509719848633, + "step": 622 + }, + { + "epoch": 0.79, + "learning_rate": 1.067025904725713e-08, + "logits/chosen": -3.26427960395813, + "logits/rejected": -3.227125406265259, + "logps/chosen": -489.964111328125, + "logps/rejected": -1432.524169921875, + "loss": 0.5196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8781845569610596, + "rewards/margins": 4.442686557769775, + "rewards/rejected": -3.564502000808716, + "step": 623 + }, + { + "epoch": 0.8, + "learning_rate": 1.0542974530180326e-08, + "logits/chosen": -3.21004056930542, + "logits/rejected": -3.1922824382781982, + "logps/chosen": -563.3863525390625, + "logps/rejected": -1275.142822265625, + "loss": 0.5668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1080443859100342, + "rewards/margins": 4.167633056640625, + "rewards/rejected": -3.059588670730591, + "step": 624 + }, + { + "epoch": 0.8, + "learning_rate": 1.0416364224803182e-08, + "logits/chosen": -3.2319741249084473, + "logits/rejected": -3.0974478721618652, + "logps/chosen": -514.8078002929688, + "logps/rejected": -750.9189453125, + "loss": 0.5848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1384124755859375, + "rewards/margins": 2.6315979957580566, + "rewards/rejected": -1.4931855201721191, + "step": 625 + }, + { + "epoch": 0.8, + "learning_rate": 1.0290430294546448e-08, + "logits/chosen": -3.2930827140808105, + "logits/rejected": -3.176502227783203, + "logps/chosen": -498.23309326171875, + "logps/rejected": -1517.8179931640625, + "loss": 0.5324, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.064671277999878, + "rewards/margins": 5.406736850738525, + "rewards/rejected": -4.342065811157227, + "step": 626 + }, + { + "epoch": 0.8, + "learning_rate": 1.016517489127357e-08, + "logits/chosen": -3.2802019119262695, + "logits/rejected": -3.16024112701416, + "logps/chosen": -518.996337890625, + "logps/rejected": -1460.343994140625, + "loss": 0.5444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9034942388534546, + "rewards/margins": 4.343417167663574, + "rewards/rejected": -3.4399232864379883, + "step": 627 + }, + { + "epoch": 0.8, + "learning_rate": 1.0040600155253764e-08, + "logits/chosen": -3.3067891597747803, + "logits/rejected": -3.1370906829833984, + "logps/chosen": -544.6298828125, + "logps/rejected": -1807.20263671875, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0639556646347046, + "rewards/margins": 5.836899280548096, + "rewards/rejected": -4.772943496704102, + "step": 628 + }, + { + "epoch": 0.8, + "learning_rate": 9.916708215125585e-09, + "logits/chosen": -3.314157485961914, + "logits/rejected": -3.182875633239746, + "logps/chosen": -504.99420166015625, + "logps/rejected": -946.8223876953125, + "loss": 0.5323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9859817624092102, + "rewards/margins": 3.5985918045043945, + "rewards/rejected": -2.61260986328125, + "step": 629 + }, + { + "epoch": 0.8, + "learning_rate": 9.793501187860431e-09, + "logits/chosen": -3.276620864868164, + "logits/rejected": -3.0861263275146484, + "logps/chosen": -446.55145263671875, + "logps/rejected": -1126.7900390625, + "loss": 0.505, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0708481073379517, + "rewards/margins": 4.048344612121582, + "rewards/rejected": -2.977496385574341, + "step": 630 + }, + { + "epoch": 0.8, + "learning_rate": 9.670981178726485e-09, + "logits/chosen": -3.2699341773986816, + "logits/rejected": -3.163935899734497, + "logps/chosen": -533.1836547851562, + "logps/rejected": -1403.18115234375, + "loss": 0.592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7690505981445312, + "rewards/margins": 3.8990979194641113, + "rewards/rejected": -3.130047559738159, + "step": 631 + }, + { + "epoch": 0.81, + "learning_rate": 9.549150281252633e-09, + "logits/chosen": -3.246284246444702, + "logits/rejected": -3.1043219566345215, + "logps/chosen": -548.735107421875, + "logps/rejected": -1521.9052734375, + "loss": 0.5772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9258575439453125, + "rewards/margins": 4.680020332336426, + "rewards/rejected": -3.7541627883911133, + "step": 632 + }, + { + "epoch": 0.81, + "learning_rate": 9.428010577192796e-09, + "logits/chosen": -3.3100271224975586, + "logits/rejected": -3.16245174407959, + "logps/chosen": -490.23211669921875, + "logps/rejected": -1652.72802734375, + "loss": 0.525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9224823117256165, + "rewards/margins": 5.606961250305176, + "rewards/rejected": -4.684478759765625, + "step": 633 + }, + { + "epoch": 0.81, + "learning_rate": 9.307564136490254e-09, + "logits/chosen": -3.2679991722106934, + "logits/rejected": -3.1193461418151855, + "logps/chosen": -513.5745849609375, + "logps/rejected": -1936.067626953125, + "loss": 0.5529, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2243103981018066, + "rewards/margins": 5.5730133056640625, + "rewards/rejected": -4.348702907562256, + "step": 634 + }, + { + "epoch": 0.81, + "learning_rate": 9.187813017242386e-09, + "logits/chosen": -3.2741870880126953, + "logits/rejected": -3.1222543716430664, + "logps/chosen": -541.1133422851562, + "logps/rejected": -1055.6983642578125, + "loss": 0.5491, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0565093755722046, + "rewards/margins": 4.146154880523682, + "rewards/rejected": -3.0896453857421875, + "step": 635 + }, + { + "epoch": 0.81, + "learning_rate": 9.068759265665382e-09, + "logits/chosen": -3.234203338623047, + "logits/rejected": -3.20082688331604, + "logps/chosen": -552.5906982421875, + "logps/rejected": -1117.6932373046875, + "loss": 0.5933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7513961791992188, + "rewards/margins": 3.7499070167541504, + "rewards/rejected": -2.9985108375549316, + "step": 636 + }, + { + "epoch": 0.81, + "learning_rate": 8.950404916059406e-09, + "logits/chosen": -3.304950714111328, + "logits/rejected": -3.089533805847168, + "logps/chosen": -476.53289794921875, + "logps/rejected": -2192.740478515625, + "loss": 0.5407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9017089605331421, + "rewards/margins": 5.9512481689453125, + "rewards/rejected": -5.049539566040039, + "step": 637 + }, + { + "epoch": 0.81, + "learning_rate": 8.832751990773713e-09, + "logits/chosen": -3.259803295135498, + "logits/rejected": -3.094609260559082, + "logps/chosen": -518.7734375, + "logps/rejected": -1512.632568359375, + "loss": 0.5493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.64453125, + "rewards/margins": 4.151660442352295, + "rewards/rejected": -3.507128953933716, + "step": 638 + }, + { + "epoch": 0.81, + "learning_rate": 8.715802500172214e-09, + "logits/chosen": -3.2914319038391113, + "logits/rejected": -3.1654624938964844, + "logps/chosen": -517.2841796875, + "logps/rejected": -825.1041259765625, + "loss": 0.5713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9647430181503296, + "rewards/margins": 3.068966865539551, + "rewards/rejected": -2.1042237281799316, + "step": 639 + }, + { + "epoch": 0.82, + "learning_rate": 8.599558442598998e-09, + "logits/chosen": -3.246100902557373, + "logits/rejected": -3.1185903549194336, + "logps/chosen": -513.03955078125, + "logps/rejected": -1875.168212890625, + "loss": 0.5353, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0789337158203125, + "rewards/margins": 5.408682346343994, + "rewards/rejected": -4.329748630523682, + "step": 640 + }, + { + "epoch": 0.82, + "learning_rate": 8.484021804344305e-09, + "logits/chosen": -3.253533363342285, + "logits/rejected": -3.1792359352111816, + "logps/chosen": -497.86785888671875, + "logps/rejected": -784.9967041015625, + "loss": 0.5599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9847702383995056, + "rewards/margins": 3.287370443344116, + "rewards/rejected": -2.302600145339966, + "step": 641 + }, + { + "epoch": 0.82, + "learning_rate": 8.369194559610482e-09, + "logits/chosen": -3.2588648796081543, + "logits/rejected": -3.175891399383545, + "logps/chosen": -496.6927795410156, + "logps/rejected": -1414.29248046875, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9452881217002869, + "rewards/margins": 5.592749118804932, + "rewards/rejected": -4.6474609375, + "step": 642 + }, + { + "epoch": 0.82, + "learning_rate": 8.25507867047835e-09, + "logits/chosen": -3.202993154525757, + "logits/rejected": -3.0989270210266113, + "logps/chosen": -521.8204956054688, + "logps/rejected": -1228.619873046875, + "loss": 0.5186, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2777297496795654, + "rewards/margins": 4.649263381958008, + "rewards/rejected": -3.3715333938598633, + "step": 643 + }, + { + "epoch": 0.82, + "learning_rate": 8.141676086873572e-09, + "logits/chosen": -3.300018787384033, + "logits/rejected": -3.161355972290039, + "logps/chosen": -492.9678039550781, + "logps/rejected": -2370.0888671875, + "loss": 0.5599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7420288324356079, + "rewards/margins": 6.294952392578125, + "rewards/rejected": -5.552923679351807, + "step": 644 + }, + { + "epoch": 0.82, + "learning_rate": 8.028988746533432e-09, + "logits/chosen": -3.259690284729004, + "logits/rejected": -3.1386678218841553, + "logps/chosen": -417.5417785644531, + "logps/rejected": -3297.59912109375, + "loss": 0.5155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.660980224609375, + "rewards/margins": 6.817657470703125, + "rewards/rejected": -6.15667724609375, + "step": 645 + }, + { + "epoch": 0.82, + "learning_rate": 7.917018574973644e-09, + "logits/chosen": -3.2512149810791016, + "logits/rejected": -3.232966899871826, + "logps/chosen": -466.87335205078125, + "logps/rejected": -1787.6072998046875, + "loss": 0.5268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7217742800712585, + "rewards/margins": 5.366091728210449, + "rewards/rejected": -4.644317626953125, + "step": 646 + }, + { + "epoch": 0.82, + "learning_rate": 7.805767485455527e-09, + "logits/chosen": -3.304354429244995, + "logits/rejected": -3.1981911659240723, + "logps/chosen": -515.836181640625, + "logps/rejected": -884.7116088867188, + "loss": 0.5552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.790112316608429, + "rewards/margins": 3.0128173828125, + "rewards/rejected": -2.222705125808716, + "step": 647 + }, + { + "epoch": 0.83, + "learning_rate": 7.695237378953223e-09, + "logits/chosen": -3.241461753845215, + "logits/rejected": -3.1582155227661133, + "logps/chosen": -517.9677734375, + "logps/rejected": -1687.3985595703125, + "loss": 0.5711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9383789300918579, + "rewards/margins": 5.857824802398682, + "rewards/rejected": -4.919445991516113, + "step": 648 + }, + { + "epoch": 0.83, + "learning_rate": 7.585430144121319e-09, + "logits/chosen": -3.272430419921875, + "logits/rejected": -3.171295642852783, + "logps/chosen": -497.3233642578125, + "logps/rejected": -1496.18115234375, + "loss": 0.5131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9470779895782471, + "rewards/margins": 5.443819046020508, + "rewards/rejected": -4.496740818023682, + "step": 649 + }, + { + "epoch": 0.83, + "learning_rate": 7.476347657262455e-09, + "logits/chosen": -3.2148280143737793, + "logits/rejected": -3.094994068145752, + "logps/chosen": -533.26708984375, + "logps/rejected": -2363.24267578125, + "loss": 0.5468, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0145752429962158, + "rewards/margins": 6.197094917297363, + "rewards/rejected": -5.182519912719727, + "step": 650 + }, + { + "epoch": 0.83, + "learning_rate": 7.367991782295391e-09, + "logits/chosen": -3.2490243911743164, + "logits/rejected": -3.134901523590088, + "logps/chosen": -514.8054809570312, + "logps/rejected": -1385.6116943359375, + "loss": 0.5463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9060409665107727, + "rewards/margins": 4.399455547332764, + "rewards/rejected": -3.4934144020080566, + "step": 651 + }, + { + "epoch": 0.83, + "learning_rate": 7.260364370723044e-09, + "logits/chosen": -3.2896392345428467, + "logits/rejected": -3.1504464149475098, + "logps/chosen": -413.75372314453125, + "logps/rejected": -1951.52099609375, + "loss": 0.4968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.853741466999054, + "rewards/margins": 5.7018890380859375, + "rewards/rejected": -4.848147869110107, + "step": 652 + }, + { + "epoch": 0.83, + "learning_rate": 7.153467261600948e-09, + "logits/chosen": -3.2555949687957764, + "logits/rejected": -3.120340347290039, + "logps/chosen": -492.64520263671875, + "logps/rejected": -1663.0343017578125, + "loss": 0.5342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9905807375907898, + "rewards/margins": 4.779050827026367, + "rewards/rejected": -3.788470506668091, + "step": 653 + }, + { + "epoch": 0.83, + "learning_rate": 7.047302281505735e-09, + "logits/chosen": -3.2730631828308105, + "logits/rejected": -3.2080812454223633, + "logps/chosen": -502.20758056640625, + "logps/rejected": -1548.539306640625, + "loss": 0.5537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9559677243232727, + "rewards/margins": 4.739479064941406, + "rewards/rejected": -3.7835114002227783, + "step": 654 + }, + { + "epoch": 0.83, + "learning_rate": 6.9418712445040165e-09, + "logits/chosen": -3.268894672393799, + "logits/rejected": -3.0560264587402344, + "logps/chosen": -557.4228515625, + "logps/rejected": -975.9409790039062, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9773010015487671, + "rewards/margins": 2.844897747039795, + "rewards/rejected": -1.8675965070724487, + "step": 655 + }, + { + "epoch": 0.84, + "learning_rate": 6.837175952121305e-09, + "logits/chosen": -3.307309627532959, + "logits/rejected": -3.0540690422058105, + "logps/chosen": -463.05908203125, + "logps/rejected": -3047.083251953125, + "loss": 0.5188, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1501755714416504, + "rewards/margins": 7.511552810668945, + "rewards/rejected": -6.361377239227295, + "step": 656 + }, + { + "epoch": 0.84, + "learning_rate": 6.733218193311291e-09, + "logits/chosen": -3.2808468341827393, + "logits/rejected": -3.2007336616516113, + "logps/chosen": -520.138427734375, + "logps/rejected": -1454.3843994140625, + "loss": 0.5731, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.002587914466858, + "rewards/margins": 4.94744873046875, + "rewards/rejected": -3.9448609352111816, + "step": 657 + }, + { + "epoch": 0.84, + "learning_rate": 6.629999744425236e-09, + "logits/chosen": -3.2775158882141113, + "logits/rejected": -3.168809413909912, + "logps/chosen": -494.6551818847656, + "logps/rejected": -1103.895263671875, + "loss": 0.537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9199692010879517, + "rewards/margins": 3.5804200172424316, + "rewards/rejected": -2.6604509353637695, + "step": 658 + }, + { + "epoch": 0.84, + "learning_rate": 6.527522369181654e-09, + "logits/chosen": -3.220876693725586, + "logits/rejected": -3.1901180744171143, + "logps/chosen": -499.404052734375, + "logps/rejected": -1197.2578125, + "loss": 0.5268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6940872073173523, + "rewards/margins": 4.0110063552856445, + "rewards/rejected": -3.3169188499450684, + "step": 659 + }, + { + "epoch": 0.84, + "learning_rate": 6.42578781863613e-09, + "logits/chosen": -3.2771008014678955, + "logits/rejected": -3.186636447906494, + "logps/chosen": -504.7525329589844, + "logps/rejected": -982.5352783203125, + "loss": 0.5392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8440002799034119, + "rewards/margins": 3.79634428024292, + "rewards/rejected": -2.9523439407348633, + "step": 660 + }, + { + "epoch": 0.84, + "learning_rate": 6.324797831151452e-09, + "logits/chosen": -3.265892744064331, + "logits/rejected": -3.1310534477233887, + "logps/chosen": -546.9613037109375, + "logps/rejected": -4539.25830078125, + "loss": 0.5383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9458709955215454, + "rewards/margins": 7.509116172790527, + "rewards/rejected": -6.563244819641113, + "step": 661 + }, + { + "epoch": 0.84, + "learning_rate": 6.22455413236786e-09, + "logits/chosen": -3.269641637802124, + "logits/rejected": -3.2529239654541016, + "logps/chosen": -487.1707763671875, + "logps/rejected": -928.325927734375, + "loss": 0.5541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.808380126953125, + "rewards/margins": 3.14414381980896, + "rewards/rejected": -2.335763692855835, + "step": 662 + }, + { + "epoch": 0.85, + "learning_rate": 6.125058435173569e-09, + "logits/chosen": -3.217473030090332, + "logits/rejected": -3.142383575439453, + "logps/chosen": -516.3133544921875, + "logps/rejected": -1517.804931640625, + "loss": 0.5383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8069076538085938, + "rewards/margins": 5.321245193481445, + "rewards/rejected": -4.514337539672852, + "step": 663 + }, + { + "epoch": 0.85, + "learning_rate": 6.026312439675552e-09, + "logits/chosen": -3.2809057235717773, + "logits/rejected": -3.1446235179901123, + "logps/chosen": -530.5340576171875, + "logps/rejected": -1314.87939453125, + "loss": 0.5762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1424133777618408, + "rewards/margins": 4.721893310546875, + "rewards/rejected": -3.5794801712036133, + "step": 664 + }, + { + "epoch": 0.85, + "learning_rate": 5.928317833170393e-09, + "logits/chosen": -3.219229221343994, + "logits/rejected": -2.973115921020508, + "logps/chosen": -562.8023071289062, + "logps/rejected": -2859.97314453125, + "loss": 0.5846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1911301612854004, + "rewards/margins": 6.33035135269165, + "rewards/rejected": -5.13922119140625, + "step": 665 + }, + { + "epoch": 0.85, + "learning_rate": 5.831076290115572e-09, + "logits/chosen": -3.2713067531585693, + "logits/rejected": -3.0446228981018066, + "logps/chosen": -539.3695068359375, + "logps/rejected": -2209.10595703125, + "loss": 0.5592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.87567138671875, + "rewards/margins": 6.089624404907227, + "rewards/rejected": -5.213952541351318, + "step": 666 + }, + { + "epoch": 0.85, + "learning_rate": 5.734589472100737e-09, + "logits/chosen": -3.3535311222076416, + "logits/rejected": -3.171332359313965, + "logps/chosen": -517.5323486328125, + "logps/rejected": -1000.544189453125, + "loss": 0.533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9610962271690369, + "rewards/margins": 3.3394837379455566, + "rewards/rejected": -2.378387451171875, + "step": 667 + }, + { + "epoch": 0.85, + "learning_rate": 5.638859027819409e-09, + "logits/chosen": -3.2298359870910645, + "logits/rejected": -3.1427388191223145, + "logps/chosen": -485.0603942871094, + "logps/rejected": -909.929931640625, + "loss": 0.5201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7324737310409546, + "rewards/margins": 3.4806487560272217, + "rewards/rejected": -2.7481751441955566, + "step": 668 + }, + { + "epoch": 0.85, + "learning_rate": 5.543886593040736e-09, + "logits/chosen": -3.2524890899658203, + "logits/rejected": -3.2603812217712402, + "logps/chosen": -535.1056518554688, + "logps/rejected": -1412.8658447265625, + "loss": 0.573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8146301507949829, + "rewards/margins": 4.475543022155762, + "rewards/rejected": -3.6609129905700684, + "step": 669 + }, + { + "epoch": 0.85, + "learning_rate": 5.44967379058161e-09, + "logits/chosen": -3.266547679901123, + "logits/rejected": -3.2343554496765137, + "logps/chosen": -454.214111328125, + "logps/rejected": -1416.578125, + "loss": 0.5512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9269287586212158, + "rewards/margins": 4.174902439117432, + "rewards/rejected": -3.247973680496216, + "step": 670 + }, + { + "epoch": 0.86, + "learning_rate": 5.356222230278856e-09, + "logits/chosen": -3.223433017730713, + "logits/rejected": -3.129488945007324, + "logps/chosen": -580.5333251953125, + "logps/rejected": -1160.7320556640625, + "loss": 0.5806, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0074799060821533, + "rewards/margins": 4.170416355133057, + "rewards/rejected": -3.1629364490509033, + "step": 671 + }, + { + "epoch": 0.86, + "learning_rate": 5.263533508961826e-09, + "logits/chosen": -3.238048791885376, + "logits/rejected": -3.2101340293884277, + "logps/chosen": -488.8727111816406, + "logps/rejected": -1430.57666015625, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.064386010169983, + "rewards/margins": 4.960687637329102, + "rewards/rejected": -3.896301507949829, + "step": 672 + }, + { + "epoch": 0.86, + "learning_rate": 5.17160921042501e-09, + "logits/chosen": -3.240164041519165, + "logits/rejected": -3.2209959030151367, + "logps/chosen": -520.9498901367188, + "logps/rejected": -1486.6722412109375, + "loss": 0.5588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9856521487236023, + "rewards/margins": 5.409975051879883, + "rewards/rejected": -4.424322605133057, + "step": 673 + }, + { + "epoch": 0.86, + "learning_rate": 5.080450905401057e-09, + "logits/chosen": -3.295734405517578, + "logits/rejected": -3.2117815017700195, + "logps/chosen": -470.2867431640625, + "logps/rejected": -2264.209716796875, + "loss": 0.5632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7662216424942017, + "rewards/margins": 5.605802536010742, + "rewards/rejected": -4.839581489562988, + "step": 674 + }, + { + "epoch": 0.86, + "learning_rate": 4.9900601515338705e-09, + "logits/chosen": -3.279806613922119, + "logits/rejected": -3.132535457611084, + "logps/chosen": -530.2586669921875, + "logps/rejected": -1413.3505859375, + "loss": 0.5431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8899933099746704, + "rewards/margins": 3.945730686187744, + "rewards/rejected": -3.0557374954223633, + "step": 675 + }, + { + "epoch": 0.86, + "learning_rate": 4.9004384933520545e-09, + "logits/chosen": -3.2313899993896484, + "logits/rejected": -3.16461443901062, + "logps/chosen": -586.1036987304688, + "logps/rejected": -1743.3828125, + "loss": 0.5761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9219512939453125, + "rewards/margins": 5.502878189086914, + "rewards/rejected": -4.580926895141602, + "step": 676 + }, + { + "epoch": 0.86, + "learning_rate": 4.811587462242461e-09, + "logits/chosen": -3.2077317237854004, + "logits/rejected": -3.1489076614379883, + "logps/chosen": -527.3107299804688, + "logps/rejected": -1258.361572265625, + "loss": 0.5785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7947158813476562, + "rewards/margins": 3.802354574203491, + "rewards/rejected": -3.007638454437256, + "step": 677 + }, + { + "epoch": 0.86, + "learning_rate": 4.7235085764240625e-09, + "logits/chosen": -3.280885696411133, + "logits/rejected": -3.1212522983551025, + "logps/chosen": -471.8553466796875, + "logps/rejected": -1078.84814453125, + "loss": 0.5502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9039535522460938, + "rewards/margins": 3.718257188796997, + "rewards/rejected": -2.8143036365509033, + "step": 678 + }, + { + "epoch": 0.87, + "learning_rate": 4.636203340922007e-09, + "logits/chosen": -3.233086109161377, + "logits/rejected": -3.158904790878296, + "logps/chosen": -485.003173828125, + "logps/rejected": -1372.411865234375, + "loss": 0.539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7823486328125, + "rewards/margins": 4.543445110321045, + "rewards/rejected": -3.761096477508545, + "step": 679 + }, + { + "epoch": 0.87, + "learning_rate": 4.549673247541874e-09, + "logits/chosen": -3.258246421813965, + "logits/rejected": -3.075068473815918, + "logps/chosen": -525.1610107421875, + "logps/rejected": -849.3106689453125, + "loss": 0.5942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9454987049102783, + "rewards/margins": 3.0710251331329346, + "rewards/rejected": -2.1255264282226562, + "step": 680 + }, + { + "epoch": 0.87, + "learning_rate": 4.463919774844233e-09, + "logits/chosen": -3.342802047729492, + "logits/rejected": -3.2301032543182373, + "logps/chosen": -497.41668701171875, + "logps/rejected": -1179.2052001953125, + "loss": 0.582, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0544815063476562, + "rewards/margins": 3.945793628692627, + "rewards/rejected": -2.8913118839263916, + "step": 681 + }, + { + "epoch": 0.87, + "learning_rate": 4.37894438811931e-09, + "logits/chosen": -3.2310380935668945, + "logits/rejected": -3.1301321983337402, + "logps/chosen": -508.5248718261719, + "logps/rejected": -1390.253173828125, + "loss": 0.5516, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0653290748596191, + "rewards/margins": 4.675137519836426, + "rewards/rejected": -3.6098084449768066, + "step": 682 + }, + { + "epoch": 0.87, + "learning_rate": 4.294748539362031e-09, + "logits/chosen": -3.259298324584961, + "logits/rejected": -3.095588207244873, + "logps/chosen": -524.047119140625, + "logps/rejected": -691.6782836914062, + "loss": 0.5374, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.059967041015625, + "rewards/margins": 2.6907639503479004, + "rewards/rejected": -1.6307969093322754, + "step": 683 + }, + { + "epoch": 0.87, + "learning_rate": 4.2113336672471245e-09, + "logits/chosen": -3.272340774536133, + "logits/rejected": -3.133833885192871, + "logps/chosen": -522.4090576171875, + "logps/rejected": -1790.5294189453125, + "loss": 0.5258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.140681505203247, + "rewards/margins": 5.6210036277771, + "rewards/rejected": -4.480322360992432, + "step": 684 + }, + { + "epoch": 0.87, + "learning_rate": 4.128701197104628e-09, + "logits/chosen": -3.3034982681274414, + "logits/rejected": -3.140578269958496, + "logps/chosen": -493.0096435546875, + "logps/rejected": -1014.6041870117188, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9216262698173523, + "rewards/margins": 3.7275712490081787, + "rewards/rejected": -2.8059449195861816, + "step": 685 + }, + { + "epoch": 0.87, + "learning_rate": 4.0468525408954454e-09, + "logits/chosen": -3.2855072021484375, + "logits/rejected": -3.1760811805725098, + "logps/chosen": -482.11651611328125, + "logps/rejected": -1067.206787109375, + "loss": 0.5299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7272384762763977, + "rewards/margins": 3.3510241508483887, + "rewards/rejected": -2.6237854957580566, + "step": 686 + }, + { + "epoch": 0.88, + "learning_rate": 3.9657890971873e-09, + "logits/chosen": -3.315824508666992, + "logits/rejected": -3.1430015563964844, + "logps/chosen": -484.08050537109375, + "logps/rejected": -1358.49755859375, + "loss": 0.5524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.942230224609375, + "rewards/margins": 4.1224365234375, + "rewards/rejected": -3.180206298828125, + "step": 687 + }, + { + "epoch": 0.88, + "learning_rate": 3.8855122511307626e-09, + "logits/chosen": -3.336021900177002, + "logits/rejected": -3.1448116302490234, + "logps/chosen": -463.09417724609375, + "logps/rejected": -1043.5484619140625, + "loss": 0.55, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7930709719657898, + "rewards/margins": 3.7396745681762695, + "rewards/rejected": -2.946603298187256, + "step": 688 + }, + { + "epoch": 0.88, + "learning_rate": 3.8060233744356625e-09, + "logits/chosen": -3.2457666397094727, + "logits/rejected": -3.100795030593872, + "logps/chosen": -482.70654296875, + "logps/rejected": -2173.78125, + "loss": 0.5285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9200470447540283, + "rewards/margins": 5.185366630554199, + "rewards/rejected": -4.26531982421875, + "step": 689 + }, + { + "epoch": 0.88, + "learning_rate": 3.727323825347578e-09, + "logits/chosen": -3.283905506134033, + "logits/rejected": -3.1733548641204834, + "logps/chosen": -511.1496276855469, + "logps/rejected": -1446.603759765625, + "loss": 0.5178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9891464114189148, + "rewards/margins": 4.357170104980469, + "rewards/rejected": -3.368023633956909, + "step": 690 + }, + { + "epoch": 0.88, + "learning_rate": 3.649414948624652e-09, + "logits/chosen": -3.2714531421661377, + "logits/rejected": -3.087376117706299, + "logps/chosen": -510.8273010253906, + "logps/rejected": -2674.4296875, + "loss": 0.5446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5817230343818665, + "rewards/margins": 6.683444499969482, + "rewards/rejected": -6.10172176361084, + "step": 691 + }, + { + "epoch": 0.88, + "learning_rate": 3.5722980755146515e-09, + "logits/chosen": -3.254380226135254, + "logits/rejected": -3.0824036598205566, + "logps/chosen": -512.8521118164062, + "logps/rejected": -1249.76513671875, + "loss": 0.5481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9804184436798096, + "rewards/margins": 4.26756477355957, + "rewards/rejected": -3.2871460914611816, + "step": 692 + }, + { + "epoch": 0.88, + "learning_rate": 3.4959745237321427e-09, + "logits/chosen": -3.2302985191345215, + "logits/rejected": -3.1486308574676514, + "logps/chosen": -616.900146484375, + "logps/rejected": -1976.0447998046875, + "loss": 0.5764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0223907232284546, + "rewards/margins": 5.962973117828369, + "rewards/rejected": -4.940582275390625, + "step": 693 + }, + { + "epoch": 0.88, + "learning_rate": 3.4204455974360556e-09, + "logits/chosen": -3.309156656265259, + "logits/rejected": -3.0480897426605225, + "logps/chosen": -490.6701965332031, + "logps/rejected": -2209.88232421875, + "loss": 0.533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9433563947677612, + "rewards/margins": 6.235311985015869, + "rewards/rejected": -5.291955471038818, + "step": 694 + }, + { + "epoch": 0.89, + "learning_rate": 3.3457125872073388e-09, + "logits/chosen": -3.184269666671753, + "logits/rejected": -3.178515911102295, + "logps/chosen": -592.400146484375, + "logps/rejected": -918.8452758789062, + "loss": 0.5538, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0472503900527954, + "rewards/margins": 3.623931884765625, + "rewards/rejected": -2.576681613922119, + "step": 695 + }, + { + "epoch": 0.89, + "learning_rate": 3.2717767700269627e-09, + "logits/chosen": -3.2939181327819824, + "logits/rejected": -3.108071804046631, + "logps/chosen": -565.044921875, + "logps/rejected": -834.500732421875, + "loss": 0.6001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1413605213165283, + "rewards/margins": 2.744259834289551, + "rewards/rejected": -1.602899193763733, + "step": 696 + }, + { + "epoch": 0.89, + "learning_rate": 3.198639409254017e-09, + "logits/chosen": -3.213768482208252, + "logits/rejected": -3.188220262527466, + "logps/chosen": -493.6104431152344, + "logps/rejected": -1322.8720703125, + "loss": 0.5289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9751831293106079, + "rewards/margins": 5.333246231079102, + "rewards/rejected": -4.358062744140625, + "step": 697 + }, + { + "epoch": 0.89, + "learning_rate": 3.1263017546042324e-09, + "logits/chosen": -3.314113140106201, + "logits/rejected": -3.1424715518951416, + "logps/chosen": -444.19537353515625, + "logps/rejected": -1566.4107666015625, + "loss": 0.5201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8374221920967102, + "rewards/margins": 4.957344055175781, + "rewards/rejected": -4.119922161102295, + "step": 698 + }, + { + "epoch": 0.89, + "learning_rate": 3.054765042128521e-09, + "logits/chosen": -3.3003416061401367, + "logits/rejected": -3.2709639072418213, + "logps/chosen": -511.8470458984375, + "logps/rejected": -1489.44970703125, + "loss": 0.5117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6714050769805908, + "rewards/margins": 4.710986614227295, + "rewards/rejected": -4.039581298828125, + "step": 699 + }, + { + "epoch": 0.89, + "learning_rate": 2.9840304941919412e-09, + "logits/chosen": -3.260641574859619, + "logits/rejected": -3.1847710609436035, + "logps/chosen": -508.3382263183594, + "logps/rejected": -1214.4423828125, + "loss": 0.5586, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.031579613685608, + "rewards/margins": 4.481317520141602, + "rewards/rejected": -3.449737548828125, + "step": 700 + }, + { + "epoch": 0.89, + "learning_rate": 2.9140993194527286e-09, + "logits/chosen": -3.2603816986083984, + "logits/rejected": -3.157891035079956, + "logps/chosen": -493.35833740234375, + "logps/rejected": -1046.8115234375, + "loss": 0.5586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9041671752929688, + "rewards/margins": 3.2692275047302246, + "rewards/rejected": -2.365060329437256, + "step": 701 + }, + { + "epoch": 0.89, + "learning_rate": 2.8449727128417366e-09, + "logits/chosen": -3.277432441711426, + "logits/rejected": -3.1779139041900635, + "logps/chosen": -495.68609619140625, + "logps/rejected": -1792.1700439453125, + "loss": 0.5703, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2419204711914062, + "rewards/margins": 6.659724235534668, + "rewards/rejected": -5.417803764343262, + "step": 702 + }, + { + "epoch": 0.9, + "learning_rate": 2.7766518555419394e-09, + "logits/chosen": -3.275672674179077, + "logits/rejected": -3.261356830596924, + "logps/chosen": -590.6375122070312, + "logps/rejected": -996.4535522460938, + "loss": 0.567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7379852533340454, + "rewards/margins": 3.7609922885894775, + "rewards/rejected": -3.0230071544647217, + "step": 703 + }, + { + "epoch": 0.9, + "learning_rate": 2.709137914968268e-09, + "logits/chosen": -3.2304673194885254, + "logits/rejected": -3.1317849159240723, + "logps/chosen": -516.783203125, + "logps/rejected": -1211.32421875, + "loss": 0.5603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.089471459388733, + "rewards/margins": 4.270959377288818, + "rewards/rejected": -3.181488037109375, + "step": 704 + }, + { + "epoch": 0.9, + "learning_rate": 2.642432044747711e-09, + "logits/chosen": -3.246312141418457, + "logits/rejected": -3.1055374145507812, + "logps/chosen": -490.5010681152344, + "logps/rejected": -2168.868896484375, + "loss": 0.516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7154632806777954, + "rewards/margins": 5.292227268218994, + "rewards/rejected": -4.576764106750488, + "step": 705 + }, + { + "epoch": 0.9, + "learning_rate": 2.57653538469953e-09, + "logits/chosen": -3.30609393119812, + "logits/rejected": -3.0671210289001465, + "logps/chosen": -452.2392883300781, + "logps/rejected": -1005.873291015625, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8757721185684204, + "rewards/margins": 3.342559814453125, + "rewards/rejected": -2.466787815093994, + "step": 706 + }, + { + "epoch": 0.9, + "learning_rate": 2.51144906081584e-09, + "logits/chosen": -3.2903494834899902, + "logits/rejected": -3.173492193222046, + "logps/chosen": -524.7396240234375, + "logps/rejected": -1310.52734375, + "loss": 0.5713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6703201532363892, + "rewards/margins": 4.4640092849731445, + "rewards/rejected": -3.793689250946045, + "step": 707 + }, + { + "epoch": 0.9, + "learning_rate": 2.4471741852423233e-09, + "logits/chosen": -3.258721351623535, + "logits/rejected": -3.1169066429138184, + "logps/chosen": -471.3511962890625, + "logps/rejected": -3997.01025390625, + "loss": 0.5131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9098007678985596, + "rewards/margins": 7.558055400848389, + "rewards/rejected": -6.64825439453125, + "step": 708 + }, + { + "epoch": 0.9, + "learning_rate": 2.3837118562592794e-09, + "logits/chosen": -3.260406732559204, + "logits/rejected": -3.142345428466797, + "logps/chosen": -567.551513671875, + "logps/rejected": -698.1959838867188, + "loss": 0.5692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9303741455078125, + "rewards/margins": 2.4435577392578125, + "rewards/rejected": -1.51318359375, + "step": 709 + }, + { + "epoch": 0.91, + "learning_rate": 2.3210631582627928e-09, + "logits/chosen": -3.225212574005127, + "logits/rejected": -3.1163978576660156, + "logps/chosen": -482.0853271484375, + "logps/rejected": -1799.8553466796875, + "loss": 0.5428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9712890386581421, + "rewards/margins": 5.722326755523682, + "rewards/rejected": -4.75103759765625, + "step": 710 + }, + { + "epoch": 0.91, + "learning_rate": 2.259229161746279e-09, + "logits/chosen": -3.3131003379821777, + "logits/rejected": -3.1518383026123047, + "logps/chosen": -531.0181884765625, + "logps/rejected": -4307.8115234375, + "loss": 0.5427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9188202023506165, + "rewards/margins": 7.958792209625244, + "rewards/rejected": -7.039972305297852, + "step": 711 + }, + { + "epoch": 0.91, + "learning_rate": 2.198210923282118e-09, + "logits/chosen": -3.297356128692627, + "logits/rejected": -3.108429431915283, + "logps/chosen": -490.29571533203125, + "logps/rejected": -2148.801513671875, + "loss": 0.5255, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.18804931640625, + "rewards/margins": 5.837634563446045, + "rewards/rejected": -4.649585247039795, + "step": 712 + }, + { + "epoch": 0.91, + "learning_rate": 2.1380094855036614e-09, + "logits/chosen": -3.290456771850586, + "logits/rejected": -3.1700267791748047, + "logps/chosen": -521.0857543945312, + "logps/rejected": -1315.7650146484375, + "loss": 0.5176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0096421241760254, + "rewards/margins": 4.386595249176025, + "rewards/rejected": -3.376953125, + "step": 713 + }, + { + "epoch": 0.91, + "learning_rate": 2.0786258770873644e-09, + "logits/chosen": -3.2766332626342773, + "logits/rejected": -3.179960012435913, + "logps/chosen": -491.67352294921875, + "logps/rejected": -1025.7476806640625, + "loss": 0.5603, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0945053100585938, + "rewards/margins": 4.157670974731445, + "rewards/rejected": -3.0631651878356934, + "step": 714 + }, + { + "epoch": 0.91, + "learning_rate": 2.020061112735266e-09, + "logits/chosen": -3.2421512603759766, + "logits/rejected": -3.155616044998169, + "logps/chosen": -502.52020263671875, + "logps/rejected": -1025.70751953125, + "loss": 0.5217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.86065673828125, + "rewards/margins": 3.5328369140625, + "rewards/rejected": -2.67218017578125, + "step": 715 + }, + { + "epoch": 0.91, + "learning_rate": 1.9623161931575925e-09, + "logits/chosen": -3.287320852279663, + "logits/rejected": -3.2197775840759277, + "logps/chosen": -451.2357177734375, + "logps/rejected": -1045.6314697265625, + "loss": 0.5333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9696915149688721, + "rewards/margins": 4.089973449707031, + "rewards/rejected": -3.120281934738159, + "step": 716 + }, + { + "epoch": 0.91, + "learning_rate": 1.905392105055703e-09, + "logits/chosen": -3.2532520294189453, + "logits/rejected": -3.20562481880188, + "logps/chosen": -514.481689453125, + "logps/rejected": -1448.60302734375, + "loss": 0.5433, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.000732421875, + "rewards/margins": 4.525683403015137, + "rewards/rejected": -3.524951219558716, + "step": 717 + }, + { + "epoch": 0.92, + "learning_rate": 1.8492898211051989e-09, + "logits/chosen": -3.289468288421631, + "logits/rejected": -3.1262879371643066, + "logps/chosen": -499.6634521484375, + "logps/rejected": -2735.09765625, + "loss": 0.5272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9019180536270142, + "rewards/margins": 6.617469787597656, + "rewards/rejected": -5.715551853179932, + "step": 718 + }, + { + "epoch": 0.92, + "learning_rate": 1.7940102999393193e-09, + "logits/chosen": -3.24631404876709, + "logits/rejected": -3.110023260116577, + "logps/chosen": -619.1136474609375, + "logps/rejected": -1516.482666015625, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0365692377090454, + "rewards/margins": 4.723172187805176, + "rewards/rejected": -3.6866025924682617, + "step": 719 + }, + { + "epoch": 0.92, + "learning_rate": 1.7395544861325716e-09, + "logits/chosen": -3.254059076309204, + "logits/rejected": -3.1262636184692383, + "logps/chosen": -512.706298828125, + "logps/rejected": -2562.168212890625, + "loss": 0.5494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7831802368164062, + "rewards/margins": 7.281251907348633, + "rewards/rejected": -6.498071193695068, + "step": 720 + }, + { + "epoch": 0.92, + "learning_rate": 1.6859233101845506e-09, + "logits/chosen": -3.2435967922210693, + "logits/rejected": -3.1704678535461426, + "logps/chosen": -565.8380126953125, + "logps/rejected": -1203.8248291015625, + "loss": 0.5841, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1061737537384033, + "rewards/margins": 4.084185600280762, + "rewards/rejected": -2.9780120849609375, + "step": 721 + }, + { + "epoch": 0.92, + "learning_rate": 1.6331176885040876e-09, + "logits/chosen": -3.2461063861846924, + "logits/rejected": -3.166045665740967, + "logps/chosen": -561.8156127929688, + "logps/rejected": -1093.817626953125, + "loss": 0.5768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8172973394393921, + "rewards/margins": 3.4744386672973633, + "rewards/rejected": -2.6571412086486816, + "step": 722 + }, + { + "epoch": 0.92, + "learning_rate": 1.5811385233935548e-09, + "logits/chosen": -3.267489194869995, + "logits/rejected": -3.157691240310669, + "logps/chosen": -514.5203857421875, + "logps/rejected": -951.96923828125, + "loss": 0.5725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7985168695449829, + "rewards/margins": 3.226226806640625, + "rewards/rejected": -2.4277100563049316, + "step": 723 + }, + { + "epoch": 0.92, + "learning_rate": 1.5299867030334812e-09, + "logits/chosen": -3.2799465656280518, + "logits/rejected": -3.1498923301696777, + "logps/chosen": -473.46832275390625, + "logps/rejected": -1191.9296875, + "loss": 0.5437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6945816278457642, + "rewards/margins": 3.6799516677856445, + "rewards/rejected": -2.98537015914917, + "step": 724 + }, + { + "epoch": 0.92, + "learning_rate": 1.4796631014673322e-09, + "logits/chosen": -3.2373907566070557, + "logits/rejected": -3.1999316215515137, + "logps/chosen": -472.339599609375, + "logps/rejected": -1384.365234375, + "loss": 0.5211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.97528076171875, + "rewards/margins": 5.328009128570557, + "rewards/rejected": -4.352728366851807, + "step": 725 + }, + { + "epoch": 0.93, + "learning_rate": 1.4301685785866214e-09, + "logits/chosen": -3.263711929321289, + "logits/rejected": -3.1181671619415283, + "logps/chosen": -552.4398803710938, + "logps/rejected": -1448.517578125, + "loss": 0.5567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1125717163085938, + "rewards/margins": 4.619078159332275, + "rewards/rejected": -3.5065064430236816, + "step": 726 + }, + { + "epoch": 0.93, + "learning_rate": 1.3815039801161721e-09, + "logits/chosen": -3.2887091636657715, + "logits/rejected": -3.17236065864563, + "logps/chosen": -497.21173095703125, + "logps/rejected": -907.3350830078125, + "loss": 0.5333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9876160025596619, + "rewards/margins": 3.441509962081909, + "rewards/rejected": -2.4538941383361816, + "step": 727 + }, + { + "epoch": 0.93, + "learning_rate": 1.3336701375997127e-09, + "logits/chosen": -3.2995100021362305, + "logits/rejected": -3.1180405616760254, + "logps/chosen": -531.035888671875, + "logps/rejected": -1258.103271484375, + "loss": 0.5489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7954940795898438, + "rewards/margins": 3.787816047668457, + "rewards/rejected": -2.9923219680786133, + "step": 728 + }, + { + "epoch": 0.93, + "learning_rate": 1.2866678683856268e-09, + "logits/chosen": -3.3145952224731445, + "logits/rejected": -3.211009979248047, + "logps/chosen": -475.5011901855469, + "logps/rejected": -1563.0875244140625, + "loss": 0.5449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8593215942382812, + "rewards/margins": 5.111421585083008, + "rewards/rejected": -4.252099990844727, + "step": 729 + }, + { + "epoch": 0.93, + "learning_rate": 1.240497975613014e-09, + "logits/chosen": -3.295989751815796, + "logits/rejected": -3.192444324493408, + "logps/chosen": -493.3877258300781, + "logps/rejected": -1251.3480224609375, + "loss": 0.5517, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1190109252929688, + "rewards/margins": 4.377100944519043, + "rewards/rejected": -3.258090019226074, + "step": 730 + }, + { + "epoch": 0.93, + "learning_rate": 1.1951612481979567e-09, + "logits/chosen": -3.268972158432007, + "logits/rejected": -3.1217355728149414, + "logps/chosen": -508.59912109375, + "logps/rejected": -1163.62109375, + "loss": 0.5427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9388839602470398, + "rewards/margins": 4.195390701293945, + "rewards/rejected": -3.2565064430236816, + "step": 731 + }, + { + "epoch": 0.93, + "learning_rate": 1.1506584608200365e-09, + "logits/chosen": -3.2957544326782227, + "logits/rejected": -3.2530646324157715, + "logps/chosen": -481.36907958984375, + "logps/rejected": -1130.989013671875, + "loss": 0.5033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7396163940429688, + "rewards/margins": 3.6421189308166504, + "rewards/rejected": -2.9025025367736816, + "step": 732 + }, + { + "epoch": 0.93, + "learning_rate": 1.1069903739091002e-09, + "logits/chosen": -3.274744987487793, + "logits/rejected": -3.2629542350769043, + "logps/chosen": -546.17138671875, + "logps/rejected": -2067.436279296875, + "loss": 0.5445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.565899670124054, + "rewards/margins": 5.553228855133057, + "rewards/rejected": -4.987329483032227, + "step": 733 + }, + { + "epoch": 0.94, + "learning_rate": 1.064157733632276e-09, + "logits/chosen": -3.309403896331787, + "logits/rejected": -3.2253975868225098, + "logps/chosen": -525.2713012695312, + "logps/rejected": -1152.5360107421875, + "loss": 0.5472, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.229583740234375, + "rewards/margins": 4.208932876586914, + "rewards/rejected": -2.97934889793396, + "step": 734 + }, + { + "epoch": 0.94, + "learning_rate": 1.0221612718812e-09, + "logits/chosen": -3.2771530151367188, + "logits/rejected": -3.185153007507324, + "logps/chosen": -468.62646484375, + "logps/rejected": -1474.5665283203125, + "loss": 0.5215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7786300778388977, + "rewards/margins": 4.650944709777832, + "rewards/rejected": -3.872314453125, + "step": 735 + }, + { + "epoch": 0.94, + "learning_rate": 9.81001706259532e-10, + "logits/chosen": -3.2356982231140137, + "logits/rejected": -3.1471900939941406, + "logps/chosen": -448.3601989746094, + "logps/rejected": -1753.854736328125, + "loss": 0.5257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9384216070175171, + "rewards/margins": 5.46970272064209, + "rewards/rejected": -4.531280517578125, + "step": 736 + }, + { + "epoch": 0.94, + "learning_rate": 9.40679740070688e-10, + "logits/chosen": -3.264688014984131, + "logits/rejected": -3.04946231842041, + "logps/chosen": -482.28570556640625, + "logps/rejected": -2090.5517578125, + "loss": 0.5422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8423675298690796, + "rewards/margins": 5.390939712524414, + "rewards/rejected": -4.548571586608887, + "step": 737 + }, + { + "epoch": 0.94, + "learning_rate": 9.011960623058201e-10, + "logits/chosen": -3.2361435890197754, + "logits/rejected": -3.0761566162109375, + "logps/chosen": -546.13671875, + "logps/rejected": -1718.40234375, + "loss": 0.5396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9372085332870483, + "rewards/margins": 5.079616069793701, + "rewards/rejected": -4.142407417297363, + "step": 738 + }, + { + "epoch": 0.94, + "learning_rate": 8.625513476320289e-10, + "logits/chosen": -3.303201675415039, + "logits/rejected": -3.2213454246520996, + "logps/chosen": -461.65740966796875, + "logps/rejected": -1129.300048828125, + "loss": 0.5108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9498672485351562, + "rewards/margins": 3.9652390480041504, + "rewards/rejected": -3.015371799468994, + "step": 739 + }, + { + "epoch": 0.94, + "learning_rate": 8.247462563808816e-10, + "logits/chosen": -3.2988877296447754, + "logits/rejected": -3.1688575744628906, + "logps/chosen": -522.9437255859375, + "logps/rejected": -1221.2314453125, + "loss": 0.5498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9621948003768921, + "rewards/margins": 4.112787246704102, + "rewards/rejected": -3.150592088699341, + "step": 740 + }, + { + "epoch": 0.94, + "learning_rate": 7.877814345370715e-10, + "logits/chosen": -3.2455968856811523, + "logits/rejected": -3.11675763130188, + "logps/chosen": -462.580322265625, + "logps/rejected": -980.9552001953125, + "loss": 0.5573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8963211178779602, + "rewards/margins": 3.0430099964141846, + "rewards/rejected": -2.146688938140869, + "step": 741 + }, + { + "epoch": 0.95, + "learning_rate": 7.516575137274162e-10, + "logits/chosen": -3.3594489097595215, + "logits/rejected": -3.198986530303955, + "logps/chosen": -484.9338073730469, + "logps/rejected": -702.7652587890625, + "loss": 0.5492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.89361572265625, + "rewards/margins": 2.658761501312256, + "rewards/rejected": -1.7651458978652954, + "step": 742 + }, + { + "epoch": 0.95, + "learning_rate": 7.163751112100435e-10, + "logits/chosen": -3.3025779724121094, + "logits/rejected": -3.253718852996826, + "logps/chosen": -478.9678955078125, + "logps/rejected": -1814.513427734375, + "loss": 0.5316, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.024775743484497, + "rewards/margins": 5.817750453948975, + "rewards/rejected": -4.792974948883057, + "step": 743 + }, + { + "epoch": 0.95, + "learning_rate": 6.819348298638839e-10, + "logits/chosen": -3.2417845726013184, + "logits/rejected": -3.1138381958007812, + "logps/chosen": -522.3353271484375, + "logps/rejected": -1353.1517333984375, + "loss": 0.5503, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.037846326828003, + "rewards/margins": 4.615690231323242, + "rewards/rejected": -3.5778441429138184, + "step": 744 + }, + { + "epoch": 0.95, + "learning_rate": 6.483372581783054e-10, + "logits/chosen": -3.302061080932617, + "logits/rejected": -3.130181312561035, + "logps/chosen": -547.595703125, + "logps/rejected": -3761.224609375, + "loss": 0.558, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.357086181640625, + "rewards/margins": 6.74454927444458, + "rewards/rejected": -5.387463569641113, + "step": 745 + }, + { + "epoch": 0.95, + "learning_rate": 6.15582970243117e-10, + "logits/chosen": -3.269927978515625, + "logits/rejected": -3.0831432342529297, + "logps/chosen": -486.65423583984375, + "logps/rejected": -1941.484375, + "loss": 0.5342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7791641354560852, + "rewards/margins": 5.4162187576293945, + "rewards/rejected": -4.637054443359375, + "step": 746 + }, + { + "epoch": 0.95, + "learning_rate": 5.83672525738721e-10, + "logits/chosen": -3.2268295288085938, + "logits/rejected": -3.2279052734375, + "logps/chosen": -507.7053527832031, + "logps/rejected": -1011.1295166015625, + "loss": 0.5651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7724670171737671, + "rewards/margins": 3.8860108852386475, + "rewards/rejected": -3.113543748855591, + "step": 747 + }, + { + "epoch": 0.95, + "learning_rate": 5.526064699265753e-10, + "logits/chosen": -3.2647318840026855, + "logits/rejected": -3.1151719093322754, + "logps/chosen": -503.17071533203125, + "logps/rejected": -1111.170654296875, + "loss": 0.5569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7759063839912415, + "rewards/margins": 3.50449538230896, + "rewards/rejected": -2.7285890579223633, + "step": 748 + }, + { + "epoch": 0.95, + "learning_rate": 5.223853336398632e-10, + "logits/chosen": -3.327986717224121, + "logits/rejected": -3.1283998489379883, + "logps/chosen": -511.1422424316406, + "logps/rejected": -3534.906982421875, + "loss": 0.5212, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0115936994552612, + "rewards/margins": 8.068313598632812, + "rewards/rejected": -7.05672025680542, + "step": 749 + }, + { + "epoch": 0.96, + "learning_rate": 4.930096332744105e-10, + "logits/chosen": -3.233983039855957, + "logits/rejected": -3.183640480041504, + "logps/chosen": -524.10009765625, + "logps/rejected": -1540.1561279296875, + "loss": 0.5307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8865082263946533, + "rewards/margins": 4.978360176086426, + "rewards/rejected": -4.091851711273193, + "step": 750 + }, + { + "epoch": 0.96, + "learning_rate": 4.644798707798936e-10, + "logits/chosen": -3.2713773250579834, + "logits/rejected": -3.1564221382141113, + "logps/chosen": -526.038330078125, + "logps/rejected": -2224.3017578125, + "loss": 0.5332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0599274635314941, + "rewards/margins": 6.600186347961426, + "rewards/rejected": -5.540258884429932, + "step": 751 + }, + { + "epoch": 0.96, + "learning_rate": 4.3679653365124024e-10, + "logits/chosen": -3.251991033554077, + "logits/rejected": -3.1761794090270996, + "logps/chosen": -483.2640686035156, + "logps/rejected": -1228.8470458984375, + "loss": 0.4663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.966564953327179, + "rewards/margins": 4.291738986968994, + "rewards/rejected": -3.32517409324646, + "step": 752 + }, + { + "epoch": 0.96, + "learning_rate": 4.0996009492029195e-10, + "logits/chosen": -3.296309471130371, + "logits/rejected": -3.0643913745880127, + "logps/chosen": -525.4542846679688, + "logps/rejected": -1463.114501953125, + "loss": 0.5457, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0480788946151733, + "rewards/margins": 4.5547685623168945, + "rewards/rejected": -3.5066895484924316, + "step": 753 + }, + { + "epoch": 0.96, + "learning_rate": 3.8397101314774914e-10, + "logits/chosen": -3.2709736824035645, + "logits/rejected": -3.1847128868103027, + "logps/chosen": -558.268798828125, + "logps/rejected": -1217.878662109375, + "loss": 0.5857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9752410650253296, + "rewards/margins": 3.9767792224884033, + "rewards/rejected": -3.0015382766723633, + "step": 754 + }, + { + "epoch": 0.96, + "learning_rate": 3.588297324153056e-10, + "logits/chosen": -3.2425856590270996, + "logits/rejected": -3.1491990089416504, + "logps/chosen": -441.78338623046875, + "logps/rejected": -2243.21923828125, + "loss": 0.5326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9410660266876221, + "rewards/margins": 5.7710466384887695, + "rewards/rejected": -4.829980850219727, + "step": 755 + }, + { + "epoch": 0.96, + "learning_rate": 3.345366823180928e-10, + "logits/chosen": -3.320582389831543, + "logits/rejected": -3.1262741088867188, + "logps/chosen": -509.2082214355469, + "logps/rejected": -949.1221313476562, + "loss": 0.5711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8413604497909546, + "rewards/margins": 3.201223850250244, + "rewards/rejected": -2.35986328125, + "step": 756 + }, + { + "epoch": 0.96, + "learning_rate": 3.110922779573033e-10, + "logits/chosen": -3.1978025436401367, + "logits/rejected": -3.19927978515625, + "logps/chosen": -524.9244995117188, + "logps/rejected": -1983.62744140625, + "loss": 0.5528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7497757077217102, + "rewards/margins": 5.794374465942383, + "rewards/rejected": -5.044598579406738, + "step": 757 + }, + { + "epoch": 0.97, + "learning_rate": 2.8849691993311776e-10, + "logits/chosen": -3.234644651412964, + "logits/rejected": -3.108428478240967, + "logps/chosen": -495.3336181640625, + "logps/rejected": -1256.174072265625, + "loss": 0.5478, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0741028785705566, + "rewards/margins": 4.192291259765625, + "rewards/rejected": -3.1181883811950684, + "step": 758 + }, + { + "epoch": 0.97, + "learning_rate": 2.667509943378721e-10, + "logits/chosen": -3.3038992881774902, + "logits/rejected": -3.0780563354492188, + "logps/chosen": -545.6382446289062, + "logps/rejected": -1205.322265625, + "loss": 0.555, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0170745849609375, + "rewards/margins": 3.560476779937744, + "rewards/rejected": -2.5434021949768066, + "step": 759 + }, + { + "epoch": 0.97, + "learning_rate": 2.4585487274942915e-10, + "logits/chosen": -3.2975525856018066, + "logits/rejected": -3.1621127128601074, + "logps/chosen": -455.1292724609375, + "logps/rejected": -866.5518798828125, + "loss": 0.5125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9709945917129517, + "rewards/margins": 3.0583696365356445, + "rewards/rejected": -2.0873749256134033, + "step": 760 + }, + { + "epoch": 0.97, + "learning_rate": 2.2580891222485632e-10, + "logits/chosen": -3.3347487449645996, + "logits/rejected": -3.167306423187256, + "logps/chosen": -504.80615234375, + "logps/rejected": -1370.097412109375, + "loss": 0.5519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8507126569747925, + "rewards/margins": 4.239329814910889, + "rewards/rejected": -3.3886170387268066, + "step": 761 + }, + { + "epoch": 0.97, + "learning_rate": 2.0661345529430774e-10, + "logits/chosen": -3.2598676681518555, + "logits/rejected": -3.1624250411987305, + "logps/chosen": -486.27880859375, + "logps/rejected": -1399.5693359375, + "loss": 0.5384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9907044172286987, + "rewards/margins": 5.2539215087890625, + "rewards/rejected": -4.263216972351074, + "step": 762 + }, + { + "epoch": 0.97, + "learning_rate": 1.8826882995517934e-10, + "logits/chosen": -3.3174519538879395, + "logits/rejected": -3.172312021255493, + "logps/chosen": -498.462158203125, + "logps/rejected": -986.9016723632812, + "loss": 0.548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9892319440841675, + "rewards/margins": 3.2031846046447754, + "rewards/rejected": -2.2139525413513184, + "step": 763 + }, + { + "epoch": 0.97, + "learning_rate": 1.7077534966650763e-10, + "logits/chosen": -3.235032320022583, + "logits/rejected": -3.166287660598755, + "logps/chosen": -567.1036376953125, + "logps/rejected": -2317.10302734375, + "loss": 0.5719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.878826916217804, + "rewards/margins": 7.503973960876465, + "rewards/rejected": -6.625146865844727, + "step": 764 + }, + { + "epoch": 0.98, + "learning_rate": 1.541333133436018e-10, + "logits/chosen": -3.2640421390533447, + "logits/rejected": -3.172978162765503, + "logps/chosen": -492.5285949707031, + "logps/rejected": -1921.9427490234375, + "loss": 0.5154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9217026233673096, + "rewards/margins": 5.44651985168457, + "rewards/rejected": -4.524816989898682, + "step": 765 + }, + { + "epoch": 0.98, + "learning_rate": 1.3834300535294218e-10, + "logits/chosen": -3.2921254634857178, + "logits/rejected": -3.153308868408203, + "logps/chosen": -473.56640625, + "logps/rejected": -871.0614013671875, + "loss": 0.5612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.802716076374054, + "rewards/margins": 3.031790256500244, + "rewards/rejected": -2.229074001312256, + "step": 766 + }, + { + "epoch": 0.98, + "learning_rate": 1.2340469550733423e-10, + "logits/chosen": -3.1937904357910156, + "logits/rejected": -3.1740593910217285, + "logps/chosen": -517.7933959960938, + "logps/rejected": -1071.4599609375, + "loss": 0.5659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9914611577987671, + "rewards/margins": 3.231350898742676, + "rewards/rejected": -2.239889621734619, + "step": 767 + }, + { + "epoch": 0.98, + "learning_rate": 1.0931863906127325e-10, + "logits/chosen": -3.2791872024536133, + "logits/rejected": -3.1646833419799805, + "logps/chosen": -515.1959838867188, + "logps/rejected": -878.6861572265625, + "loss": 0.5621, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0534499883651733, + "rewards/margins": 3.0210037231445312, + "rewards/rejected": -1.9675538539886475, + "step": 768 + }, + { + "epoch": 0.98, + "learning_rate": 9.608507670659238e-11, + "logits/chosen": -3.2905702590942383, + "logits/rejected": -3.146240234375, + "logps/chosen": -503.6970520019531, + "logps/rejected": -1489.6058349609375, + "loss": 0.5322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9238037467002869, + "rewards/margins": 4.863977432250977, + "rewards/rejected": -3.940173387527466, + "step": 769 + }, + { + "epoch": 0.98, + "learning_rate": 8.370423456837139e-11, + "logits/chosen": -3.264346122741699, + "logits/rejected": -3.0709304809570312, + "logps/chosen": -538.8037109375, + "logps/rejected": -1967.578857421875, + "loss": 0.548, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0610748529434204, + "rewards/margins": 5.7669219970703125, + "rewards/rejected": -4.705847263336182, + "step": 770 + }, + { + "epoch": 0.98, + "learning_rate": 7.217632420102871e-11, + "logits/chosen": -3.1385388374328613, + "logits/rejected": -3.1226179599761963, + "logps/chosen": -553.2701416015625, + "logps/rejected": -1099.1259765625, + "loss": 0.5615, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2449921369552612, + "rewards/margins": 4.362298488616943, + "rewards/rejected": -3.1173064708709717, + "step": 771 + }, + { + "epoch": 0.98, + "learning_rate": 6.150154258476314e-11, + "logits/chosen": -3.315349578857422, + "logits/rejected": -3.126819610595703, + "logps/chosen": -460.9801940917969, + "logps/rejected": -2316.98974609375, + "loss": 0.5106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8758208751678467, + "rewards/margins": 6.4495086669921875, + "rewards/rejected": -5.573687553405762, + "step": 772 + }, + { + "epoch": 0.99, + "learning_rate": 5.168007212212333e-11, + "logits/chosen": -3.263363838195801, + "logits/rejected": -3.1710357666015625, + "logps/chosen": -469.7742004394531, + "logps/rejected": -1230.8017578125, + "loss": 0.5316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.789532482624054, + "rewards/margins": 3.7184174060821533, + "rewards/rejected": -2.928884983062744, + "step": 773 + }, + { + "epoch": 0.99, + "learning_rate": 4.271208063494902e-11, + "logits/chosen": -3.2615139484405518, + "logits/rejected": -3.2333085536956787, + "logps/chosen": -523.1578369140625, + "logps/rejected": -986.5601806640625, + "loss": 0.5554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8735840320587158, + "rewards/margins": 3.788745164871216, + "rewards/rejected": -2.9151611328125, + "step": 774 + }, + { + "epoch": 0.99, + "learning_rate": 3.459772136146788e-11, + "logits/chosen": -3.2611608505249023, + "logits/rejected": -3.0809338092803955, + "logps/chosen": -529.75390625, + "logps/rejected": -945.0029296875, + "loss": 0.5807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8696533441543579, + "rewards/margins": 3.3007631301879883, + "rewards/rejected": -2.431109666824341, + "step": 775 + }, + { + "epoch": 0.99, + "learning_rate": 2.733713295369755e-11, + "logits/chosen": -3.2572054862976074, + "logits/rejected": -3.0397162437438965, + "logps/chosen": -529.0992431640625, + "logps/rejected": -1271.9498291015625, + "loss": 0.5397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9938293695449829, + "rewards/margins": 3.702061653137207, + "rewards/rejected": -2.7082321643829346, + "step": 776 + }, + { + "epoch": 0.99, + "learning_rate": 2.093043947505868e-11, + "logits/chosen": -3.274184465408325, + "logits/rejected": -3.2816665172576904, + "logps/chosen": -486.77813720703125, + "logps/rejected": -4028.158203125, + "loss": 0.5014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.04973304271698, + "rewards/margins": 3.7855546474456787, + "rewards/rejected": -2.735821485519409, + "step": 777 + }, + { + "epoch": 0.99, + "learning_rate": 1.53777503982655e-11, + "logits/chosen": -3.225975751876831, + "logits/rejected": -3.097996234893799, + "logps/chosen": -510.79461669921875, + "logps/rejected": -2269.97705078125, + "loss": 0.5168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7882858514785767, + "rewards/margins": 6.481346130371094, + "rewards/rejected": -5.693060398101807, + "step": 778 + }, + { + "epoch": 0.99, + "learning_rate": 1.0679160603449533e-11, + "logits/chosen": -3.311831474304199, + "logits/rejected": -3.1999335289001465, + "logps/chosen": -562.6742553710938, + "logps/rejected": -1491.100830078125, + "loss": 0.5718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9108032584190369, + "rewards/margins": 5.156842231750488, + "rewards/rejected": -4.246038913726807, + "step": 779 + }, + { + "epoch": 0.99, + "learning_rate": 6.834750376549792e-12, + "logits/chosen": -3.240342617034912, + "logits/rejected": -3.070404529571533, + "logps/chosen": -489.52142333984375, + "logps/rejected": -1789.8131103515625, + "loss": 0.5635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8269363641738892, + "rewards/margins": 5.236854553222656, + "rewards/rejected": -4.409918308258057, + "step": 780 + }, + { + "epoch": 1.0, + "learning_rate": 3.844585407936085e-12, + "logits/chosen": -3.2672719955444336, + "logits/rejected": -3.130955696105957, + "logps/chosen": -490.0404968261719, + "logps/rejected": -1418.796630859375, + "loss": 0.5244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9284149408340454, + "rewards/margins": 4.788723945617676, + "rewards/rejected": -3.86030912399292, + "step": 781 + }, + { + "epoch": 1.0, + "learning_rate": 1.7087167912710475e-12, + "logits/chosen": -3.2417654991149902, + "logits/rejected": -3.078120231628418, + "logps/chosen": -509.76361083984375, + "logps/rejected": -2094.827880859375, + "loss": 0.5863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0471512079238892, + "rewards/margins": 6.479811668395996, + "rewards/rejected": -5.432660102844238, + "step": 782 + }, + { + "epoch": 1.0, + "learning_rate": 4.271810226552652e-13, + "logits/chosen": -3.2937939167022705, + "logits/rejected": -3.180586576461792, + "logps/chosen": -515.1351318359375, + "logps/rejected": -1228.718017578125, + "loss": 0.5742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1198532581329346, + "rewards/margins": 4.454627990722656, + "rewards/rejected": -3.334774971008301, + "step": 783 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -3.2594666481018066, + "logits/rejected": -3.107339382171631, + "logps/chosen": -502.82763671875, + "logps/rejected": -1773.623046875, + "loss": 0.5141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8907226324081421, + "rewards/margins": 4.794952392578125, + "rewards/rejected": -3.9042296409606934, + "step": 784 + }, + { + "epoch": 1.0, + "step": 784, + "total_flos": 0.0, + "train_loss": 0.6116742905685488, + "train_runtime": 1620.4445, + "train_samples_per_second": 3.872, + "train_steps_per_second": 0.484 + } + ], + "logging_steps": 1.0, + "max_steps": 784, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..abd56f6bc1b538198ad4e8c984a0bbb9752b1c66 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "up_proj", + "down_proj", + "q_proj", + "v_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8585ae8aa77c3b0d230337461a840830907442d9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e304730fa5226d748648e0bdab9163e415d731b59893e0d530002e65821671d +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..05b2b06af126bb0bef072716ecf1ff250b539520 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/trainer_state.json @@ -0,0 +1,21996 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1569, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.083333333333333e-09, + "logits/chosen": -3.15675687789917, + "logits/rejected": -3.0999014377593994, + "logps/chosen": -309.47918701171875, + "logps/rejected": -694.205078125, + "loss": 0.9909, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01025543175637722, + "rewards/margins": -0.008601382374763489, + "rewards/rejected": 0.01885681226849556, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.166666666666666e-09, + "logits/chosen": -3.128502368927002, + "logits/rejected": -2.974919319152832, + "logps/chosen": -236.93084716796875, + "logps/rejected": -1584.2825927734375, + "loss": 0.9587, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.006600952707231045, + "rewards/margins": 0.02595519833266735, + "rewards/rejected": -0.03255615383386612, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6.25e-09, + "logits/chosen": -3.1808934211730957, + "logits/rejected": -2.9976394176483154, + "logps/chosen": -283.38983154296875, + "logps/rejected": -1073.7008056640625, + "loss": 0.9607, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02646484412252903, + "rewards/margins": 0.0572814904153347, + "rewards/rejected": -0.03081665001809597, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -3.232327461242676, + "logits/rejected": -3.1624488830566406, + "logps/chosen": -258.3663635253906, + "logps/rejected": -739.406494140625, + "loss": 0.9735, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.02540740929543972, + "rewards/margins": -0.00801849365234375, + "rewards/rejected": 0.03342590481042862, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.0416666666666667e-08, + "logits/chosen": -3.290365695953369, + "logits/rejected": -3.0821852684020996, + "logps/chosen": -249.19638061523438, + "logps/rejected": -1055.1370849609375, + "loss": 0.9544, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01109314151108265, + "rewards/margins": 0.0650482177734375, + "rewards/rejected": -0.053955078125, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-08, + "logits/chosen": -3.2094433307647705, + "logits/rejected": -3.0841879844665527, + "logps/chosen": -297.5134582519531, + "logps/rejected": -649.134765625, + "loss": 1.0002, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.06611328572034836, + "rewards/margins": 0.031243901699781418, + "rewards/rejected": 0.03486938402056694, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.4583333333333333e-08, + "logits/chosen": -3.168616771697998, + "logits/rejected": -3.011223793029785, + "logps/chosen": -279.7734375, + "logps/rejected": -1396.24169921875, + "loss": 0.9911, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01326599158346653, + "rewards/margins": 0.07658997178077698, + "rewards/rejected": -0.063323974609375, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -3.248640537261963, + "logits/rejected": -2.885458469390869, + "logps/chosen": -265.33184814453125, + "logps/rejected": -1135.225341796875, + "loss": 0.9961, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.017800141125917435, + "rewards/margins": 0.018627166748046875, + "rewards/rejected": -0.03642730787396431, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 1.875e-08, + "logits/chosen": -3.190768241882324, + "logits/rejected": -2.9129083156585693, + "logps/chosen": -283.9991455078125, + "logps/rejected": -1004.7301025390625, + "loss": 0.9701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02012481726706028, + "rewards/margins": 0.07172393798828125, + "rewards/rejected": -0.05159912258386612, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333335e-08, + "logits/chosen": -3.2195353507995605, + "logits/rejected": -3.1214981079101562, + "logps/chosen": -240.51512145996094, + "logps/rejected": -625.6729736328125, + "loss": 0.9795, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.005255889147520065, + "rewards/margins": -0.062123872339725494, + "rewards/rejected": 0.05686798319220543, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 2.2916666666666663e-08, + "logits/chosen": -3.2376036643981934, + "logits/rejected": -3.030986785888672, + "logps/chosen": -269.2669677734375, + "logps/rejected": -460.15130615234375, + "loss": 0.9594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05849609524011612, + "rewards/margins": 0.08006744831800461, + "rewards/rejected": -0.02157134935259819, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-08, + "logits/chosen": -3.163285255432129, + "logits/rejected": -3.0087051391601562, + "logps/chosen": -256.51898193359375, + "logps/rejected": -644.1416625976562, + "loss": 0.9595, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02245178259909153, + "rewards/margins": 0.01277618482708931, + "rewards/rejected": -0.03522796928882599, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 2.708333333333333e-08, + "logits/chosen": -3.1910016536712646, + "logits/rejected": -3.0419483184814453, + "logps/chosen": -251.08712768554688, + "logps/rejected": -462.1988830566406, + "loss": 0.9503, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.08537979423999786, + "rewards/margins": -0.07179488986730576, + "rewards/rejected": -0.01358489878475666, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 2.9166666666666666e-08, + "logits/chosen": -3.1872494220733643, + "logits/rejected": -3.0448923110961914, + "logps/chosen": -267.3841552734375, + "logps/rejected": -1176.688232421875, + "loss": 0.9819, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.00503387488424778, + "rewards/margins": -0.09382171183824539, + "rewards/rejected": 0.08878783881664276, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 3.125e-08, + "logits/chosen": -3.2133431434631348, + "logits/rejected": -3.0041370391845703, + "logps/chosen": -257.7562255859375, + "logps/rejected": -672.7850341796875, + "loss": 0.9611, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.024643708020448685, + "rewards/margins": 0.013100430369377136, + "rewards/rejected": 0.011543277651071548, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -3.2109017372131348, + "logits/rejected": -3.1268811225891113, + "logps/chosen": -254.1451416015625, + "logps/rejected": -466.083251953125, + "loss": 0.9506, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.052446749061346054, + "rewards/margins": -0.041707612574100494, + "rewards/rejected": -0.010739135555922985, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 3.541666666666667e-08, + "logits/chosen": -3.1756319999694824, + "logits/rejected": -3.13623309135437, + "logps/chosen": -272.3403625488281, + "logps/rejected": -681.4178466796875, + "loss": 0.9538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.049539946019649506, + "rewards/margins": 0.11320266127586365, + "rewards/rejected": -0.06366272270679474, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 3.75e-08, + "logits/chosen": -3.1797590255737305, + "logits/rejected": -3.093275308609009, + "logps/chosen": -268.911865234375, + "logps/rejected": -293.7593994140625, + "loss": 0.98, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012300110422074795, + "rewards/margins": 0.02965850755572319, + "rewards/rejected": -0.01735839806497097, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 3.958333333333333e-08, + "logits/chosen": -3.1590588092803955, + "logits/rejected": -3.0327775478363037, + "logps/chosen": -277.9453125, + "logps/rejected": -669.50732421875, + "loss": 0.9322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.029862213879823685, + "rewards/margins": 0.16977769136428833, + "rewards/rejected": -0.13991546630859375, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -3.174617052078247, + "logits/rejected": -3.096684217453003, + "logps/chosen": -263.70355224609375, + "logps/rejected": -632.77587890625, + "loss": 0.937, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03957061842083931, + "rewards/margins": -0.03113861009478569, + "rewards/rejected": -0.008432007394731045, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.375e-08, + "logits/chosen": -3.1613759994506836, + "logits/rejected": -3.0204203128814697, + "logps/chosen": -304.761962890625, + "logps/rejected": -645.5592041015625, + "loss": 0.9694, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.007849120534956455, + "rewards/margins": 0.0498504638671875, + "rewards/rejected": -0.05769958719611168, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 4.583333333333333e-08, + "logits/chosen": -3.1953651905059814, + "logits/rejected": -3.146726608276367, + "logps/chosen": -291.3033447265625, + "logps/rejected": -810.91455078125, + "loss": 1.018, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02947540394961834, + "rewards/margins": -0.02136078104376793, + "rewards/rejected": -0.008114623837172985, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 4.791666666666667e-08, + "logits/chosen": -3.1508798599243164, + "logits/rejected": -3.0901520252227783, + "logps/chosen": -290.57794189453125, + "logps/rejected": -655.2907104492188, + "loss": 0.956, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0005699158064089715, + "rewards/margins": 0.04164276272058487, + "rewards/rejected": -0.041072845458984375, + "step": 23 + }, + { + "epoch": 0.02, + "learning_rate": 5e-08, + "logits/chosen": -3.178182601928711, + "logits/rejected": -3.100224494934082, + "logps/chosen": -272.58416748046875, + "logps/rejected": -507.9247741699219, + "loss": 0.9416, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0009750365279614925, + "rewards/margins": 0.03759918361902237, + "rewards/rejected": -0.03662414476275444, + "step": 24 + }, + { + "epoch": 0.02, + "learning_rate": 5.208333333333333e-08, + "logits/chosen": -3.2183632850646973, + "logits/rejected": -3.10914945602417, + "logps/chosen": -232.6282958984375, + "logps/rejected": -685.448974609375, + "loss": 0.9363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009235382080078125, + "rewards/margins": 0.038820646703243256, + "rewards/rejected": -0.02958526648581028, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 5.416666666666666e-08, + "logits/chosen": -3.197901487350464, + "logits/rejected": -3.0540404319763184, + "logps/chosen": -259.0303649902344, + "logps/rejected": -541.87548828125, + "loss": 0.9694, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.036801911890506744, + "rewards/margins": 0.007257841527462006, + "rewards/rejected": 0.02954407036304474, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 5.625e-08, + "logits/chosen": -3.202110767364502, + "logits/rejected": -3.1363651752471924, + "logps/chosen": -280.56597900390625, + "logps/rejected": -644.0758056640625, + "loss": 0.9478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04965820536017418, + "rewards/margins": 0.11301574856042862, + "rewards/rejected": -0.06335754692554474, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -3.1317083835601807, + "logits/rejected": -3.1019740104675293, + "logps/chosen": -295.4027404785156, + "logps/rejected": -620.846435546875, + "loss": 0.9458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00222167931497097, + "rewards/margins": 0.15144196152687073, + "rewards/rejected": -0.15366363525390625, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 6.041666666666666e-08, + "logits/chosen": -3.2248878479003906, + "logits/rejected": -3.1063506603240967, + "logps/chosen": -293.02655029296875, + "logps/rejected": -445.6553955078125, + "loss": 0.948, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.006393431685864925, + "rewards/margins": -4.5775435864925385e-05, + "rewards/rejected": -0.00634765625, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 6.25e-08, + "logits/chosen": -3.127845525741577, + "logits/rejected": -3.1072216033935547, + "logps/chosen": -299.88232421875, + "logps/rejected": -850.8580932617188, + "loss": 0.9914, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.013099669478833675, + "rewards/margins": -0.006398014724254608, + "rewards/rejected": -0.00670165941119194, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 6.458333333333333e-08, + "logits/chosen": -3.256544589996338, + "logits/rejected": -3.1554768085479736, + "logps/chosen": -259.1778564453125, + "logps/rejected": -380.5565490722656, + "loss": 0.9625, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.01781158521771431, + "rewards/margins": -0.02004852332174778, + "rewards/rejected": 0.03786010667681694, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -3.1998462677001953, + "logits/rejected": -3.108309507369995, + "logps/chosen": -239.94508361816406, + "logps/rejected": -337.3056335449219, + "loss": 0.9119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01578521728515625, + "rewards/margins": 0.08693389594554901, + "rewards/rejected": -0.07114867866039276, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 6.875e-08, + "logits/chosen": -3.238176107406616, + "logits/rejected": -3.044678211212158, + "logps/chosen": -247.3179931640625, + "logps/rejected": -404.37554931640625, + "loss": 0.9397, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01424713246524334, + "rewards/margins": 0.09007950127124786, + "rewards/rejected": -0.07583236694335938, + "step": 33 + }, + { + "epoch": 0.02, + "learning_rate": 7.083333333333334e-08, + "logits/chosen": -3.138125419616699, + "logits/rejected": -3.10152006149292, + "logps/chosen": -266.9291076660156, + "logps/rejected": -489.8109130859375, + "loss": 0.8961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016820525750517845, + "rewards/margins": 0.12141036987304688, + "rewards/rejected": -0.10458984225988388, + "step": 34 + }, + { + "epoch": 0.02, + "learning_rate": 7.291666666666666e-08, + "logits/chosen": -3.180886745452881, + "logits/rejected": -3.0697107315063477, + "logps/chosen": -263.63671875, + "logps/rejected": -597.2960205078125, + "loss": 0.9243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03069152683019638, + "rewards/margins": 0.09635772556066513, + "rewards/rejected": -0.06566619873046875, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-08, + "logits/chosen": -3.2311816215515137, + "logits/rejected": -3.0269885063171387, + "logps/chosen": -258.68408203125, + "logps/rejected": -796.1300659179688, + "loss": 0.9109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.048255160450935364, + "rewards/margins": 0.2136024534702301, + "rewards/rejected": -0.16534729301929474, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 7.708333333333333e-08, + "logits/chosen": -3.1655433177948, + "logits/rejected": -3.093165636062622, + "logps/chosen": -279.62689208984375, + "logps/rejected": -397.1683349609375, + "loss": 0.9181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03373108059167862, + "rewards/margins": 0.11083145439624786, + "rewards/rejected": -0.07710037380456924, + "step": 37 + }, + { + "epoch": 0.02, + "learning_rate": 7.916666666666665e-08, + "logits/chosen": -3.0988235473632812, + "logits/rejected": -3.0810234546661377, + "logps/chosen": -309.4184875488281, + "logps/rejected": -500.89947509765625, + "loss": 0.9616, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05614165961742401, + "rewards/margins": -0.02702636457979679, + "rewards/rejected": -0.02911529690027237, + "step": 38 + }, + { + "epoch": 0.02, + "learning_rate": 8.124999999999999e-08, + "logits/chosen": -3.1704368591308594, + "logits/rejected": -3.0762434005737305, + "logps/chosen": -256.2496337890625, + "logps/rejected": -601.1002197265625, + "loss": 0.9161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00970916822552681, + "rewards/margins": 0.09490204602479935, + "rewards/rejected": -0.08519287407398224, + "step": 39 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -3.1534557342529297, + "logits/rejected": -3.1123886108398438, + "logps/chosen": -286.4242858886719, + "logps/rejected": -702.4635620117188, + "loss": 0.9178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03789367526769638, + "rewards/margins": 0.149444580078125, + "rewards/rejected": -0.11155089735984802, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 8.541666666666666e-08, + "logits/chosen": -3.19879150390625, + "logits/rejected": -3.1510233879089355, + "logps/chosen": -270.3856201171875, + "logps/rejected": -368.37384033203125, + "loss": 0.9454, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0851898193359375, + "rewards/margins": 0.03233490139245987, + "rewards/rejected": -0.11752472817897797, + "step": 41 + }, + { + "epoch": 0.03, + "learning_rate": 8.75e-08, + "logits/chosen": -3.1938090324401855, + "logits/rejected": -3.0607926845550537, + "logps/chosen": -257.7550048828125, + "logps/rejected": -307.13897705078125, + "loss": 0.9596, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02059173583984375, + "rewards/margins": 0.00528717041015625, + "rewards/rejected": 0.0153045654296875, + "step": 42 + }, + { + "epoch": 0.03, + "learning_rate": 8.958333333333333e-08, + "logits/chosen": -3.2574238777160645, + "logits/rejected": -3.0696325302124023, + "logps/chosen": -262.75396728515625, + "logps/rejected": -527.8016357421875, + "loss": 0.901, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0238494873046875, + "rewards/margins": 0.08580169826745987, + "rewards/rejected": -0.06195221096277237, + "step": 43 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -3.2280826568603516, + "logits/rejected": -3.075770378112793, + "logps/chosen": -287.8787841796875, + "logps/rejected": -661.4387817382812, + "loss": 0.907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02247467078268528, + "rewards/margins": 0.1797439604997635, + "rewards/rejected": -0.20221863687038422, + "step": 44 + }, + { + "epoch": 0.03, + "learning_rate": 9.375e-08, + "logits/chosen": -3.1849594116210938, + "logits/rejected": -2.9742836952209473, + "logps/chosen": -243.82522583007812, + "logps/rejected": -488.164794921875, + "loss": 0.8992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02074737474322319, + "rewards/margins": 0.08770065009593964, + "rewards/rejected": -0.06695327907800674, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 9.583333333333334e-08, + "logits/chosen": -3.2046022415161133, + "logits/rejected": -3.1072678565979004, + "logps/chosen": -278.1094970703125, + "logps/rejected": -595.6753540039062, + "loss": 0.881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014366153627634048, + "rewards/margins": 0.1332191377878189, + "rewards/rejected": -0.13465575873851776, + "step": 46 + }, + { + "epoch": 0.03, + "learning_rate": 9.791666666666666e-08, + "logits/chosen": -3.20466947555542, + "logits/rejected": -3.1315460205078125, + "logps/chosen": -275.3850402832031, + "logps/rejected": -537.8136596679688, + "loss": 0.8694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040139008313417435, + "rewards/margins": 0.17090988159179688, + "rewards/rejected": -0.13077087700366974, + "step": 47 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -3.170804500579834, + "logits/rejected": -3.069425582885742, + "logps/chosen": -266.2212219238281, + "logps/rejected": -1248.0234375, + "loss": 0.8764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026285553351044655, + "rewards/margins": 0.3719444274902344, + "rewards/rejected": -0.3982299864292145, + "step": 48 + }, + { + "epoch": 0.03, + "learning_rate": 9.999989334501192e-08, + "logits/chosen": -3.251525640487671, + "logits/rejected": -3.0688164234161377, + "logps/chosen": -286.072509765625, + "logps/rejected": -874.1749877929688, + "loss": 0.9228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006790156476199627, + "rewards/margins": 0.16447754204273224, + "rewards/rejected": -0.163798525929451, + "step": 49 + }, + { + "epoch": 0.03, + "learning_rate": 9.999957338050264e-08, + "logits/chosen": -3.216892719268799, + "logits/rejected": -3.0249390602111816, + "logps/chosen": -291.9205627441406, + "logps/rejected": -910.1425170898438, + "loss": 0.8648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039183806627988815, + "rewards/margins": 0.37246933579444885, + "rewards/rejected": -0.33328551054000854, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 9.999904010783724e-08, + "logits/chosen": -3.1530051231384277, + "logits/rejected": -2.942018985748291, + "logps/chosen": -285.23358154296875, + "logps/rejected": -1137.552490234375, + "loss": 0.8761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05385741963982582, + "rewards/margins": 0.22529298067092896, + "rewards/rejected": -0.17143554985523224, + "step": 51 + }, + { + "epoch": 0.03, + "learning_rate": 9.999829352929073e-08, + "logits/chosen": -3.171222686767578, + "logits/rejected": -3.1266322135925293, + "logps/chosen": -286.8262023925781, + "logps/rejected": -656.198486328125, + "loss": 0.8638, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008575432002544403, + "rewards/margins": 0.21119995415210724, + "rewards/rejected": -0.21205750107765198, + "step": 52 + }, + { + "epoch": 0.03, + "learning_rate": 9.999733364804819e-08, + "logits/chosen": -3.157594680786133, + "logits/rejected": -3.040388345718384, + "logps/chosen": -245.60963439941406, + "logps/rejected": -526.1567993164062, + "loss": 0.8598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08552627265453339, + "rewards/margins": 0.27893751859664917, + "rewards/rejected": -0.19341126084327698, + "step": 53 + }, + { + "epoch": 0.03, + "learning_rate": 9.999616046820466e-08, + "logits/chosen": -3.242715358734131, + "logits/rejected": -3.0058937072753906, + "logps/chosen": -298.7218017578125, + "logps/rejected": -427.213134765625, + "loss": 0.8691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024259185418486595, + "rewards/margins": 0.19725799560546875, + "rewards/rejected": -0.1729988157749176, + "step": 54 + }, + { + "epoch": 0.04, + "learning_rate": 9.999477399476514e-08, + "logits/chosen": -3.2014551162719727, + "logits/rejected": -3.077192783355713, + "logps/chosen": -249.8242950439453, + "logps/rejected": -511.47265625, + "loss": 0.8346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03453979641199112, + "rewards/margins": 0.18890228867530823, + "rewards/rejected": -0.1543624997138977, + "step": 55 + }, + { + "epoch": 0.04, + "learning_rate": 9.999317423364464e-08, + "logits/chosen": -3.243114948272705, + "logits/rejected": -3.0470046997070312, + "logps/chosen": -256.978515625, + "logps/rejected": -491.5581970214844, + "loss": 0.8211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08095397800207138, + "rewards/margins": 0.25038450956344604, + "rewards/rejected": -0.16943055391311646, + "step": 56 + }, + { + "epoch": 0.04, + "learning_rate": 9.999136119166802e-08, + "logits/chosen": -3.195483922958374, + "logits/rejected": -3.103095054626465, + "logps/chosen": -252.43234252929688, + "logps/rejected": -760.6536865234375, + "loss": 0.843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07173614203929901, + "rewards/margins": 0.37763214111328125, + "rewards/rejected": -0.30589598417282104, + "step": 57 + }, + { + "epoch": 0.04, + "learning_rate": 9.998933487657011e-08, + "logits/chosen": -3.1580536365509033, + "logits/rejected": -3.054926872253418, + "logps/chosen": -278.14422607421875, + "logps/rejected": -586.13427734375, + "loss": 0.8439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01994018629193306, + "rewards/margins": 0.3010665774345398, + "rewards/rejected": -0.2811264097690582, + "step": 58 + }, + { + "epoch": 0.04, + "learning_rate": 9.998709529699554e-08, + "logits/chosen": -3.2239859104156494, + "logits/rejected": -3.0050911903381348, + "logps/chosen": -259.859619140625, + "logps/rejected": -1288.96630859375, + "loss": 0.7874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.055127717554569244, + "rewards/margins": 0.7325873970985413, + "rewards/rejected": -0.677459716796875, + "step": 59 + }, + { + "epoch": 0.04, + "learning_rate": 9.998464246249884e-08, + "logits/chosen": -3.200378656387329, + "logits/rejected": -3.092231273651123, + "logps/chosen": -267.270263671875, + "logps/rejected": -489.9418640136719, + "loss": 0.8327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06184081733226776, + "rewards/margins": 0.3071487545967102, + "rewards/rejected": -0.24530792236328125, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 9.998197638354427e-08, + "logits/chosen": -3.217846632003784, + "logits/rejected": -2.976282835006714, + "logps/chosen": -264.5123596191406, + "logps/rejected": -807.861328125, + "loss": 0.8124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09280319511890411, + "rewards/margins": 0.4107093811035156, + "rewards/rejected": -0.3179062008857727, + "step": 61 + }, + { + "epoch": 0.04, + "learning_rate": 9.997909707150585e-08, + "logits/chosen": -3.1999001502990723, + "logits/rejected": -3.1553614139556885, + "logps/chosen": -302.76507568359375, + "logps/rejected": -444.4248962402344, + "loss": 0.8058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13294830918312073, + "rewards/margins": 0.27928465604782104, + "rewards/rejected": -0.1463363766670227, + "step": 62 + }, + { + "epoch": 0.04, + "learning_rate": 9.997600453866733e-08, + "logits/chosen": -3.1675267219543457, + "logits/rejected": -3.0548765659332275, + "logps/chosen": -260.4369812011719, + "logps/rejected": -1389.083251953125, + "loss": 0.809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09004974365234375, + "rewards/margins": 0.5372482538223267, + "rewards/rejected": -0.4471984803676605, + "step": 63 + }, + { + "epoch": 0.04, + "learning_rate": 9.997269879822203e-08, + "logits/chosen": -3.190600872039795, + "logits/rejected": -3.0514376163482666, + "logps/chosen": -271.392333984375, + "logps/rejected": -386.91229248046875, + "loss": 0.8311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0086441021412611, + "rewards/margins": 0.14955291152000427, + "rewards/rejected": -0.14090880751609802, + "step": 64 + }, + { + "epoch": 0.04, + "learning_rate": 9.996917986427294e-08, + "logits/chosen": -3.1905181407928467, + "logits/rejected": -3.1377246379852295, + "logps/chosen": -333.11285400390625, + "logps/rejected": -3906.378662109375, + "loss": 0.7668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02035369910299778, + "rewards/margins": 0.5837936401367188, + "rewards/rejected": -0.5634399652481079, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 9.996544775183248e-08, + "logits/chosen": -3.1989526748657227, + "logits/rejected": -3.0619373321533203, + "logps/chosen": -279.095947265625, + "logps/rejected": -1433.3275146484375, + "loss": 0.8251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07820510864257812, + "rewards/margins": 0.7369362115859985, + "rewards/rejected": -0.6587311029434204, + "step": 66 + }, + { + "epoch": 0.04, + "learning_rate": 9.996150247682265e-08, + "logits/chosen": -3.177079677581787, + "logits/rejected": -3.146932601928711, + "logps/chosen": -253.7103271484375, + "logps/rejected": -812.771728515625, + "loss": 0.7607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06761016696691513, + "rewards/margins": 0.5150650143623352, + "rewards/rejected": -0.4474548101425171, + "step": 67 + }, + { + "epoch": 0.04, + "learning_rate": 9.995734405607475e-08, + "logits/chosen": -3.2277464866638184, + "logits/rejected": -3.1359972953796387, + "logps/chosen": -271.80157470703125, + "logps/rejected": -386.99029541015625, + "loss": 0.8008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0666656494140625, + "rewards/margins": 0.3230148255825043, + "rewards/rejected": -0.2563491761684418, + "step": 68 + }, + { + "epoch": 0.04, + "learning_rate": 9.995297250732942e-08, + "logits/chosen": -3.1492087841033936, + "logits/rejected": -3.093930721282959, + "logps/chosen": -270.01116943359375, + "logps/rejected": -650.1737060546875, + "loss": 0.7603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09953460842370987, + "rewards/margins": 0.5126999020576477, + "rewards/rejected": -0.41316527128219604, + "step": 69 + }, + { + "epoch": 0.04, + "learning_rate": 9.994838784923659e-08, + "logits/chosen": -3.2127904891967773, + "logits/rejected": -3.0451226234436035, + "logps/chosen": -289.5193176269531, + "logps/rejected": -498.4441223144531, + "loss": 0.7736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0340576171875, + "rewards/margins": 0.30895233154296875, + "rewards/rejected": -0.27489471435546875, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 9.99435901013553e-08, + "logits/chosen": -3.199178457260132, + "logits/rejected": -3.129286766052246, + "logps/chosen": -299.00604248046875, + "logps/rejected": -431.2790832519531, + "loss": 0.8131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006990051828324795, + "rewards/margins": 0.16156616806983948, + "rewards/rejected": -0.16855621337890625, + "step": 71 + }, + { + "epoch": 0.05, + "learning_rate": 9.993857928415369e-08, + "logits/chosen": -3.1660218238830566, + "logits/rejected": -3.0285720825195312, + "logps/chosen": -279.0898742675781, + "logps/rejected": -437.8880310058594, + "loss": 0.7925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.104400634765625, + "rewards/margins": 0.48473358154296875, + "rewards/rejected": -0.38033294677734375, + "step": 72 + }, + { + "epoch": 0.05, + "learning_rate": 9.993335541900894e-08, + "logits/chosen": -3.1681151390075684, + "logits/rejected": -3.097574234008789, + "logps/chosen": -285.58770751953125, + "logps/rejected": -555.39794921875, + "loss": 0.7433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13107910752296448, + "rewards/margins": 0.5093658566474915, + "rewards/rejected": -0.378286749124527, + "step": 73 + }, + { + "epoch": 0.05, + "learning_rate": 9.992791852820707e-08, + "logits/chosen": -3.1886305809020996, + "logits/rejected": -3.0921244621276855, + "logps/chosen": -268.8922119140625, + "logps/rejected": -549.3283081054688, + "loss": 0.7524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17099228501319885, + "rewards/margins": 0.5798957943916321, + "rewards/rejected": -0.4089035391807556, + "step": 74 + }, + { + "epoch": 0.05, + "learning_rate": 9.992226863494298e-08, + "logits/chosen": -3.1799111366271973, + "logits/rejected": -2.984353542327881, + "logps/chosen": -288.09375, + "logps/rejected": -698.8551025390625, + "loss": 0.7353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07483673095703125, + "rewards/margins": 0.4818618893623352, + "rewards/rejected": -0.40702515840530396, + "step": 75 + }, + { + "epoch": 0.05, + "learning_rate": 9.991640576332021e-08, + "logits/chosen": -3.119673252105713, + "logits/rejected": -2.9898767471313477, + "logps/chosen": -276.23248291015625, + "logps/rejected": -440.25836181640625, + "loss": 0.7321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11730270087718964, + "rewards/margins": 0.37137070298194885, + "rewards/rejected": -0.254067987203598, + "step": 76 + }, + { + "epoch": 0.05, + "learning_rate": 9.991032993835096e-08, + "logits/chosen": -3.066944122314453, + "logits/rejected": -3.0613625049591064, + "logps/chosen": -286.1818542480469, + "logps/rejected": -924.4994506835938, + "loss": 0.69, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0765228271484375, + "rewards/margins": 0.83599853515625, + "rewards/rejected": -0.7594757080078125, + "step": 77 + }, + { + "epoch": 0.05, + "learning_rate": 9.990404118595588e-08, + "logits/chosen": -3.1689224243164062, + "logits/rejected": -3.064621925354004, + "logps/chosen": -268.11590576171875, + "logps/rejected": -393.2600402832031, + "loss": 0.7885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08714447170495987, + "rewards/margins": 0.31597900390625, + "rewards/rejected": -0.22883453965187073, + "step": 78 + }, + { + "epoch": 0.05, + "learning_rate": 9.989753953296407e-08, + "logits/chosen": -3.238790512084961, + "logits/rejected": -3.1513829231262207, + "logps/chosen": -292.9261779785156, + "logps/rejected": -413.348876953125, + "loss": 0.7514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16082915663719177, + "rewards/margins": 0.5978385806083679, + "rewards/rejected": -0.43700945377349854, + "step": 79 + }, + { + "epoch": 0.05, + "learning_rate": 9.989082500711287e-08, + "logits/chosen": -3.2337753772735596, + "logits/rejected": -3.0091018676757812, + "logps/chosen": -290.65045166015625, + "logps/rejected": -460.520751953125, + "loss": 0.7041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10012054443359375, + "rewards/margins": 0.41166841983795166, + "rewards/rejected": -0.3115478754043579, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 9.988389763704778e-08, + "logits/chosen": -3.1635897159576416, + "logits/rejected": -3.0343151092529297, + "logps/chosen": -230.31301879882812, + "logps/rejected": -733.8567504882812, + "loss": 0.6533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18175584077835083, + "rewards/margins": 0.6853996515274048, + "rewards/rejected": -0.503643810749054, + "step": 81 + }, + { + "epoch": 0.05, + "learning_rate": 9.987675745232238e-08, + "logits/chosen": -3.20497989654541, + "logits/rejected": -3.028393268585205, + "logps/chosen": -297.9314880371094, + "logps/rejected": -800.3336181640625, + "loss": 0.7509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14820250868797302, + "rewards/margins": 0.8051269054412842, + "rewards/rejected": -0.6569244265556335, + "step": 82 + }, + { + "epoch": 0.05, + "learning_rate": 9.986940448339807e-08, + "logits/chosen": -3.187039375305176, + "logits/rejected": -3.1830520629882812, + "logps/chosen": -296.9833068847656, + "logps/rejected": -636.0032348632812, + "loss": 0.7742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10375671088695526, + "rewards/margins": 0.6293091177940369, + "rewards/rejected": -0.5255523920059204, + "step": 83 + }, + { + "epoch": 0.05, + "learning_rate": 9.986183876164412e-08, + "logits/chosen": -3.1414942741394043, + "logits/rejected": -2.9664299488067627, + "logps/chosen": -262.0418701171875, + "logps/rejected": -1004.1738891601562, + "loss": 0.7261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16943053901195526, + "rewards/margins": 0.8052398562431335, + "rewards/rejected": -0.6358093619346619, + "step": 84 + }, + { + "epoch": 0.05, + "learning_rate": 9.985406031933737e-08, + "logits/chosen": -3.1344268321990967, + "logits/rejected": -3.0583102703094482, + "logps/chosen": -284.49029541015625, + "logps/rejected": -526.7748413085938, + "loss": 0.7254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11520309746265411, + "rewards/margins": 0.5364570617675781, + "rewards/rejected": -0.4212539792060852, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 9.984606918966225e-08, + "logits/chosen": -3.205990791320801, + "logits/rejected": -3.1626696586608887, + "logps/chosen": -275.1701965332031, + "logps/rejected": -713.589111328125, + "loss": 0.6586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24420472979545593, + "rewards/margins": 0.8643860816955566, + "rewards/rejected": -0.6201813220977783, + "step": 86 + }, + { + "epoch": 0.06, + "learning_rate": 9.98378654067105e-08, + "logits/chosen": -3.1499547958374023, + "logits/rejected": -3.059696674346924, + "logps/chosen": -260.74566650390625, + "logps/rejected": -398.5688171386719, + "loss": 0.6717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16625672578811646, + "rewards/margins": 0.36413878202438354, + "rewards/rejected": -0.19788208603858948, + "step": 87 + }, + { + "epoch": 0.06, + "learning_rate": 9.982944900548107e-08, + "logits/chosen": -3.2271318435668945, + "logits/rejected": -3.121936798095703, + "logps/chosen": -253.8331756591797, + "logps/rejected": -575.8009033203125, + "loss": 0.7146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1344345211982727, + "rewards/margins": 0.6132659912109375, + "rewards/rejected": -0.4788314700126648, + "step": 88 + }, + { + "epoch": 0.06, + "learning_rate": 9.982082002188005e-08, + "logits/chosen": -3.1852598190307617, + "logits/rejected": -2.933534860610962, + "logps/chosen": -250.0060272216797, + "logps/rejected": -527.9376220703125, + "loss": 0.7433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11657333374023438, + "rewards/margins": 0.5964805483818054, + "rewards/rejected": -0.47990721464157104, + "step": 89 + }, + { + "epoch": 0.06, + "learning_rate": 9.981197849272037e-08, + "logits/chosen": -3.1614747047424316, + "logits/rejected": -3.0962023735046387, + "logps/chosen": -326.4434814453125, + "logps/rejected": -375.7874755859375, + "loss": 0.7301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0735321044921875, + "rewards/margins": 0.2961685061454773, + "rewards/rejected": -0.22263643145561218, + "step": 90 + }, + { + "epoch": 0.06, + "learning_rate": 9.980292445572178e-08, + "logits/chosen": -3.151763916015625, + "logits/rejected": -3.0544681549072266, + "logps/chosen": -258.1134948730469, + "logps/rejected": -328.0830078125, + "loss": 0.7126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13488921523094177, + "rewards/margins": 0.4220565855503082, + "rewards/rejected": -0.28716737031936646, + "step": 91 + }, + { + "epoch": 0.06, + "learning_rate": 9.979365794951058e-08, + "logits/chosen": -3.2491159439086914, + "logits/rejected": -3.1588797569274902, + "logps/chosen": -251.96368408203125, + "logps/rejected": -792.1199340820312, + "loss": 0.6467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10254211723804474, + "rewards/margins": 0.9023895263671875, + "rewards/rejected": -0.799847424030304, + "step": 92 + }, + { + "epoch": 0.06, + "learning_rate": 9.978417901361958e-08, + "logits/chosen": -3.190556049346924, + "logits/rejected": -2.9892115592956543, + "logps/chosen": -288.2961730957031, + "logps/rejected": -470.3898010253906, + "loss": 0.711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20765380561351776, + "rewards/margins": 0.6408111453056335, + "rewards/rejected": -0.4331573247909546, + "step": 93 + }, + { + "epoch": 0.06, + "learning_rate": 9.977448768848777e-08, + "logits/chosen": -3.0864453315734863, + "logits/rejected": -2.9925107955932617, + "logps/chosen": -288.1572570800781, + "logps/rejected": -413.65472412109375, + "loss": 0.7064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22115936875343323, + "rewards/margins": 0.4786483645439148, + "rewards/rejected": -0.25748902559280396, + "step": 94 + }, + { + "epoch": 0.06, + "learning_rate": 9.976458401546029e-08, + "logits/chosen": -3.2072176933288574, + "logits/rejected": -3.018092155456543, + "logps/chosen": -278.5301513671875, + "logps/rejected": -1415.57763671875, + "loss": 0.7078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12201842665672302, + "rewards/margins": 1.0997642278671265, + "rewards/rejected": -0.9777458310127258, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 9.975446803678817e-08, + "logits/chosen": -3.2529001235961914, + "logits/rejected": -2.977165699005127, + "logps/chosen": -249.77857971191406, + "logps/rejected": -734.4794921875, + "loss": 0.6196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.162547305226326, + "rewards/margins": 0.7396255731582642, + "rewards/rejected": -0.5770782828330994, + "step": 96 + }, + { + "epoch": 0.06, + "learning_rate": 9.974413979562824e-08, + "logits/chosen": -3.2670414447784424, + "logits/rejected": -3.056424140930176, + "logps/chosen": -249.50775146484375, + "logps/rejected": -1459.015625, + "loss": 0.618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12743301689624786, + "rewards/margins": 1.3757606744766235, + "rewards/rejected": -1.248327612876892, + "step": 97 + }, + { + "epoch": 0.06, + "learning_rate": 9.97335993360428e-08, + "logits/chosen": -3.245028495788574, + "logits/rejected": -3.0432686805725098, + "logps/chosen": -262.9735107421875, + "logps/rejected": -514.044677734375, + "loss": 0.6252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22953873872756958, + "rewards/margins": 0.7771156430244446, + "rewards/rejected": -0.547576904296875, + "step": 98 + }, + { + "epoch": 0.06, + "learning_rate": 9.972284670299956e-08, + "logits/chosen": -3.171733856201172, + "logits/rejected": -3.0272982120513916, + "logps/chosen": -261.3026123046875, + "logps/rejected": -184.05108642578125, + "loss": 0.7069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18041688203811646, + "rewards/margins": 0.2830825746059418, + "rewards/rejected": -0.10266570746898651, + "step": 99 + }, + { + "epoch": 0.06, + "learning_rate": 9.97118819423714e-08, + "logits/chosen": -3.173208475112915, + "logits/rejected": -3.1206772327423096, + "logps/chosen": -299.71478271484375, + "logps/rejected": -481.9498291015625, + "loss": 0.622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0912322998046875, + "rewards/margins": 0.491586297750473, + "rewards/rejected": -0.4003539979457855, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 9.970070510093617e-08, + "logits/chosen": -3.1780614852905273, + "logits/rejected": -3.0296127796173096, + "logps/chosen": -275.912841796875, + "logps/rejected": -445.6690673828125, + "loss": 0.6302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1646621823310852, + "rewards/margins": 0.6672500371932983, + "rewards/rejected": -0.5025879144668579, + "step": 101 + }, + { + "epoch": 0.07, + "learning_rate": 9.968931622637651e-08, + "logits/chosen": -3.234238624572754, + "logits/rejected": -3.098538875579834, + "logps/chosen": -278.1122131347656, + "logps/rejected": -375.04864501953125, + "loss": 0.7128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1716255247592926, + "rewards/margins": 0.5583122372627258, + "rewards/rejected": -0.3866867125034332, + "step": 102 + }, + { + "epoch": 0.07, + "learning_rate": 9.967771536727964e-08, + "logits/chosen": -3.2493245601654053, + "logits/rejected": -3.1704447269439697, + "logps/chosen": -268.6153259277344, + "logps/rejected": -779.389404296875, + "loss": 0.5414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18662872910499573, + "rewards/margins": 1.1134933233261108, + "rewards/rejected": -0.9268646240234375, + "step": 103 + }, + { + "epoch": 0.07, + "learning_rate": 9.966590257313712e-08, + "logits/chosen": -3.2248709201812744, + "logits/rejected": -3.11151123046875, + "logps/chosen": -254.63421630859375, + "logps/rejected": -759.0403442382812, + "loss": 0.6425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1589149534702301, + "rewards/margins": 0.9443427920341492, + "rewards/rejected": -0.7854278683662415, + "step": 104 + }, + { + "epoch": 0.07, + "learning_rate": 9.965387789434472e-08, + "logits/chosen": -3.1281163692474365, + "logits/rejected": -3.0803565979003906, + "logps/chosen": -273.597900390625, + "logps/rejected": -461.3992614746094, + "loss": 0.6338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1630142331123352, + "rewards/margins": 0.5338088870048523, + "rewards/rejected": -0.3707946836948395, + "step": 105 + }, + { + "epoch": 0.07, + "learning_rate": 9.964164138220208e-08, + "logits/chosen": -3.1970088481903076, + "logits/rejected": -3.1732711791992188, + "logps/chosen": -272.07098388671875, + "logps/rejected": -441.831298828125, + "loss": 0.6006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07941436767578125, + "rewards/margins": 0.6490691900253296, + "rewards/rejected": -0.5696548223495483, + "step": 106 + }, + { + "epoch": 0.07, + "learning_rate": 9.962919308891262e-08, + "logits/chosen": -3.173495054244995, + "logits/rejected": -3.0698070526123047, + "logps/chosen": -255.16139221191406, + "logps/rejected": -635.5350341796875, + "loss": 0.619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21770936250686646, + "rewards/margins": 1.0739868879318237, + "rewards/rejected": -0.8562775254249573, + "step": 107 + }, + { + "epoch": 0.07, + "learning_rate": 9.961653306758325e-08, + "logits/chosen": -3.2279818058013916, + "logits/rejected": -2.998703718185425, + "logps/chosen": -269.8822937011719, + "logps/rejected": -579.1328735351562, + "loss": 0.6067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24737243354320526, + "rewards/margins": 0.9965317249298096, + "rewards/rejected": -0.7491592764854431, + "step": 108 + }, + { + "epoch": 0.07, + "learning_rate": 9.960366137222414e-08, + "logits/chosen": -3.2047505378723145, + "logits/rejected": -3.045177936553955, + "logps/chosen": -293.1044921875, + "logps/rejected": -899.4532470703125, + "loss": 0.5897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24601516127586365, + "rewards/margins": 1.0567817687988281, + "rewards/rejected": -0.8107666373252869, + "step": 109 + }, + { + "epoch": 0.07, + "learning_rate": 9.959057805774851e-08, + "logits/chosen": -3.182586669921875, + "logits/rejected": -3.0656039714813232, + "logps/chosen": -318.3377990722656, + "logps/rejected": -770.668701171875, + "loss": 0.6366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1157379150390625, + "rewards/margins": 1.0784087181091309, + "rewards/rejected": -0.9626709222793579, + "step": 110 + }, + { + "epoch": 0.07, + "learning_rate": 9.95772831799724e-08, + "logits/chosen": -3.160212993621826, + "logits/rejected": -3.0941836833953857, + "logps/chosen": -276.4947814941406, + "logps/rejected": -656.18310546875, + "loss": 0.6604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18192443251609802, + "rewards/margins": 0.9982406497001648, + "rewards/rejected": -0.8163162469863892, + "step": 111 + }, + { + "epoch": 0.07, + "learning_rate": 9.956377679561438e-08, + "logits/chosen": -3.214231491088867, + "logits/rejected": -3.0990867614746094, + "logps/chosen": -240.25927734375, + "logps/rejected": -1563.83740234375, + "loss": 0.5877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22468796372413635, + "rewards/margins": 1.7974998950958252, + "rewards/rejected": -1.5728119611740112, + "step": 112 + }, + { + "epoch": 0.07, + "learning_rate": 9.955005896229543e-08, + "logits/chosen": -3.159432888031006, + "logits/rejected": -3.135313034057617, + "logps/chosen": -268.2745666503906, + "logps/rejected": -646.3222045898438, + "loss": 0.6224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1213325560092926, + "rewards/margins": 0.9536781311035156, + "rewards/rejected": -0.8323456048965454, + "step": 113 + }, + { + "epoch": 0.07, + "learning_rate": 9.953612973853851e-08, + "logits/chosen": -3.178068161010742, + "logits/rejected": -3.0131969451904297, + "logps/chosen": -272.68829345703125, + "logps/rejected": -699.7930908203125, + "loss": 0.6317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23419037461280823, + "rewards/margins": 1.0554825067520142, + "rewards/rejected": -0.8212921619415283, + "step": 114 + }, + { + "epoch": 0.07, + "learning_rate": 9.95219891837685e-08, + "logits/chosen": -3.234279155731201, + "logits/rejected": -3.0310821533203125, + "logps/chosen": -258.992919921875, + "logps/rejected": -1136.8994140625, + "loss": 0.5828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16217422485351562, + "rewards/margins": 1.3225898742675781, + "rewards/rejected": -1.1604156494140625, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 9.950763735831182e-08, + "logits/chosen": -3.159667730331421, + "logits/rejected": -2.9615683555603027, + "logps/chosen": -287.9840087890625, + "logps/rejected": -1060.572998046875, + "loss": 0.5661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2576919496059418, + "rewards/margins": 1.3633713722229004, + "rewards/rejected": -1.1056793928146362, + "step": 116 + }, + { + "epoch": 0.07, + "learning_rate": 9.949307432339625e-08, + "logits/chosen": -3.1656429767608643, + "logits/rejected": -3.090263843536377, + "logps/chosen": -229.482177734375, + "logps/rejected": -747.5150756835938, + "loss": 0.4838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2751106321811676, + "rewards/margins": 1.2580451965332031, + "rewards/rejected": -0.9829345941543579, + "step": 117 + }, + { + "epoch": 0.08, + "learning_rate": 9.947830014115055e-08, + "logits/chosen": -3.1897733211517334, + "logits/rejected": -2.9931654930114746, + "logps/chosen": -268.12799072265625, + "logps/rejected": -424.1930847167969, + "loss": 0.6181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24282532930374146, + "rewards/margins": 0.9018341302871704, + "rewards/rejected": -0.659008800983429, + "step": 118 + }, + { + "epoch": 0.08, + "learning_rate": 9.946331487460436e-08, + "logits/chosen": -3.1702353954315186, + "logits/rejected": -3.116434097290039, + "logps/chosen": -261.7657775878906, + "logps/rejected": -545.60693359375, + "loss": 0.5681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15133896470069885, + "rewards/margins": 1.061414361000061, + "rewards/rejected": -0.9100754261016846, + "step": 119 + }, + { + "epoch": 0.08, + "learning_rate": 9.944811858768781e-08, + "logits/chosen": -3.205709457397461, + "logits/rejected": -3.076699733734131, + "logps/chosen": -288.60821533203125, + "logps/rejected": -992.817626953125, + "loss": 0.5963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2459358274936676, + "rewards/margins": 1.390119194984436, + "rewards/rejected": -1.1441833972930908, + "step": 120 + }, + { + "epoch": 0.08, + "learning_rate": 9.94327113452313e-08, + "logits/chosen": -3.2661209106445312, + "logits/rejected": -3.1316018104553223, + "logps/chosen": -256.1053771972656, + "logps/rejected": -439.04449462890625, + "loss": 0.5743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21583175659179688, + "rewards/margins": 0.8237937688827515, + "rewards/rejected": -0.6079620122909546, + "step": 121 + }, + { + "epoch": 0.08, + "learning_rate": 9.941709321296518e-08, + "logits/chosen": -3.2120206356048584, + "logits/rejected": -3.1178343296051025, + "logps/chosen": -293.203125, + "logps/rejected": -396.6179504394531, + "loss": 0.593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25799560546875, + "rewards/margins": 0.6791290044784546, + "rewards/rejected": -0.421133428812027, + "step": 122 + }, + { + "epoch": 0.08, + "learning_rate": 9.940126425751956e-08, + "logits/chosen": -3.1824750900268555, + "logits/rejected": -3.0790462493896484, + "logps/chosen": -272.2681579589844, + "logps/rejected": -560.0559692382812, + "loss": 0.5885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3307022154331207, + "rewards/margins": 0.8905915021896362, + "rewards/rejected": -0.5598892569541931, + "step": 123 + }, + { + "epoch": 0.08, + "learning_rate": 9.938522454642387e-08, + "logits/chosen": -3.2422091960906982, + "logits/rejected": -3.167593002319336, + "logps/chosen": -260.63238525390625, + "logps/rejected": -928.7893676757812, + "loss": 0.5879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24948731064796448, + "rewards/margins": 1.5348756313323975, + "rewards/rejected": -1.2853882312774658, + "step": 124 + }, + { + "epoch": 0.08, + "learning_rate": 9.936897414810676e-08, + "logits/chosen": -3.203202724456787, + "logits/rejected": -2.962592363357544, + "logps/chosen": -270.03662109375, + "logps/rejected": -1227.1361083984375, + "loss": 0.496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24226227402687073, + "rewards/margins": 1.562271237373352, + "rewards/rejected": -1.3200088739395142, + "step": 125 + }, + { + "epoch": 0.08, + "learning_rate": 9.935251313189563e-08, + "logits/chosen": -3.2848734855651855, + "logits/rejected": -3.193728446960449, + "logps/chosen": -242.65090942382812, + "logps/rejected": -510.5807189941406, + "loss": 0.5828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24239425361156464, + "rewards/margins": 0.8335182070732117, + "rewards/rejected": -0.5911239981651306, + "step": 126 + }, + { + "epoch": 0.08, + "learning_rate": 9.933584156801651e-08, + "logits/chosen": -3.1795482635498047, + "logits/rejected": -3.167421340942383, + "logps/chosen": -283.9157409667969, + "logps/rejected": -924.9686889648438, + "loss": 0.4781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21652832627296448, + "rewards/margins": 1.597747802734375, + "rewards/rejected": -1.381219506263733, + "step": 127 + }, + { + "epoch": 0.08, + "learning_rate": 9.931895952759358e-08, + "logits/chosen": -3.2336220741271973, + "logits/rejected": -3.124321460723877, + "logps/chosen": -274.9884338378906, + "logps/rejected": -477.40234375, + "loss": 0.534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2791229486465454, + "rewards/margins": 0.9101272821426392, + "rewards/rejected": -0.6310043334960938, + "step": 128 + }, + { + "epoch": 0.08, + "learning_rate": 9.930186708264901e-08, + "logits/chosen": -3.2425498962402344, + "logits/rejected": -3.0473084449768066, + "logps/chosen": -282.71856689453125, + "logps/rejected": -663.08056640625, + "loss": 0.6202, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24709472060203552, + "rewards/margins": 1.0961883068084717, + "rewards/rejected": -0.8490936756134033, + "step": 129 + }, + { + "epoch": 0.08, + "learning_rate": 9.928456430610257e-08, + "logits/chosen": -3.159611225128174, + "logits/rejected": -3.1785407066345215, + "logps/chosen": -262.3172302246094, + "logps/rejected": -637.2445068359375, + "loss": 0.5563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24744492769241333, + "rewards/margins": 1.2075035572052002, + "rewards/rejected": -0.9600586295127869, + "step": 130 + }, + { + "epoch": 0.08, + "learning_rate": 9.926705127177137e-08, + "logits/chosen": -3.226226329803467, + "logits/rejected": -3.083677053451538, + "logps/chosen": -273.76470947265625, + "logps/rejected": -283.97528076171875, + "loss": 0.5916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32809752225875854, + "rewards/margins": 0.7070327997207642, + "rewards/rejected": -0.3789352476596832, + "step": 131 + }, + { + "epoch": 0.08, + "learning_rate": 9.924932805436949e-08, + "logits/chosen": -3.203153133392334, + "logits/rejected": -3.082836627960205, + "logps/chosen": -289.39007568359375, + "logps/rejected": -555.8895263671875, + "loss": 0.5854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2521011233329773, + "rewards/margins": 0.974067747592926, + "rewards/rejected": -0.721966564655304, + "step": 132 + }, + { + "epoch": 0.08, + "learning_rate": 9.923139472950772e-08, + "logits/chosen": -3.2575440406799316, + "logits/rejected": -3.0998001098632812, + "logps/chosen": -294.1458435058594, + "logps/rejected": -907.228515625, + "loss": 0.5233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25298768281936646, + "rewards/margins": 1.5800232887268066, + "rewards/rejected": -1.3270355463027954, + "step": 133 + }, + { + "epoch": 0.09, + "learning_rate": 9.92132513736932e-08, + "logits/chosen": -3.195833921432495, + "logits/rejected": -3.0626914501190186, + "logps/chosen": -250.56752014160156, + "logps/rejected": -919.3110961914062, + "loss": 0.5423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24598997831344604, + "rewards/margins": 1.4809844493865967, + "rewards/rejected": -1.2349945306777954, + "step": 134 + }, + { + "epoch": 0.09, + "learning_rate": 9.919489806432913e-08, + "logits/chosen": -3.219125270843506, + "logits/rejected": -3.1121420860290527, + "logps/chosen": -266.7503662109375, + "logps/rejected": -977.151123046875, + "loss": 0.4946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2932586669921875, + "rewards/margins": 1.7997252941131592, + "rewards/rejected": -1.5064666271209717, + "step": 135 + }, + { + "epoch": 0.09, + "learning_rate": 9.917633487971437e-08, + "logits/chosen": -3.1627275943756104, + "logits/rejected": -3.048729181289673, + "logps/chosen": -259.9317932128906, + "logps/rejected": -829.1664428710938, + "loss": 0.5499, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2480308711528778, + "rewards/margins": 1.299085259437561, + "rewards/rejected": -1.0510543584823608, + "step": 136 + }, + { + "epoch": 0.09, + "learning_rate": 9.915756189904317e-08, + "logits/chosen": -3.2098817825317383, + "logits/rejected": -3.059540271759033, + "logps/chosen": -265.459228515625, + "logps/rejected": -1345.964111328125, + "loss": 0.524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30863726139068604, + "rewards/margins": 2.224292755126953, + "rewards/rejected": -1.9156556129455566, + "step": 137 + }, + { + "epoch": 0.09, + "learning_rate": 9.913857920240479e-08, + "logits/chosen": -3.1672234535217285, + "logits/rejected": -3.074160575866699, + "logps/chosen": -272.9672546386719, + "logps/rejected": -778.10498046875, + "loss": 0.4981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2762207090854645, + "rewards/margins": 1.549813985824585, + "rewards/rejected": -1.2735931873321533, + "step": 138 + }, + { + "epoch": 0.09, + "learning_rate": 9.911938687078323e-08, + "logits/chosen": -3.1715331077575684, + "logits/rejected": -3.151366710662842, + "logps/chosen": -248.87887573242188, + "logps/rejected": -657.0775756835938, + "loss": 0.4699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20089873671531677, + "rewards/margins": 1.3071014881134033, + "rewards/rejected": -1.1062027215957642, + "step": 139 + }, + { + "epoch": 0.09, + "learning_rate": 9.909998498605681e-08, + "logits/chosen": -3.175334930419922, + "logits/rejected": -3.0909523963928223, + "logps/chosen": -276.9197998046875, + "logps/rejected": -571.61474609375, + "loss": 0.5709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2135269194841385, + "rewards/margins": 1.2245299816131592, + "rewards/rejected": -1.0110031366348267, + "step": 140 + }, + { + "epoch": 0.09, + "learning_rate": 9.908037363099782e-08, + "logits/chosen": -3.2299365997314453, + "logits/rejected": -3.162855863571167, + "logps/chosen": -290.81024169921875, + "logps/rejected": -680.6280517578125, + "loss": 0.5698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27824097871780396, + "rewards/margins": 1.4140716791152954, + "rewards/rejected": -1.1358306407928467, + "step": 141 + }, + { + "epoch": 0.09, + "learning_rate": 9.906055288927222e-08, + "logits/chosen": -3.2086234092712402, + "logits/rejected": -3.1950817108154297, + "logps/chosen": -263.8929748535156, + "logps/rejected": -727.37109375, + "loss": 0.541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2515220642089844, + "rewards/margins": 1.3894340991973877, + "rewards/rejected": -1.1379120349884033, + "step": 142 + }, + { + "epoch": 0.09, + "learning_rate": 9.904052284543926e-08, + "logits/chosen": -3.2849278450012207, + "logits/rejected": -3.18524432182312, + "logps/chosen": -230.40054321289062, + "logps/rejected": -654.3582763671875, + "loss": 0.4663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2838386595249176, + "rewards/margins": 1.449608564376831, + "rewards/rejected": -1.1657699346542358, + "step": 143 + }, + { + "epoch": 0.09, + "learning_rate": 9.90202835849511e-08, + "logits/chosen": -3.2447919845581055, + "logits/rejected": -3.035431385040283, + "logps/chosen": -275.3568115234375, + "logps/rejected": -458.0790710449219, + "loss": 0.5702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2526397705078125, + "rewards/margins": 1.0211091041564941, + "rewards/rejected": -0.7684692144393921, + "step": 144 + }, + { + "epoch": 0.09, + "learning_rate": 9.899983519415244e-08, + "logits/chosen": -3.1979498863220215, + "logits/rejected": -3.0642857551574707, + "logps/chosen": -269.5766296386719, + "logps/rejected": -909.33740234375, + "loss": 0.5057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2824295163154602, + "rewards/margins": 1.725559949874878, + "rewards/rejected": -1.4431304931640625, + "step": 145 + }, + { + "epoch": 0.09, + "learning_rate": 9.897917776028021e-08, + "logits/chosen": -3.175686836242676, + "logits/rejected": -2.928337335586548, + "logps/chosen": -266.2857971191406, + "logps/rejected": -1239.011962890625, + "loss": 0.4979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37668001651763916, + "rewards/margins": 2.0431718826293945, + "rewards/rejected": -1.6664917469024658, + "step": 146 + }, + { + "epoch": 0.09, + "learning_rate": 9.895831137146317e-08, + "logits/chosen": -3.170056104660034, + "logits/rejected": -3.029956340789795, + "logps/chosen": -260.3128967285156, + "logps/rejected": -1132.6650390625, + "loss": 0.4997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2752418518066406, + "rewards/margins": 1.9972238540649414, + "rewards/rejected": -1.7219818830490112, + "step": 147 + }, + { + "epoch": 0.09, + "learning_rate": 9.893723611672147e-08, + "logits/chosen": -3.292609214782715, + "logits/rejected": -3.1095499992370605, + "logps/chosen": -276.52777099609375, + "logps/rejected": -1783.7415771484375, + "loss": 0.4608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3226425051689148, + "rewards/margins": 3.299208164215088, + "rewards/rejected": -2.9765655994415283, + "step": 148 + }, + { + "epoch": 0.09, + "learning_rate": 9.891595208596637e-08, + "logits/chosen": -3.208852767944336, + "logits/rejected": -3.1656060218811035, + "logps/chosen": -254.95118713378906, + "logps/rejected": -604.2535400390625, + "loss": 0.4304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36379241943359375, + "rewards/margins": 1.4765288829803467, + "rewards/rejected": -1.112736463546753, + "step": 149 + }, + { + "epoch": 0.1, + "learning_rate": 9.889445936999976e-08, + "logits/chosen": -3.2827115058898926, + "logits/rejected": -3.122201919555664, + "logps/chosen": -286.3800354003906, + "logps/rejected": -537.367919921875, + "loss": 0.5644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22891998291015625, + "rewards/margins": 1.1838881969451904, + "rewards/rejected": -0.954968273639679, + "step": 150 + }, + { + "epoch": 0.1, + "learning_rate": 9.887275806051388e-08, + "logits/chosen": -3.211134195327759, + "logits/rejected": -3.102180004119873, + "logps/chosen": -264.59332275390625, + "logps/rejected": -924.85791015625, + "loss": 0.4643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30490875244140625, + "rewards/margins": 1.854838490486145, + "rewards/rejected": -1.5499298572540283, + "step": 151 + }, + { + "epoch": 0.1, + "learning_rate": 9.885084825009084e-08, + "logits/chosen": -3.199681282043457, + "logits/rejected": -3.043135404586792, + "logps/chosen": -270.3661193847656, + "logps/rejected": -531.4871826171875, + "loss": 0.6116, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2878982424736023, + "rewards/margins": 1.082026720046997, + "rewards/rejected": -0.79412841796875, + "step": 152 + }, + { + "epoch": 0.1, + "learning_rate": 9.882873003220226e-08, + "logits/chosen": -3.176021099090576, + "logits/rejected": -3.132744550704956, + "logps/chosen": -295.26800537109375, + "logps/rejected": -534.865478515625, + "loss": 0.5326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35681915283203125, + "rewards/margins": 1.4164414405822754, + "rewards/rejected": -1.0596221685409546, + "step": 153 + }, + { + "epoch": 0.1, + "learning_rate": 9.88064035012089e-08, + "logits/chosen": -3.2372169494628906, + "logits/rejected": -3.1586837768554688, + "logps/chosen": -259.0315246582031, + "logps/rejected": -595.9423828125, + "loss": 0.4803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2950882017612457, + "rewards/margins": 1.33428955078125, + "rewards/rejected": -1.0392013788223267, + "step": 154 + }, + { + "epoch": 0.1, + "learning_rate": 9.878386875236016e-08, + "logits/chosen": -3.24344801902771, + "logits/rejected": -3.1398189067840576, + "logps/chosen": -261.6542053222656, + "logps/rejected": -687.8671875, + "loss": 0.472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3522804379463196, + "rewards/margins": 1.605970025062561, + "rewards/rejected": -1.2536895275115967, + "step": 155 + }, + { + "epoch": 0.1, + "learning_rate": 9.876112588179377e-08, + "logits/chosen": -3.2036380767822266, + "logits/rejected": -3.0353126525878906, + "logps/chosen": -232.95654296875, + "logps/rejected": -535.97314453125, + "loss": 0.4861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3795784115791321, + "rewards/margins": 1.334215521812439, + "rewards/rejected": -0.9546371698379517, + "step": 156 + }, + { + "epoch": 0.1, + "learning_rate": 9.873817498653537e-08, + "logits/chosen": -3.225675582885742, + "logits/rejected": -3.1230525970458984, + "logps/chosen": -267.05950927734375, + "logps/rejected": -638.3702392578125, + "loss": 0.4602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.302520751953125, + "rewards/margins": 1.7635254859924316, + "rewards/rejected": -1.4610047340393066, + "step": 157 + }, + { + "epoch": 0.1, + "learning_rate": 9.871501616449807e-08, + "logits/chosen": -3.227108955383301, + "logits/rejected": -3.1421523094177246, + "logps/chosen": -239.08372497558594, + "logps/rejected": -458.4169616699219, + "loss": 0.4858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24417877197265625, + "rewards/margins": 0.9285538196563721, + "rewards/rejected": -0.6843750476837158, + "step": 158 + }, + { + "epoch": 0.1, + "learning_rate": 9.8691649514482e-08, + "logits/chosen": -3.2188262939453125, + "logits/rejected": -3.1873295307159424, + "logps/chosen": -249.8113250732422, + "logps/rejected": -645.5294189453125, + "loss": 0.4959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25522691011428833, + "rewards/margins": 1.2648963928222656, + "rewards/rejected": -1.009669542312622, + "step": 159 + }, + { + "epoch": 0.1, + "learning_rate": 9.866807513617397e-08, + "logits/chosen": -3.231560707092285, + "logits/rejected": -3.0407962799072266, + "logps/chosen": -294.3050231933594, + "logps/rejected": -866.3287963867188, + "loss": 0.4645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4050338864326477, + "rewards/margins": 1.7664809226989746, + "rewards/rejected": -1.3614470958709717, + "step": 160 + }, + { + "epoch": 0.1, + "learning_rate": 9.864429313014697e-08, + "logits/chosen": -3.195405960083008, + "logits/rejected": -2.9576144218444824, + "logps/chosen": -258.45111083984375, + "logps/rejected": -654.7218017578125, + "loss": 0.5269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29794085025787354, + "rewards/margins": 1.1800155639648438, + "rewards/rejected": -0.8820747137069702, + "step": 161 + }, + { + "epoch": 0.1, + "learning_rate": 9.86203035978598e-08, + "logits/chosen": -3.197448492050171, + "logits/rejected": -3.0879549980163574, + "logps/chosen": -302.08544921875, + "logps/rejected": -421.3991394042969, + "loss": 0.5316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37103575468063354, + "rewards/margins": 0.9965423345565796, + "rewards/rejected": -0.6255066394805908, + "step": 162 + }, + { + "epoch": 0.1, + "learning_rate": 9.859610664165657e-08, + "logits/chosen": -3.2176547050476074, + "logits/rejected": -3.030202865600586, + "logps/chosen": -269.8486328125, + "logps/rejected": -333.0755310058594, + "loss": 0.4928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37706148624420166, + "rewards/margins": 0.9761978387832642, + "rewards/rejected": -0.5991363525390625, + "step": 163 + }, + { + "epoch": 0.1, + "learning_rate": 9.857170236476633e-08, + "logits/chosen": -3.1687278747558594, + "logits/rejected": -3.0910136699676514, + "logps/chosen": -243.55128479003906, + "logps/rejected": -327.67218017578125, + "loss": 0.5334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2966812252998352, + "rewards/margins": 0.6995956897735596, + "rewards/rejected": -0.402914434671402, + "step": 164 + }, + { + "epoch": 0.11, + "learning_rate": 9.85470908713026e-08, + "logits/chosen": -3.220977306365967, + "logits/rejected": -3.130725622177124, + "logps/chosen": -265.58758544921875, + "logps/rejected": -778.535400390625, + "loss": 0.4842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38677141070365906, + "rewards/margins": 2.2678139209747314, + "rewards/rejected": -1.88104248046875, + "step": 165 + }, + { + "epoch": 0.11, + "learning_rate": 9.852227226626292e-08, + "logits/chosen": -3.170628786087036, + "logits/rejected": -3.149839162826538, + "logps/chosen": -258.1180419921875, + "logps/rejected": -597.30859375, + "loss": 0.489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2561172544956207, + "rewards/margins": 1.2964035272598267, + "rewards/rejected": -1.0402863025665283, + "step": 166 + }, + { + "epoch": 0.11, + "learning_rate": 9.849724665552841e-08, + "logits/chosen": -3.224701404571533, + "logits/rejected": -3.0144777297973633, + "logps/chosen": -256.79833984375, + "logps/rejected": -687.5578002929688, + "loss": 0.4478, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3457092344760895, + "rewards/margins": 1.3989548683166504, + "rewards/rejected": -1.0532455444335938, + "step": 167 + }, + { + "epoch": 0.11, + "learning_rate": 9.84720141458633e-08, + "logits/chosen": -3.1603894233703613, + "logits/rejected": -3.1159114837646484, + "logps/chosen": -265.0855712890625, + "logps/rejected": -718.4338989257812, + "loss": 0.4744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3006691038608551, + "rewards/margins": 1.7426674365997314, + "rewards/rejected": -1.4419983625411987, + "step": 168 + }, + { + "epoch": 0.11, + "learning_rate": 9.844657484491456e-08, + "logits/chosen": -3.2585947513580322, + "logits/rejected": -3.074326515197754, + "logps/chosen": -285.71490478515625, + "logps/rejected": -1176.303466796875, + "loss": 0.3993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39970701932907104, + "rewards/margins": 2.352801561355591, + "rewards/rejected": -1.953094482421875, + "step": 169 + }, + { + "epoch": 0.11, + "learning_rate": 9.842092886121127e-08, + "logits/chosen": -3.2176122665405273, + "logits/rejected": -3.1284232139587402, + "logps/chosen": -239.3792724609375, + "logps/rejected": -531.6927490234375, + "loss": 0.4413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2748253047466278, + "rewards/margins": 1.4061851501464844, + "rewards/rejected": -1.1313598155975342, + "step": 170 + }, + { + "epoch": 0.11, + "learning_rate": 9.839507630416434e-08, + "logits/chosen": -3.212937355041504, + "logits/rejected": -3.116560935974121, + "logps/chosen": -285.08062744140625, + "logps/rejected": -675.06689453125, + "loss": 0.5575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45429229736328125, + "rewards/margins": 1.7208282947540283, + "rewards/rejected": -1.266535997390747, + "step": 171 + }, + { + "epoch": 0.11, + "learning_rate": 9.836901728406595e-08, + "logits/chosen": -3.1869804859161377, + "logits/rejected": -3.0949296951293945, + "logps/chosen": -264.5949401855469, + "logps/rejected": -552.2174072265625, + "loss": 0.4352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3764961361885071, + "rewards/margins": 1.5808753967285156, + "rewards/rejected": -1.2043793201446533, + "step": 172 + }, + { + "epoch": 0.11, + "learning_rate": 9.834275191208903e-08, + "logits/chosen": -3.145289421081543, + "logits/rejected": -3.077359676361084, + "logps/chosen": -270.9505615234375, + "logps/rejected": -777.1492919921875, + "loss": 0.535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36887359619140625, + "rewards/margins": 1.8694320917129517, + "rewards/rejected": -1.5005584955215454, + "step": 173 + }, + { + "epoch": 0.11, + "learning_rate": 9.831628030028696e-08, + "logits/chosen": -3.214005470275879, + "logits/rejected": -3.1857333183288574, + "logps/chosen": -280.99114990234375, + "logps/rejected": -786.6121215820312, + "loss": 0.4378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3667663633823395, + "rewards/margins": 1.729060411453247, + "rewards/rejected": -1.3622939586639404, + "step": 174 + }, + { + "epoch": 0.11, + "learning_rate": 9.828960256159286e-08, + "logits/chosen": -3.202789306640625, + "logits/rejected": -3.0062923431396484, + "logps/chosen": -226.39456176757812, + "logps/rejected": -686.78125, + "loss": 0.4091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4157570004463196, + "rewards/margins": 1.6508949995040894, + "rewards/rejected": -1.235137939453125, + "step": 175 + }, + { + "epoch": 0.11, + "learning_rate": 9.826271880981933e-08, + "logits/chosen": -3.173877477645874, + "logits/rejected": -3.0462417602539062, + "logps/chosen": -300.018798828125, + "logps/rejected": -683.7943115234375, + "loss": 0.4818, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29481202363967896, + "rewards/margins": 1.368371605873108, + "rewards/rejected": -1.0735596418380737, + "step": 176 + }, + { + "epoch": 0.11, + "learning_rate": 9.82356291596578e-08, + "logits/chosen": -3.1058080196380615, + "logits/rejected": -3.077817678451538, + "logps/chosen": -262.4292297363281, + "logps/rejected": -501.6461486816406, + "loss": 0.533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3797195553779602, + "rewards/margins": 1.2289520502090454, + "rewards/rejected": -0.8492324948310852, + "step": 177 + }, + { + "epoch": 0.11, + "learning_rate": 9.820833372667812e-08, + "logits/chosen": -3.2028822898864746, + "logits/rejected": -3.1184535026550293, + "logps/chosen": -294.2402648925781, + "logps/rejected": -825.0535888671875, + "loss": 0.4431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41466063261032104, + "rewards/margins": 1.9292373657226562, + "rewards/rejected": -1.51457679271698, + "step": 178 + }, + { + "epoch": 0.11, + "learning_rate": 9.818083262732806e-08, + "logits/chosen": -3.179810047149658, + "logits/rejected": -3.0094943046569824, + "logps/chosen": -269.648193359375, + "logps/rejected": -776.5223388671875, + "loss": 0.4694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44985198974609375, + "rewards/margins": 1.8285019397735596, + "rewards/rejected": -1.3786499500274658, + "step": 179 + }, + { + "epoch": 0.11, + "learning_rate": 9.815312597893278e-08, + "logits/chosen": -3.1703872680664062, + "logits/rejected": -3.0607404708862305, + "logps/chosen": -260.6007080078125, + "logps/rejected": -652.0479736328125, + "loss": 0.4577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.382131963968277, + "rewards/margins": 1.4080932140350342, + "rewards/rejected": -1.0259612798690796, + "step": 180 + }, + { + "epoch": 0.12, + "learning_rate": 9.812521389969441e-08, + "logits/chosen": -3.25203013420105, + "logits/rejected": -3.07195782661438, + "logps/chosen": -256.2454528808594, + "logps/rejected": -439.61663818359375, + "loss": 0.4741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3593147397041321, + "rewards/margins": 1.2434272766113281, + "rewards/rejected": -0.8841125965118408, + "step": 181 + }, + { + "epoch": 0.12, + "learning_rate": 9.80970965086914e-08, + "logits/chosen": -3.2651853561401367, + "logits/rejected": -3.0574874877929688, + "logps/chosen": -220.991943359375, + "logps/rejected": -679.2772216796875, + "loss": 0.433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31514817476272583, + "rewards/margins": 1.5561881065368652, + "rewards/rejected": -1.2410399913787842, + "step": 182 + }, + { + "epoch": 0.12, + "learning_rate": 9.806877392587819e-08, + "logits/chosen": -3.1961216926574707, + "logits/rejected": -2.9737484455108643, + "logps/chosen": -243.1352996826172, + "logps/rejected": -1209.74267578125, + "loss": 0.4271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5106689929962158, + "rewards/margins": 2.5245513916015625, + "rewards/rejected": -2.013882637023926, + "step": 183 + }, + { + "epoch": 0.12, + "learning_rate": 9.804024627208455e-08, + "logits/chosen": -3.2373175621032715, + "logits/rejected": -2.951298236846924, + "logps/chosen": -275.9642028808594, + "logps/rejected": -873.6666870117188, + "loss": 0.4995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43877333402633667, + "rewards/margins": 1.9066352844238281, + "rewards/rejected": -1.4678620100021362, + "step": 184 + }, + { + "epoch": 0.12, + "learning_rate": 9.801151366901514e-08, + "logits/chosen": -3.2083497047424316, + "logits/rejected": -3.065502166748047, + "logps/chosen": -247.77853393554688, + "logps/rejected": -1133.614013671875, + "loss": 0.4555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3783554136753082, + "rewards/margins": 2.2890701293945312, + "rewards/rejected": -1.9107147455215454, + "step": 185 + }, + { + "epoch": 0.12, + "learning_rate": 9.798257623924899e-08, + "logits/chosen": -3.2199997901916504, + "logits/rejected": -3.0727968215942383, + "logps/chosen": -265.8031005859375, + "logps/rejected": -423.225830078125, + "loss": 0.5114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2778869569301605, + "rewards/margins": 1.1956512928009033, + "rewards/rejected": -0.9177643060684204, + "step": 186 + }, + { + "epoch": 0.12, + "learning_rate": 9.795343410623893e-08, + "logits/chosen": -3.1748673915863037, + "logits/rejected": -2.93703293800354, + "logps/chosen": -247.3998260498047, + "logps/rejected": -1131.5888671875, + "loss": 0.4318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4051162600517273, + "rewards/margins": 2.4317564964294434, + "rewards/rejected": -2.0266404151916504, + "step": 187 + }, + { + "epoch": 0.12, + "learning_rate": 9.792408739431116e-08, + "logits/chosen": -3.2400496006011963, + "logits/rejected": -3.096306324005127, + "logps/chosen": -268.27880859375, + "logps/rejected": -774.1116943359375, + "loss": 0.4612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40716856718063354, + "rewards/margins": 2.4314026832580566, + "rewards/rejected": -2.0242340564727783, + "step": 188 + }, + { + "epoch": 0.12, + "learning_rate": 9.789453622866454e-08, + "logits/chosen": -3.252898693084717, + "logits/rejected": -3.1438584327697754, + "logps/chosen": -248.38470458984375, + "logps/rejected": -519.9860229492188, + "loss": 0.3874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3661643862724304, + "rewards/margins": 1.6122353076934814, + "rewards/rejected": -1.2460708618164062, + "step": 189 + }, + { + "epoch": 0.12, + "learning_rate": 9.786478073537028e-08, + "logits/chosen": -3.1886067390441895, + "logits/rejected": -3.122753620147705, + "logps/chosen": -249.4935302734375, + "logps/rejected": -405.95941162109375, + "loss": 0.474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41691896319389343, + "rewards/margins": 1.107722520828247, + "rewards/rejected": -0.6908035278320312, + "step": 190 + }, + { + "epoch": 0.12, + "learning_rate": 9.783482104137126e-08, + "logits/chosen": -3.196065664291382, + "logits/rejected": -3.0729360580444336, + "logps/chosen": -262.8279724121094, + "logps/rejected": -281.3992614746094, + "loss": 0.4851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3927810788154602, + "rewards/margins": 0.9229484796524048, + "rewards/rejected": -0.5301674008369446, + "step": 191 + }, + { + "epoch": 0.12, + "learning_rate": 9.780465727448148e-08, + "logits/chosen": -3.241485595703125, + "logits/rejected": -3.0353879928588867, + "logps/chosen": -264.04876708984375, + "logps/rejected": -308.63104248046875, + "loss": 0.4906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5993942022323608, + "rewards/margins": 1.1962769031524658, + "rewards/rejected": -0.5968826413154602, + "step": 192 + }, + { + "epoch": 0.12, + "learning_rate": 9.777428956338562e-08, + "logits/chosen": -3.246211290359497, + "logits/rejected": -3.1404924392700195, + "logps/chosen": -247.19143676757812, + "logps/rejected": -514.9323120117188, + "loss": 0.4725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3604164123535156, + "rewards/margins": 1.4144387245178223, + "rewards/rejected": -1.0540223121643066, + "step": 193 + }, + { + "epoch": 0.12, + "learning_rate": 9.774371803763837e-08, + "logits/chosen": -3.129896640777588, + "logits/rejected": -3.144789457321167, + "logps/chosen": -273.709228515625, + "logps/rejected": -724.3482666015625, + "loss": 0.4242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44054490327835083, + "rewards/margins": 2.148165225982666, + "rewards/rejected": -1.7076202630996704, + "step": 194 + }, + { + "epoch": 0.12, + "learning_rate": 9.771294282766397e-08, + "logits/chosen": -3.1738061904907227, + "logits/rejected": -3.0230112075805664, + "logps/chosen": -289.401123046875, + "logps/rejected": -497.9466552734375, + "loss": 0.4625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31330567598342896, + "rewards/margins": 1.485670566558838, + "rewards/rejected": -1.1723648309707642, + "step": 195 + }, + { + "epoch": 0.12, + "learning_rate": 9.768196406475562e-08, + "logits/chosen": -3.2127232551574707, + "logits/rejected": -3.136777639389038, + "logps/chosen": -251.483642578125, + "logps/rejected": -328.1351623535156, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41903382539749146, + "rewards/margins": 1.1387298107147217, + "rewards/rejected": -0.719696044921875, + "step": 196 + }, + { + "epoch": 0.13, + "learning_rate": 9.765078188107488e-08, + "logits/chosen": -3.166210889816284, + "logits/rejected": -2.9899649620056152, + "logps/chosen": -257.50457763671875, + "logps/rejected": -688.2384033203125, + "loss": 0.4139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49037933349609375, + "rewards/margins": 1.822096347808838, + "rewards/rejected": -1.3317170143127441, + "step": 197 + }, + { + "epoch": 0.13, + "learning_rate": 9.761939640965117e-08, + "logits/chosen": -3.284024715423584, + "logits/rejected": -3.04646372795105, + "logps/chosen": -266.76861572265625, + "logps/rejected": -741.90966796875, + "loss": 0.4862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4717857241630554, + "rewards/margins": 1.8350913524627686, + "rewards/rejected": -1.363305687904358, + "step": 198 + }, + { + "epoch": 0.13, + "learning_rate": 9.758780778438118e-08, + "logits/chosen": -3.2544660568237305, + "logits/rejected": -3.1328892707824707, + "logps/chosen": -265.8944396972656, + "logps/rejected": -904.522705078125, + "loss": 0.4666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3909347653388977, + "rewards/margins": 2.1767594814300537, + "rewards/rejected": -1.7858245372772217, + "step": 199 + }, + { + "epoch": 0.13, + "learning_rate": 9.755601614002828e-08, + "logits/chosen": -3.221135139465332, + "logits/rejected": -3.1173574924468994, + "logps/chosen": -315.94580078125, + "logps/rejected": -916.607421875, + "loss": 0.4202, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4196929931640625, + "rewards/margins": 2.1256959438323975, + "rewards/rejected": -1.706002950668335, + "step": 200 + }, + { + "epoch": 0.13, + "learning_rate": 9.7524021612222e-08, + "logits/chosen": -3.271820068359375, + "logits/rejected": -3.177639961242676, + "logps/chosen": -255.90811157226562, + "logps/rejected": -350.54681396484375, + "loss": 0.4285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39301377534866333, + "rewards/margins": 1.2758598327636719, + "rewards/rejected": -0.8828461170196533, + "step": 201 + }, + { + "epoch": 0.13, + "learning_rate": 9.749182433745732e-08, + "logits/chosen": -3.2065720558166504, + "logits/rejected": -3.025470733642578, + "logps/chosen": -246.39840698242188, + "logps/rejected": -336.12841796875, + "loss": 0.516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41872328519821167, + "rewards/margins": 1.1585183143615723, + "rewards/rejected": -0.739794909954071, + "step": 202 + }, + { + "epoch": 0.13, + "learning_rate": 9.74594244530943e-08, + "logits/chosen": -3.1909518241882324, + "logits/rejected": -3.0767509937286377, + "logps/chosen": -293.59918212890625, + "logps/rejected": -617.4638671875, + "loss": 0.4621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3867248594760895, + "rewards/margins": 1.4903779029846191, + "rewards/rejected": -1.1036529541015625, + "step": 203 + }, + { + "epoch": 0.13, + "learning_rate": 9.742682209735726e-08, + "logits/chosen": -3.2326955795288086, + "logits/rejected": -3.1008832454681396, + "logps/chosen": -254.92147827148438, + "logps/rejected": -414.05035400390625, + "loss": 0.4348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4963386654853821, + "rewards/margins": 1.4198906421661377, + "rewards/rejected": -0.9235519170761108, + "step": 204 + }, + { + "epoch": 0.13, + "learning_rate": 9.73940174093344e-08, + "logits/chosen": -3.133604049682617, + "logits/rejected": -3.132412910461426, + "logps/chosen": -267.07757568359375, + "logps/rejected": -525.7246704101562, + "loss": 0.4687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46387481689453125, + "rewards/margins": 1.6950585842132568, + "rewards/rejected": -1.231183648109436, + "step": 205 + }, + { + "epoch": 0.13, + "learning_rate": 9.736101052897704e-08, + "logits/chosen": -3.1564011573791504, + "logits/rejected": -3.0972847938537598, + "logps/chosen": -306.27838134765625, + "logps/rejected": -640.2969970703125, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49319231510162354, + "rewards/margins": 2.2716622352600098, + "rewards/rejected": -1.7784699201583862, + "step": 206 + }, + { + "epoch": 0.13, + "learning_rate": 9.732780159709911e-08, + "logits/chosen": -3.192379951477051, + "logits/rejected": -3.06831693649292, + "logps/chosen": -293.77520751953125, + "logps/rejected": -972.4511108398438, + "loss": 0.3789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5249221920967102, + "rewards/margins": 2.4981415271759033, + "rewards/rejected": -1.9732192754745483, + "step": 207 + }, + { + "epoch": 0.13, + "learning_rate": 9.729439075537655e-08, + "logits/chosen": -3.218717098236084, + "logits/rejected": -3.078890323638916, + "logps/chosen": -258.787841796875, + "logps/rejected": -601.9652099609375, + "loss": 0.5084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43393707275390625, + "rewards/margins": 1.8466049432754517, + "rewards/rejected": -1.4126678705215454, + "step": 208 + }, + { + "epoch": 0.13, + "learning_rate": 9.726077814634669e-08, + "logits/chosen": -3.133422374725342, + "logits/rejected": -3.0469624996185303, + "logps/chosen": -291.1778259277344, + "logps/rejected": -752.341552734375, + "loss": 0.4033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5246384143829346, + "rewards/margins": 2.2608766555786133, + "rewards/rejected": -1.7362381219863892, + "step": 209 + }, + { + "epoch": 0.13, + "learning_rate": 9.72269639134076e-08, + "logits/chosen": -3.18758487701416, + "logits/rejected": -3.1687169075012207, + "logps/chosen": -271.439697265625, + "logps/rejected": -486.5670166015625, + "loss": 0.4047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42944490909576416, + "rewards/margins": 1.6953812837600708, + "rewards/rejected": -1.2659363746643066, + "step": 210 + }, + { + "epoch": 0.13, + "learning_rate": 9.719294820081756e-08, + "logits/chosen": -3.262385129928589, + "logits/rejected": -3.0745930671691895, + "logps/chosen": -280.76739501953125, + "logps/rejected": -551.30126953125, + "loss": 0.4253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4444320797920227, + "rewards/margins": 1.65050208568573, + "rewards/rejected": -1.2060699462890625, + "step": 211 + }, + { + "epoch": 0.14, + "learning_rate": 9.715873115369439e-08, + "logits/chosen": -3.270631790161133, + "logits/rejected": -3.1090567111968994, + "logps/chosen": -237.87310791015625, + "logps/rejected": -704.6351318359375, + "loss": 0.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.491220086812973, + "rewards/margins": 2.0224335193634033, + "rewards/rejected": -1.531213402748108, + "step": 212 + }, + { + "epoch": 0.14, + "learning_rate": 9.712431291801482e-08, + "logits/chosen": -3.2985551357269287, + "logits/rejected": -3.0758273601531982, + "logps/chosen": -228.71783447265625, + "logps/rejected": -3330.8837890625, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5321594476699829, + "rewards/margins": 4.778695583343506, + "rewards/rejected": -4.2465362548828125, + "step": 213 + }, + { + "epoch": 0.14, + "learning_rate": 9.708969364061395e-08, + "logits/chosen": -3.2364768981933594, + "logits/rejected": -3.025794506072998, + "logps/chosen": -282.695068359375, + "logps/rejected": -453.456787109375, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43270188570022583, + "rewards/margins": 1.2518470287322998, + "rewards/rejected": -0.8191452026367188, + "step": 214 + }, + { + "epoch": 0.14, + "learning_rate": 9.705487346918447e-08, + "logits/chosen": -3.166802167892456, + "logits/rejected": -3.12206768989563, + "logps/chosen": -257.9957275390625, + "logps/rejected": -942.105224609375, + "loss": 0.3934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41736066341400146, + "rewards/margins": 3.3178000450134277, + "rewards/rejected": -2.900439500808716, + "step": 215 + }, + { + "epoch": 0.14, + "learning_rate": 9.701985255227623e-08, + "logits/chosen": -3.1963486671447754, + "logits/rejected": -3.043484687805176, + "logps/chosen": -293.33148193359375, + "logps/rejected": -679.2208862304688, + "loss": 0.4503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45505374670028687, + "rewards/margins": 2.159018039703369, + "rewards/rejected": -1.7039642333984375, + "step": 216 + }, + { + "epoch": 0.14, + "learning_rate": 9.698463103929542e-08, + "logits/chosen": -3.2649343013763428, + "logits/rejected": -3.204536199569702, + "logps/chosen": -200.8621063232422, + "logps/rejected": -598.6004638671875, + "loss": 0.3963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45070725679397583, + "rewards/margins": 2.421401262283325, + "rewards/rejected": -1.9706940650939941, + "step": 217 + }, + { + "epoch": 0.14, + "learning_rate": 9.694920908050404e-08, + "logits/chosen": -3.1881484985351562, + "logits/rejected": -3.096668004989624, + "logps/chosen": -243.21311950683594, + "logps/rejected": -750.1195068359375, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44480133056640625, + "rewards/margins": 2.242060899734497, + "rewards/rejected": -1.7972595691680908, + "step": 218 + }, + { + "epoch": 0.14, + "learning_rate": 9.691358682701925e-08, + "logits/chosen": -3.275056838989258, + "logits/rejected": -3.1592557430267334, + "logps/chosen": -263.96807861328125, + "logps/rejected": -403.4139404296875, + "loss": 0.4213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2985946536064148, + "rewards/margins": 1.4428298473358154, + "rewards/rejected": -1.1442352533340454, + "step": 219 + }, + { + "epoch": 0.14, + "learning_rate": 9.687776443081269e-08, + "logits/chosen": -3.1881651878356934, + "logits/rejected": -3.0916380882263184, + "logps/chosen": -281.7279968261719, + "logps/rejected": -495.760986328125, + "loss": 0.4359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45142215490341187, + "rewards/margins": 1.7073800563812256, + "rewards/rejected": -1.255957841873169, + "step": 220 + }, + { + "epoch": 0.14, + "learning_rate": 9.684174204470985e-08, + "logits/chosen": -3.2095179557800293, + "logits/rejected": -3.1018614768981934, + "logps/chosen": -252.158935546875, + "logps/rejected": -656.9581909179688, + "loss": 0.419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4355125427246094, + "rewards/margins": 1.8656470775604248, + "rewards/rejected": -1.4301345348358154, + "step": 221 + }, + { + "epoch": 0.14, + "learning_rate": 9.68055198223894e-08, + "logits/chosen": -3.2593889236450195, + "logits/rejected": -3.0882411003112793, + "logps/chosen": -245.21292114257812, + "logps/rejected": -1704.0511474609375, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5464355945587158, + "rewards/margins": 4.019909858703613, + "rewards/rejected": -3.4734740257263184, + "step": 222 + }, + { + "epoch": 0.14, + "learning_rate": 9.676909791838257e-08, + "logits/chosen": -3.2471094131469727, + "logits/rejected": -3.0726799964904785, + "logps/chosen": -257.1131591796875, + "logps/rejected": -1529.0625, + "loss": 0.3869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.526470959186554, + "rewards/margins": 4.031884670257568, + "rewards/rejected": -3.505413770675659, + "step": 223 + }, + { + "epoch": 0.14, + "learning_rate": 9.67324764880725e-08, + "logits/chosen": -3.15006160736084, + "logits/rejected": -3.0888822078704834, + "logps/chosen": -257.68658447265625, + "logps/rejected": -713.4232177734375, + "loss": 0.4369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5879837274551392, + "rewards/margins": 2.2770094871520996, + "rewards/rejected": -1.68902587890625, + "step": 224 + }, + { + "epoch": 0.14, + "learning_rate": 9.669565568769346e-08, + "logits/chosen": -3.2645835876464844, + "logits/rejected": -2.968609571456909, + "logps/chosen": -239.8937225341797, + "logps/rejected": -1198.5118408203125, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.556164562702179, + "rewards/margins": 3.0870237350463867, + "rewards/rejected": -2.5308594703674316, + "step": 225 + }, + { + "epoch": 0.14, + "learning_rate": 9.66586356743304e-08, + "logits/chosen": -3.2644448280334473, + "logits/rejected": -3.0843987464904785, + "logps/chosen": -262.96282958984375, + "logps/rejected": -365.6520080566406, + "loss": 0.4022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5859146118164062, + "rewards/margins": 1.3795089721679688, + "rewards/rejected": -0.7935943603515625, + "step": 226 + }, + { + "epoch": 0.14, + "learning_rate": 9.662141660591803e-08, + "logits/chosen": -3.1726155281066895, + "logits/rejected": -3.089782953262329, + "logps/chosen": -246.7803497314453, + "logps/rejected": -285.22784423828125, + "loss": 0.449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4423942565917969, + "rewards/margins": 1.0025246143341064, + "rewards/rejected": -0.5601303577423096, + "step": 227 + }, + { + "epoch": 0.15, + "learning_rate": 9.658399864124036e-08, + "logits/chosen": -3.193235158920288, + "logits/rejected": -3.099640369415283, + "logps/chosen": -264.6256103515625, + "logps/rejected": -722.3223876953125, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4107864499092102, + "rewards/margins": 1.9753022193908691, + "rewards/rejected": -1.5645158290863037, + "step": 228 + }, + { + "epoch": 0.15, + "learning_rate": 9.654638193992987e-08, + "logits/chosen": -3.2168400287628174, + "logits/rejected": -3.001307249069214, + "logps/chosen": -261.9175720214844, + "logps/rejected": -1334.41455078125, + "loss": 0.4894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6746246814727783, + "rewards/margins": 3.1863555908203125, + "rewards/rejected": -2.5117311477661133, + "step": 229 + }, + { + "epoch": 0.15, + "learning_rate": 9.650856666246691e-08, + "logits/chosen": -3.1619982719421387, + "logits/rejected": -3.1122753620147705, + "logps/chosen": -271.37506103515625, + "logps/rejected": -626.4171142578125, + "loss": 0.4304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5046394467353821, + "rewards/margins": 2.0480096340179443, + "rewards/rejected": -1.543370008468628, + "step": 230 + }, + { + "epoch": 0.15, + "learning_rate": 9.647055297017901e-08, + "logits/chosen": -3.206993579864502, + "logits/rejected": -3.055901288986206, + "logps/chosen": -260.90667724609375, + "logps/rejected": -375.65594482421875, + "loss": 0.4307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.582476794719696, + "rewards/margins": 1.255070447921753, + "rewards/rejected": -0.6725937128067017, + "step": 231 + }, + { + "epoch": 0.15, + "learning_rate": 9.643234102524016e-08, + "logits/chosen": -3.2416980266571045, + "logits/rejected": -3.1308064460754395, + "logps/chosen": -265.47283935546875, + "logps/rejected": -437.7491149902344, + "loss": 0.435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5674713253974915, + "rewards/margins": 1.551116943359375, + "rewards/rejected": -0.9836456775665283, + "step": 232 + }, + { + "epoch": 0.15, + "learning_rate": 9.639393099067015e-08, + "logits/chosen": -3.15767240524292, + "logits/rejected": -3.0385208129882812, + "logps/chosen": -280.2491149902344, + "logps/rejected": -325.55230712890625, + "loss": 0.4387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5505332946777344, + "rewards/margins": 1.3425507545471191, + "rewards/rejected": -0.7920173406600952, + "step": 233 + }, + { + "epoch": 0.15, + "learning_rate": 9.635532303033385e-08, + "logits/chosen": -3.193065643310547, + "logits/rejected": -3.0739612579345703, + "logps/chosen": -265.87774658203125, + "logps/rejected": -1522.440185546875, + "loss": 0.3732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44427490234375, + "rewards/margins": 4.232172012329102, + "rewards/rejected": -3.7878968715667725, + "step": 234 + }, + { + "epoch": 0.15, + "learning_rate": 9.631651730894052e-08, + "logits/chosen": -3.150792121887207, + "logits/rejected": -3.1588168144226074, + "logps/chosen": -292.52362060546875, + "logps/rejected": -666.430908203125, + "loss": 0.3848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5534766912460327, + "rewards/margins": 2.5587868690490723, + "rewards/rejected": -2.00531005859375, + "step": 235 + }, + { + "epoch": 0.15, + "learning_rate": 9.627751399204309e-08, + "logits/chosen": -3.193284034729004, + "logits/rejected": -3.1126129627227783, + "logps/chosen": -282.154296875, + "logps/rejected": -568.9473266601562, + "loss": 0.4286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40299147367477417, + "rewards/margins": 1.5875892639160156, + "rewards/rejected": -1.1845977306365967, + "step": 236 + }, + { + "epoch": 0.15, + "learning_rate": 9.623831324603753e-08, + "logits/chosen": -3.1758947372436523, + "logits/rejected": -3.1732420921325684, + "logps/chosen": -283.01068115234375, + "logps/rejected": -959.077392578125, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.490042120218277, + "rewards/margins": 3.065316915512085, + "rewards/rejected": -2.575274705886841, + "step": 237 + }, + { + "epoch": 0.15, + "learning_rate": 9.619891523816201e-08, + "logits/chosen": -3.1575427055358887, + "logits/rejected": -3.0594382286071777, + "logps/chosen": -274.2044677734375, + "logps/rejected": -691.3646850585938, + "loss": 0.3925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7000420093536377, + "rewards/margins": 2.454235076904297, + "rewards/rejected": -1.7541930675506592, + "step": 238 + }, + { + "epoch": 0.15, + "learning_rate": 9.61593201364963e-08, + "logits/chosen": -3.2534961700439453, + "logits/rejected": -3.196584463119507, + "logps/chosen": -229.92266845703125, + "logps/rejected": -540.7300415039062, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4847404360771179, + "rewards/margins": 2.046828508377075, + "rewards/rejected": -1.5620880126953125, + "step": 239 + }, + { + "epoch": 0.15, + "learning_rate": 9.611952810996102e-08, + "logits/chosen": -3.222932815551758, + "logits/rejected": -3.1021323204040527, + "logps/chosen": -289.97119140625, + "logps/rejected": -457.0301513671875, + "loss": 0.4333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49990540742874146, + "rewards/margins": 1.6826202869415283, + "rewards/rejected": -1.1827149391174316, + "step": 240 + }, + { + "epoch": 0.15, + "learning_rate": 9.60795393283169e-08, + "logits/chosen": -3.211271286010742, + "logits/rejected": -3.0394961833953857, + "logps/chosen": -233.9693145751953, + "logps/rejected": -758.0884399414062, + "loss": 0.3964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49172136187553406, + "rewards/margins": 2.4664466381073, + "rewards/rejected": -1.974725365638733, + "step": 241 + }, + { + "epoch": 0.15, + "learning_rate": 9.603935396216402e-08, + "logits/chosen": -3.256345748901367, + "logits/rejected": -3.10438871383667, + "logps/chosen": -252.27154541015625, + "logps/rejected": -548.31591796875, + "loss": 0.3901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.635973334312439, + "rewards/margins": 1.9899375438690186, + "rewards/rejected": -1.3539642095565796, + "step": 242 + }, + { + "epoch": 0.15, + "learning_rate": 9.599897218294121e-08, + "logits/chosen": -3.2526674270629883, + "logits/rejected": -3.1376819610595703, + "logps/chosen": -273.9256591796875, + "logps/rejected": -346.2729797363281, + "loss": 0.4224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6666504144668579, + "rewards/margins": 1.4076019525527954, + "rewards/rejected": -0.7409515380859375, + "step": 243 + }, + { + "epoch": 0.16, + "learning_rate": 9.595839416292518e-08, + "logits/chosen": -3.2692863941192627, + "logits/rejected": -3.1005711555480957, + "logps/chosen": -240.8002471923828, + "logps/rejected": -568.6099853515625, + "loss": 0.3859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4941558837890625, + "rewards/margins": 2.0278778076171875, + "rewards/rejected": -1.533721923828125, + "step": 244 + }, + { + "epoch": 0.16, + "learning_rate": 9.591762007522985e-08, + "logits/chosen": -3.26568865776062, + "logits/rejected": -3.106571674346924, + "logps/chosen": -260.36767578125, + "logps/rejected": -1559.994140625, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5774734616279602, + "rewards/margins": 4.407057285308838, + "rewards/rejected": -3.8295836448669434, + "step": 245 + }, + { + "epoch": 0.16, + "learning_rate": 9.587665009380565e-08, + "logits/chosen": -3.2160725593566895, + "logits/rejected": -3.1067991256713867, + "logps/chosen": -268.46533203125, + "logps/rejected": -707.9706420898438, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4499466121196747, + "rewards/margins": 2.4049911499023438, + "rewards/rejected": -1.9550445079803467, + "step": 246 + }, + { + "epoch": 0.16, + "learning_rate": 9.583548439343864e-08, + "logits/chosen": -3.217000722885132, + "logits/rejected": -3.0897645950317383, + "logps/chosen": -249.58895874023438, + "logps/rejected": -527.10400390625, + "loss": 0.3874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5576225519180298, + "rewards/margins": 2.2621207237243652, + "rewards/rejected": -1.704498291015625, + "step": 247 + }, + { + "epoch": 0.16, + "learning_rate": 9.579412314974997e-08, + "logits/chosen": -3.2393155097961426, + "logits/rejected": -3.068223476409912, + "logps/chosen": -253.37185668945312, + "logps/rejected": -1537.2578125, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49823760986328125, + "rewards/margins": 4.140510559082031, + "rewards/rejected": -3.64227294921875, + "step": 248 + }, + { + "epoch": 0.16, + "learning_rate": 9.575256653919493e-08, + "logits/chosen": -3.216698169708252, + "logits/rejected": -3.0185728073120117, + "logps/chosen": -282.18792724609375, + "logps/rejected": -371.0250244140625, + "loss": 0.4271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5630767941474915, + "rewards/margins": 1.4164535999298096, + "rewards/rejected": -0.8533767461776733, + "step": 249 + }, + { + "epoch": 0.16, + "learning_rate": 9.571081473906231e-08, + "logits/chosen": -3.2004551887512207, + "logits/rejected": -3.1433894634246826, + "logps/chosen": -215.74098205566406, + "logps/rejected": -823.2542724609375, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.509411633014679, + "rewards/margins": 2.6016664505004883, + "rewards/rejected": -2.092254638671875, + "step": 250 + }, + { + "epoch": 0.16, + "learning_rate": 9.56688679274736e-08, + "logits/chosen": -3.220324993133545, + "logits/rejected": -3.20790696144104, + "logps/chosen": -290.2294921875, + "logps/rejected": -594.2303466796875, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5257309079170227, + "rewards/margins": 2.3590025901794434, + "rewards/rejected": -1.833271861076355, + "step": 251 + }, + { + "epoch": 0.16, + "learning_rate": 9.562672628338232e-08, + "logits/chosen": -3.1855618953704834, + "logits/rejected": -3.0595526695251465, + "logps/chosen": -278.8095703125, + "logps/rejected": -305.133544921875, + "loss": 0.4029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5368858575820923, + "rewards/margins": 1.2369027137756348, + "rewards/rejected": -0.7000167965888977, + "step": 252 + }, + { + "epoch": 0.16, + "learning_rate": 9.558438998657309e-08, + "logits/chosen": -3.267549991607666, + "logits/rejected": -3.1599109172821045, + "logps/chosen": -264.8714599609375, + "logps/rejected": -630.5198364257812, + "loss": 0.3748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4600639343261719, + "rewards/margins": 2.4820032119750977, + "rewards/rejected": -2.021939277648926, + "step": 253 + }, + { + "epoch": 0.16, + "learning_rate": 9.554185921766101e-08, + "logits/chosen": -3.195617198944092, + "logits/rejected": -3.119631052017212, + "logps/chosen": -257.519287109375, + "logps/rejected": -1130.33544921875, + "loss": 0.4033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5860855579376221, + "rewards/margins": 3.5624465942382812, + "rewards/rejected": -2.9763612747192383, + "step": 254 + }, + { + "epoch": 0.16, + "learning_rate": 9.549913415809083e-08, + "logits/chosen": -3.190474033355713, + "logits/rejected": -3.146010398864746, + "logps/chosen": -297.2939453125, + "logps/rejected": -788.1627197265625, + "loss": 0.398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5694732666015625, + "rewards/margins": 2.924386501312256, + "rewards/rejected": -2.3549132347106934, + "step": 255 + }, + { + "epoch": 0.16, + "learning_rate": 9.545621499013619e-08, + "logits/chosen": -3.187681198120117, + "logits/rejected": -3.114898681640625, + "logps/chosen": -251.04251098632812, + "logps/rejected": -565.4923706054688, + "loss": 0.3942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5335807800292969, + "rewards/margins": 1.7622673511505127, + "rewards/rejected": -1.2286865711212158, + "step": 256 + }, + { + "epoch": 0.16, + "learning_rate": 9.541310189689879e-08, + "logits/chosen": -3.269211530685425, + "logits/rejected": -3.082839012145996, + "logps/chosen": -281.3155517578125, + "logps/rejected": -667.6119384765625, + "loss": 0.4262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5967010259628296, + "rewards/margins": 2.8356552124023438, + "rewards/rejected": -2.2389540672302246, + "step": 257 + }, + { + "epoch": 0.16, + "learning_rate": 9.536979506230771e-08, + "logits/chosen": -3.2272238731384277, + "logits/rejected": -3.0757977962493896, + "logps/chosen": -261.25494384765625, + "logps/rejected": -585.5130004882812, + "loss": 0.4648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5830093622207642, + "rewards/margins": 2.675442695617676, + "rewards/rejected": -2.092433214187622, + "step": 258 + }, + { + "epoch": 0.17, + "learning_rate": 9.532629467111855e-08, + "logits/chosen": -3.2183687686920166, + "logits/rejected": -3.12725567817688, + "logps/chosen": -250.05758666992188, + "logps/rejected": -741.123779296875, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45867234468460083, + "rewards/margins": 2.3799049854278564, + "rewards/rejected": -1.9212325811386108, + "step": 259 + }, + { + "epoch": 0.17, + "learning_rate": 9.528260090891266e-08, + "logits/chosen": -3.1602654457092285, + "logits/rejected": -3.1086833477020264, + "logps/chosen": -236.68321228027344, + "logps/rejected": -696.167236328125, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7073944211006165, + "rewards/margins": 2.695788621902466, + "rewards/rejected": -1.9883942604064941, + "step": 260 + }, + { + "epoch": 0.17, + "learning_rate": 9.523871396209633e-08, + "logits/chosen": -3.198350429534912, + "logits/rejected": -3.0900464057922363, + "logps/chosen": -276.4552917480469, + "logps/rejected": -531.3567504882812, + "loss": 0.4233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6211395263671875, + "rewards/margins": 2.126669406890869, + "rewards/rejected": -1.5055298805236816, + "step": 261 + }, + { + "epoch": 0.17, + "learning_rate": 9.519463401790004e-08, + "logits/chosen": -3.2489867210388184, + "logits/rejected": -3.1349830627441406, + "logps/chosen": -289.88763427734375, + "logps/rejected": -557.906494140625, + "loss": 0.4068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.645581066608429, + "rewards/margins": 2.0380783081054688, + "rewards/rejected": -1.3924973011016846, + "step": 262 + }, + { + "epoch": 0.17, + "learning_rate": 9.515036126437766e-08, + "logits/chosen": -3.1664438247680664, + "logits/rejected": -3.06796932220459, + "logps/chosen": -279.8702697753906, + "logps/rejected": -329.9287414550781, + "loss": 0.4423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5384666323661804, + "rewards/margins": 1.265679121017456, + "rewards/rejected": -0.7272125482559204, + "step": 263 + }, + { + "epoch": 0.17, + "learning_rate": 9.510589589040552e-08, + "logits/chosen": -3.198925733566284, + "logits/rejected": -3.0827131271362305, + "logps/chosen": -266.7005615234375, + "logps/rejected": -781.3480224609375, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6005859375, + "rewards/margins": 3.112628221511841, + "rewards/rejected": -2.512042284011841, + "step": 264 + }, + { + "epoch": 0.17, + "learning_rate": 9.506123808568185e-08, + "logits/chosen": -3.2804346084594727, + "logits/rejected": -3.155150890350342, + "logps/chosen": -281.0211181640625, + "logps/rejected": -530.2161254882812, + "loss": 0.3984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47032469511032104, + "rewards/margins": 1.9142487049102783, + "rewards/rejected": -1.4439239501953125, + "step": 265 + }, + { + "epoch": 0.17, + "learning_rate": 9.501638804072569e-08, + "logits/chosen": -3.209240198135376, + "logits/rejected": -3.071634531021118, + "logps/chosen": -274.6244812011719, + "logps/rejected": -612.502685546875, + "loss": 0.369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48258668184280396, + "rewards/margins": 2.232623338699341, + "rewards/rejected": -1.7500367164611816, + "step": 266 + }, + { + "epoch": 0.17, + "learning_rate": 9.497134594687634e-08, + "logits/chosen": -3.1986801624298096, + "logits/rejected": -3.0717287063598633, + "logps/chosen": -249.89794921875, + "logps/rejected": -498.65283203125, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5214096307754517, + "rewards/margins": 2.2305588722229004, + "rewards/rejected": -1.7091491222381592, + "step": 267 + }, + { + "epoch": 0.17, + "learning_rate": 9.492611199629232e-08, + "logits/chosen": -3.1910810470581055, + "logits/rejected": -2.9672000408172607, + "logps/chosen": -274.7091064453125, + "logps/rejected": -185.25613403320312, + "loss": 0.4222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5808777213096619, + "rewards/margins": 0.8661091327667236, + "rewards/rejected": -0.28523141145706177, + "step": 268 + }, + { + "epoch": 0.17, + "learning_rate": 9.48806863819507e-08, + "logits/chosen": -3.2371883392333984, + "logits/rejected": -3.0708937644958496, + "logps/chosen": -267.8187255859375, + "logps/rejected": -393.03216552734375, + "loss": 0.3802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6397750973701477, + "rewards/margins": 1.847253441810608, + "rewards/rejected": -1.207478404045105, + "step": 269 + }, + { + "epoch": 0.17, + "learning_rate": 9.483506929764621e-08, + "logits/chosen": -3.1571316719055176, + "logits/rejected": -3.0946426391601562, + "logps/chosen": -274.5933532714844, + "logps/rejected": -584.8286743164062, + "loss": 0.3872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6154098510742188, + "rewards/margins": 2.5654635429382324, + "rewards/rejected": -1.9500534534454346, + "step": 270 + }, + { + "epoch": 0.17, + "learning_rate": 9.478926093799045e-08, + "logits/chosen": -3.1758460998535156, + "logits/rejected": -3.1152591705322266, + "logps/chosen": -255.1572265625, + "logps/rejected": -537.424072265625, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5643119812011719, + "rewards/margins": 2.0878119468688965, + "rewards/rejected": -1.5235000848770142, + "step": 271 + }, + { + "epoch": 0.17, + "learning_rate": 9.474326149841099e-08, + "logits/chosen": -3.240907907485962, + "logits/rejected": -3.1516382694244385, + "logps/chosen": -221.715087890625, + "logps/rejected": -714.59814453125, + "loss": 0.3761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5921417474746704, + "rewards/margins": 2.968179225921631, + "rewards/rejected": -2.37603759765625, + "step": 272 + }, + { + "epoch": 0.17, + "learning_rate": 9.469707117515066e-08, + "logits/chosen": -3.2883925437927246, + "logits/rejected": -3.0434789657592773, + "logps/chosen": -259.1812438964844, + "logps/rejected": -408.4078063964844, + "loss": 0.3828, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6998322010040283, + "rewards/margins": 1.7956397533416748, + "rewards/rejected": -1.095807671546936, + "step": 273 + }, + { + "epoch": 0.17, + "learning_rate": 9.465069016526656e-08, + "logits/chosen": -3.2564573287963867, + "logits/rejected": -3.1631898880004883, + "logps/chosen": -301.02813720703125, + "logps/rejected": -444.27880859375, + "loss": 0.398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44622802734375, + "rewards/margins": 1.7975738048553467, + "rewards/rejected": -1.3513457775115967, + "step": 274 + }, + { + "epoch": 0.18, + "learning_rate": 9.460411866662935e-08, + "logits/chosen": -3.2458157539367676, + "logits/rejected": -3.1599082946777344, + "logps/chosen": -231.18675231933594, + "logps/rejected": -526.0404663085938, + "loss": 0.4095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6391380429267883, + "rewards/margins": 2.1650900840759277, + "rewards/rejected": -1.5259521007537842, + "step": 275 + }, + { + "epoch": 0.18, + "learning_rate": 9.455735687792232e-08, + "logits/chosen": -3.2215898036956787, + "logits/rejected": -3.098635673522949, + "logps/chosen": -279.7830505371094, + "logps/rejected": -631.2972412109375, + "loss": 0.3916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6469742059707642, + "rewards/margins": 2.352139472961426, + "rewards/rejected": -1.705165147781372, + "step": 276 + }, + { + "epoch": 0.18, + "learning_rate": 9.451040499864061e-08, + "logits/chosen": -3.2989487648010254, + "logits/rejected": -3.109506130218506, + "logps/chosen": -229.4119873046875, + "logps/rejected": -1080.1044921875, + "loss": 0.3691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5892661809921265, + "rewards/margins": 4.048464298248291, + "rewards/rejected": -3.459197998046875, + "step": 277 + }, + { + "epoch": 0.18, + "learning_rate": 9.446326322909031e-08, + "logits/chosen": -3.2964224815368652, + "logits/rejected": -3.1129727363586426, + "logps/chosen": -251.3380584716797, + "logps/rejected": -574.1842041015625, + "loss": 0.3689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5290145874023438, + "rewards/margins": 2.3871068954467773, + "rewards/rejected": -1.8580925464630127, + "step": 278 + }, + { + "epoch": 0.18, + "learning_rate": 9.441593177038757e-08, + "logits/chosen": -3.2231364250183105, + "logits/rejected": -3.0766844749450684, + "logps/chosen": -286.9449768066406, + "logps/rejected": -635.482177734375, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6364296078681946, + "rewards/margins": 2.2976908683776855, + "rewards/rejected": -1.6612610816955566, + "step": 279 + }, + { + "epoch": 0.18, + "learning_rate": 9.436841082445787e-08, + "logits/chosen": -3.1914970874786377, + "logits/rejected": -3.0565474033355713, + "logps/chosen": -240.56907653808594, + "logps/rejected": -326.87860107421875, + "loss": 0.4219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6100448369979858, + "rewards/margins": 1.5215438604354858, + "rewards/rejected": -0.9114990234375, + "step": 280 + }, + { + "epoch": 0.18, + "learning_rate": 9.432070059403507e-08, + "logits/chosen": -3.175004005432129, + "logits/rejected": -3.1179933547973633, + "logps/chosen": -259.45819091796875, + "logps/rejected": -882.747802734375, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.583905041217804, + "rewards/margins": 3.3560454845428467, + "rewards/rejected": -2.7721405029296875, + "step": 281 + }, + { + "epoch": 0.18, + "learning_rate": 9.427280128266049e-08, + "logits/chosen": -3.198485851287842, + "logits/rejected": -3.1957497596740723, + "logps/chosen": -268.0509948730469, + "logps/rejected": -560.6234741210938, + "loss": 0.3942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5208885073661804, + "rewards/margins": 2.1064629554748535, + "rewards/rejected": -1.5855743885040283, + "step": 282 + }, + { + "epoch": 0.18, + "learning_rate": 9.422471309468217e-08, + "logits/chosen": -3.2704591751098633, + "logits/rejected": -3.1530818939208984, + "logps/chosen": -266.61236572265625, + "logps/rejected": -682.2001342773438, + "loss": 0.3983, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8802261352539062, + "rewards/margins": 3.248866319656372, + "rewards/rejected": -2.368640184402466, + "step": 283 + }, + { + "epoch": 0.18, + "learning_rate": 9.417643623525391e-08, + "logits/chosen": -3.2030646800994873, + "logits/rejected": -3.106010675430298, + "logps/chosen": -293.60687255859375, + "logps/rejected": -351.4222412109375, + "loss": 0.4204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5266998410224915, + "rewards/margins": 1.4808052778244019, + "rewards/rejected": -0.9541053771972656, + "step": 284 + }, + { + "epoch": 0.18, + "learning_rate": 9.412797091033442e-08, + "logits/chosen": -3.184723138809204, + "logits/rejected": -3.1458468437194824, + "logps/chosen": -318.40032958984375, + "logps/rejected": -627.6184692382812, + "loss": 0.3905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5214462280273438, + "rewards/margins": 2.7471542358398438, + "rewards/rejected": -2.2257080078125, + "step": 285 + }, + { + "epoch": 0.18, + "learning_rate": 9.407931732668645e-08, + "logits/chosen": -3.2371063232421875, + "logits/rejected": -3.197030544281006, + "logps/chosen": -260.89715576171875, + "logps/rejected": -492.7633056640625, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5500465631484985, + "rewards/margins": 2.1288156509399414, + "rewards/rejected": -1.5787689685821533, + "step": 286 + }, + { + "epoch": 0.18, + "learning_rate": 9.40304756918759e-08, + "logits/chosen": -3.241952657699585, + "logits/rejected": -3.1147468090057373, + "logps/chosen": -282.3194580078125, + "logps/rejected": -678.560546875, + "loss": 0.3497, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6660797595977783, + "rewards/margins": 2.820819139480591, + "rewards/rejected": -2.1547393798828125, + "step": 287 + }, + { + "epoch": 0.18, + "learning_rate": 9.398144621427094e-08, + "logits/chosen": -3.204514503479004, + "logits/rejected": -3.1582984924316406, + "logps/chosen": -237.42530822753906, + "logps/rejected": -500.66473388671875, + "loss": 0.3667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6987411975860596, + "rewards/margins": 1.9901657104492188, + "rewards/rejected": -1.2914246320724487, + "step": 288 + }, + { + "epoch": 0.18, + "learning_rate": 9.393222910304106e-08, + "logits/chosen": -3.2504477500915527, + "logits/rejected": -3.124743938446045, + "logps/chosen": -278.58746337890625, + "logps/rejected": -1275.979736328125, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6594460010528564, + "rewards/margins": 5.119699478149414, + "rewards/rejected": -4.460253715515137, + "step": 289 + }, + { + "epoch": 0.18, + "learning_rate": 9.388282456815633e-08, + "logits/chosen": -3.2270684242248535, + "logits/rejected": -3.1263208389282227, + "logps/chosen": -265.65069580078125, + "logps/rejected": -686.2115478515625, + "loss": 0.4164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5489867925643921, + "rewards/margins": 2.6968536376953125, + "rewards/rejected": -2.14786696434021, + "step": 290 + }, + { + "epoch": 0.19, + "learning_rate": 9.383323282038631e-08, + "logits/chosen": -3.205902099609375, + "logits/rejected": -3.220085620880127, + "logps/chosen": -297.7658996582031, + "logps/rejected": -484.8853454589844, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5756927728652954, + "rewards/margins": 2.1164259910583496, + "rewards/rejected": -1.5407333374023438, + "step": 291 + }, + { + "epoch": 0.19, + "learning_rate": 9.378345407129932e-08, + "logits/chosen": -3.2673654556274414, + "logits/rejected": -3.120363235473633, + "logps/chosen": -264.0706481933594, + "logps/rejected": -401.7740173339844, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7640495300292969, + "rewards/margins": 1.6169731616973877, + "rewards/rejected": -0.852923572063446, + "step": 292 + }, + { + "epoch": 0.19, + "learning_rate": 9.373348853326142e-08, + "logits/chosen": -3.2123570442199707, + "logits/rejected": -3.046835422515869, + "logps/chosen": -294.6626892089844, + "logps/rejected": -911.0121459960938, + "loss": 0.4121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7429794073104858, + "rewards/margins": 2.9364333152770996, + "rewards/rejected": -2.1934540271759033, + "step": 293 + }, + { + "epoch": 0.19, + "learning_rate": 9.368333641943558e-08, + "logits/chosen": -3.1875057220458984, + "logits/rejected": -3.0269060134887695, + "logps/chosen": -250.14596557617188, + "logps/rejected": -839.7770385742188, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6377105712890625, + "rewards/margins": 3.040121555328369, + "rewards/rejected": -2.4024109840393066, + "step": 294 + }, + { + "epoch": 0.19, + "learning_rate": 9.363299794378071e-08, + "logits/chosen": -3.2602334022521973, + "logits/rejected": -3.1347005367279053, + "logps/chosen": -280.2084045410156, + "logps/rejected": -851.0515747070312, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5563751459121704, + "rewards/margins": 3.24371337890625, + "rewards/rejected": -2.687338352203369, + "step": 295 + }, + { + "epoch": 0.19, + "learning_rate": 9.35824733210508e-08, + "logits/chosen": -3.1901121139526367, + "logits/rejected": -3.1265625953674316, + "logps/chosen": -285.53314208984375, + "logps/rejected": -517.2468872070312, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5310379266738892, + "rewards/margins": 2.2953062057495117, + "rewards/rejected": -1.764268398284912, + "step": 296 + }, + { + "epoch": 0.19, + "learning_rate": 9.353176276679396e-08, + "logits/chosen": -3.269381046295166, + "logits/rejected": -3.105165958404541, + "logps/chosen": -258.5242919921875, + "logps/rejected": -674.3035888671875, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5510917901992798, + "rewards/margins": 2.584279775619507, + "rewards/rejected": -2.0331878662109375, + "step": 297 + }, + { + "epoch": 0.19, + "learning_rate": 9.348086649735155e-08, + "logits/chosen": -3.251507520675659, + "logits/rejected": -3.1395316123962402, + "logps/chosen": -260.6748046875, + "logps/rejected": -672.6641845703125, + "loss": 0.3259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6915680170059204, + "rewards/margins": 3.3468141555786133, + "rewards/rejected": -2.6552460193634033, + "step": 298 + }, + { + "epoch": 0.19, + "learning_rate": 9.342978472985718e-08, + "logits/chosen": -3.1924009323120117, + "logits/rejected": -3.105377674102783, + "logps/chosen": -279.668701171875, + "logps/rejected": -1317.1126708984375, + "loss": 0.3746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8248580694198608, + "rewards/margins": 4.489424228668213, + "rewards/rejected": -3.6645660400390625, + "step": 299 + }, + { + "epoch": 0.19, + "learning_rate": 9.337851768223588e-08, + "logits/chosen": -3.244974136352539, + "logits/rejected": -3.183012008666992, + "logps/chosen": -252.97239685058594, + "logps/rejected": -523.2650756835938, + "loss": 0.4037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6451492309570312, + "rewards/margins": 2.1978867053985596, + "rewards/rejected": -1.5527374744415283, + "step": 300 + }, + { + "epoch": 0.19, + "learning_rate": 9.332706557320314e-08, + "logits/chosen": -3.1738312244415283, + "logits/rejected": -3.0079212188720703, + "logps/chosen": -260.18292236328125, + "logps/rejected": -668.2330322265625, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6476501226425171, + "rewards/margins": 2.029693603515625, + "rewards/rejected": -1.382043480873108, + "step": 301 + }, + { + "epoch": 0.19, + "learning_rate": 9.327542862226386e-08, + "logits/chosen": -3.2374603748321533, + "logits/rejected": -3.109537124633789, + "logps/chosen": -271.55157470703125, + "logps/rejected": -509.049072265625, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6401985287666321, + "rewards/margins": 2.186171770095825, + "rewards/rejected": -1.545973300933838, + "step": 302 + }, + { + "epoch": 0.19, + "learning_rate": 9.32236070497116e-08, + "logits/chosen": -3.228374719619751, + "logits/rejected": -3.064690589904785, + "logps/chosen": -261.3410949707031, + "logps/rejected": -565.9998168945312, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.813751220703125, + "rewards/margins": 2.343217372894287, + "rewards/rejected": -1.5294662714004517, + "step": 303 + }, + { + "epoch": 0.19, + "learning_rate": 9.317160107662754e-08, + "logits/chosen": -3.253561496734619, + "logits/rejected": -3.1593422889709473, + "logps/chosen": -270.66827392578125, + "logps/rejected": -1059.2900390625, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6072548031806946, + "rewards/margins": 3.7387704849243164, + "rewards/rejected": -3.1315157413482666, + "step": 304 + }, + { + "epoch": 0.19, + "learning_rate": 9.311941092487954e-08, + "logits/chosen": -3.274043560028076, + "logits/rejected": -3.1255297660827637, + "logps/chosen": -280.4540710449219, + "logps/rejected": -323.2666015625, + "loss": 0.4163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6000809073448181, + "rewards/margins": 1.5017273426055908, + "rewards/rejected": -0.9016464352607727, + "step": 305 + }, + { + "epoch": 0.2, + "learning_rate": 9.306703681712118e-08, + "logits/chosen": -3.2680604457855225, + "logits/rejected": -3.0082926750183105, + "logps/chosen": -281.7630920410156, + "logps/rejected": -311.032958984375, + "loss": 0.4056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.568554699420929, + "rewards/margins": 1.358635663986206, + "rewards/rejected": -0.7900810241699219, + "step": 306 + }, + { + "epoch": 0.2, + "learning_rate": 9.301447897679087e-08, + "logits/chosen": -3.223353385925293, + "logits/rejected": -3.087756395339966, + "logps/chosen": -259.7269287109375, + "logps/rejected": -607.4725341796875, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6940056085586548, + "rewards/margins": 2.2980401515960693, + "rewards/rejected": -1.604034423828125, + "step": 307 + }, + { + "epoch": 0.2, + "learning_rate": 9.296173762811083e-08, + "logits/chosen": -3.221836566925049, + "logits/rejected": -3.1583094596862793, + "logps/chosen": -254.4916534423828, + "logps/rejected": -574.5413818359375, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7052841186523438, + "rewards/margins": 2.960292100906372, + "rewards/rejected": -2.2550079822540283, + "step": 308 + }, + { + "epoch": 0.2, + "learning_rate": 9.290881299608619e-08, + "logits/chosen": -3.262634754180908, + "logits/rejected": -3.059983253479004, + "logps/chosen": -224.3443603515625, + "logps/rejected": -1184.535888671875, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7492645382881165, + "rewards/margins": 4.449746608734131, + "rewards/rejected": -3.7004823684692383, + "step": 309 + }, + { + "epoch": 0.2, + "learning_rate": 9.285570530650399e-08, + "logits/chosen": -3.2680697441101074, + "logits/rejected": -3.112339735031128, + "logps/chosen": -275.0823974609375, + "logps/rejected": -892.1865234375, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6605850458145142, + "rewards/margins": 3.1647047996520996, + "rewards/rejected": -2.504119873046875, + "step": 310 + }, + { + "epoch": 0.2, + "learning_rate": 9.280241478593222e-08, + "logits/chosen": -3.2322182655334473, + "logits/rejected": -3.127584457397461, + "logps/chosen": -239.13031005859375, + "logps/rejected": -412.47711181640625, + "loss": 0.3245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6803840398788452, + "rewards/margins": 2.084352970123291, + "rewards/rejected": -1.4039688110351562, + "step": 311 + }, + { + "epoch": 0.2, + "learning_rate": 9.274894166171887e-08, + "logits/chosen": -3.139799118041992, + "logits/rejected": -3.1480674743652344, + "logps/chosen": -279.4092712402344, + "logps/rejected": -981.841796875, + "loss": 0.3734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6684325933456421, + "rewards/margins": 3.6589601039886475, + "rewards/rejected": -2.990527391433716, + "step": 312 + }, + { + "epoch": 0.2, + "learning_rate": 9.269528616199098e-08, + "logits/chosen": -3.2600228786468506, + "logits/rejected": -3.1559014320373535, + "logps/chosen": -244.0367431640625, + "logps/rejected": -327.46807861328125, + "loss": 0.3921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6775802373886108, + "rewards/margins": 1.6177995204925537, + "rewards/rejected": -0.9402191638946533, + "step": 313 + }, + { + "epoch": 0.2, + "learning_rate": 9.264144851565358e-08, + "logits/chosen": -3.302638530731201, + "logits/rejected": -3.094611167907715, + "logps/chosen": -234.83441162109375, + "logps/rejected": -1580.480224609375, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8057419061660767, + "rewards/margins": 5.644953727722168, + "rewards/rejected": -4.839211940765381, + "step": 314 + }, + { + "epoch": 0.2, + "learning_rate": 9.258742895238885e-08, + "logits/chosen": -3.1892952919006348, + "logits/rejected": -3.1365818977355957, + "logps/chosen": -289.9590759277344, + "logps/rejected": -857.6265869140625, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6923859119415283, + "rewards/margins": 3.44036865234375, + "rewards/rejected": -2.747982978820801, + "step": 315 + }, + { + "epoch": 0.2, + "learning_rate": 9.2533227702655e-08, + "logits/chosen": -3.191830635070801, + "logits/rejected": -3.0187461376190186, + "logps/chosen": -253.64352416992188, + "logps/rejected": -300.11224365234375, + "loss": 0.4312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7787612676620483, + "rewards/margins": 1.5618927478790283, + "rewards/rejected": -0.7831314206123352, + "step": 316 + }, + { + "epoch": 0.2, + "learning_rate": 9.247884499768539e-08, + "logits/chosen": -3.1515328884124756, + "logits/rejected": -2.9609792232513428, + "logps/chosen": -273.53375244140625, + "logps/rejected": -1022.70166015625, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6644287109375, + "rewards/margins": 3.6378495693206787, + "rewards/rejected": -2.9734208583831787, + "step": 317 + }, + { + "epoch": 0.2, + "learning_rate": 9.242428106948748e-08, + "logits/chosen": -3.282991409301758, + "logits/rejected": -3.15828275680542, + "logps/chosen": -230.82679748535156, + "logps/rejected": -596.7777099609375, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7822654843330383, + "rewards/margins": 2.92739200592041, + "rewards/rejected": -2.1451263427734375, + "step": 318 + }, + { + "epoch": 0.2, + "learning_rate": 9.236953615084189e-08, + "logits/chosen": -3.165408134460449, + "logits/rejected": -3.098781108856201, + "logps/chosen": -259.47308349609375, + "logps/rejected": -631.9324340820312, + "loss": 0.3168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5287414789199829, + "rewards/margins": 2.7514801025390625, + "rewards/rejected": -2.222738742828369, + "step": 319 + }, + { + "epoch": 0.2, + "learning_rate": 9.231461047530133e-08, + "logits/chosen": -3.2622921466827393, + "logits/rejected": -3.152886390686035, + "logps/chosen": -246.34140014648438, + "logps/rejected": -417.48583984375, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7021820545196533, + "rewards/margins": 1.9869705438613892, + "rewards/rejected": -1.2847886085510254, + "step": 320 + }, + { + "epoch": 0.2, + "learning_rate": 9.225950427718974e-08, + "logits/chosen": -3.1936020851135254, + "logits/rejected": -3.1384167671203613, + "logps/chosen": -241.29464721679688, + "logps/rejected": -577.5443115234375, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8910133838653564, + "rewards/margins": 2.916114091873169, + "rewards/rejected": -2.0251007080078125, + "step": 321 + }, + { + "epoch": 0.21, + "learning_rate": 9.220421779160112e-08, + "logits/chosen": -3.180853843688965, + "logits/rejected": -3.0510900020599365, + "logps/chosen": -279.89556884765625, + "logps/rejected": -408.8171691894531, + "loss": 0.3992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7639938592910767, + "rewards/margins": 2.1219544410705566, + "rewards/rejected": -1.3579604625701904, + "step": 322 + }, + { + "epoch": 0.21, + "learning_rate": 9.214875125439865e-08, + "logits/chosen": -3.2073745727539062, + "logits/rejected": -3.171412467956543, + "logps/chosen": -255.77108764648438, + "logps/rejected": -885.693603515625, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7346084713935852, + "rewards/margins": 3.562936305999756, + "rewards/rejected": -2.8283278942108154, + "step": 323 + }, + { + "epoch": 0.21, + "learning_rate": 9.209310490221367e-08, + "logits/chosen": -3.23232364654541, + "logits/rejected": -3.097165107727051, + "logps/chosen": -290.43804931640625, + "logps/rejected": -812.7734375, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7497818470001221, + "rewards/margins": 3.320448398590088, + "rewards/rejected": -2.570666551589966, + "step": 324 + }, + { + "epoch": 0.21, + "learning_rate": 9.20372789724446e-08, + "logits/chosen": -3.2755022048950195, + "logits/rejected": -3.027111530303955, + "logps/chosen": -259.57257080078125, + "logps/rejected": -1027.21240234375, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.739331066608429, + "rewards/margins": 3.664581298828125, + "rewards/rejected": -2.925250291824341, + "step": 325 + }, + { + "epoch": 0.21, + "learning_rate": 9.1981273703256e-08, + "logits/chosen": -3.2745842933654785, + "logits/rejected": -3.0590975284576416, + "logps/chosen": -276.81103515625, + "logps/rejected": -529.8029174804688, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8690032958984375, + "rewards/margins": 2.418975830078125, + "rewards/rejected": -1.5499725341796875, + "step": 326 + }, + { + "epoch": 0.21, + "learning_rate": 9.192508933357753e-08, + "logits/chosen": -3.2190184593200684, + "logits/rejected": -3.113887310028076, + "logps/chosen": -295.34832763671875, + "logps/rejected": -738.264892578125, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7764343023300171, + "rewards/margins": 3.0402345657348633, + "rewards/rejected": -2.2638001441955566, + "step": 327 + }, + { + "epoch": 0.21, + "learning_rate": 9.18687261031029e-08, + "logits/chosen": -3.2566568851470947, + "logits/rejected": -3.1082847118377686, + "logps/chosen": -274.52423095703125, + "logps/rejected": -449.3681640625, + "loss": 0.3955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6820007562637329, + "rewards/margins": 2.040876865386963, + "rewards/rejected": -1.3588759899139404, + "step": 328 + }, + { + "epoch": 0.21, + "learning_rate": 9.181218425228893e-08, + "logits/chosen": -3.1667802333831787, + "logits/rejected": -3.0792598724365234, + "logps/chosen": -292.20965576171875, + "logps/rejected": -555.91748046875, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6676437854766846, + "rewards/margins": 2.8189711570739746, + "rewards/rejected": -2.151327610015869, + "step": 329 + }, + { + "epoch": 0.21, + "learning_rate": 9.175546402235441e-08, + "logits/chosen": -3.266522169113159, + "logits/rejected": -3.168558120727539, + "logps/chosen": -284.2423095703125, + "logps/rejected": -478.4983215332031, + "loss": 0.4021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6041534543037415, + "rewards/margins": 2.025334358215332, + "rewards/rejected": -1.4211807250976562, + "step": 330 + }, + { + "epoch": 0.21, + "learning_rate": 9.169856565527916e-08, + "logits/chosen": -3.2518773078918457, + "logits/rejected": -3.08866024017334, + "logps/chosen": -266.714111328125, + "logps/rejected": -459.2275085449219, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6351165771484375, + "rewards/margins": 1.899950385093689, + "rewards/rejected": -1.2648338079452515, + "step": 331 + }, + { + "epoch": 0.21, + "learning_rate": 9.164148939380299e-08, + "logits/chosen": -3.1683390140533447, + "logits/rejected": -3.009464979171753, + "logps/chosen": -282.4407043457031, + "logps/rejected": -870.6634521484375, + "loss": 0.3892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8016617298126221, + "rewards/margins": 2.955277919769287, + "rewards/rejected": -2.153616428375244, + "step": 332 + }, + { + "epoch": 0.21, + "learning_rate": 9.158423548142458e-08, + "logits/chosen": -3.272780179977417, + "logits/rejected": -3.1120505332946777, + "logps/chosen": -261.4248046875, + "logps/rejected": -701.6197509765625, + "loss": 0.3657, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.697985827922821, + "rewards/margins": 3.0491058826446533, + "rewards/rejected": -2.3511199951171875, + "step": 333 + }, + { + "epoch": 0.21, + "learning_rate": 9.152680416240058e-08, + "logits/chosen": -3.2015938758850098, + "logits/rejected": -3.1040713787078857, + "logps/chosen": -260.59075927734375, + "logps/rejected": -534.6195068359375, + "loss": 0.3699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6504898071289062, + "rewards/margins": 2.6230409145355225, + "rewards/rejected": -1.9725511074066162, + "step": 334 + }, + { + "epoch": 0.21, + "learning_rate": 9.146919568174444e-08, + "logits/chosen": -3.2020187377929688, + "logits/rejected": -3.160937786102295, + "logps/chosen": -213.842529296875, + "logps/rejected": -921.3970336914062, + "loss": 0.3389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6710342168807983, + "rewards/margins": 3.924543857574463, + "rewards/rejected": -3.253509521484375, + "step": 335 + }, + { + "epoch": 0.21, + "learning_rate": 9.141141028522543e-08, + "logits/chosen": -3.208747625350952, + "logits/rejected": -3.0415198802948, + "logps/chosen": -264.3408508300781, + "logps/rejected": -515.0535888671875, + "loss": 0.3543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7968605160713196, + "rewards/margins": 2.570272922515869, + "rewards/rejected": -1.7734123468399048, + "step": 336 + }, + { + "epoch": 0.21, + "learning_rate": 9.135344821936759e-08, + "logits/chosen": -3.2177093029022217, + "logits/rejected": -3.107375383377075, + "logps/chosen": -255.40423583984375, + "logps/rejected": -1525.864013671875, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.732477605342865, + "rewards/margins": 5.445392608642578, + "rewards/rejected": -4.712915420532227, + "step": 337 + }, + { + "epoch": 0.22, + "learning_rate": 9.129530973144865e-08, + "logits/chosen": -3.2429633140563965, + "logits/rejected": -3.133324384689331, + "logps/chosen": -286.240966796875, + "logps/rejected": -675.4892578125, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.726971447467804, + "rewards/margins": 2.90653395652771, + "rewards/rejected": -2.179562568664551, + "step": 338 + }, + { + "epoch": 0.22, + "learning_rate": 9.123699506949901e-08, + "logits/chosen": -3.237610340118408, + "logits/rejected": -3.171093463897705, + "logps/chosen": -237.9677734375, + "logps/rejected": -608.359375, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8267837762832642, + "rewards/margins": 3.1341049671173096, + "rewards/rejected": -2.307321071624756, + "step": 339 + }, + { + "epoch": 0.22, + "learning_rate": 9.117850448230064e-08, + "logits/chosen": -3.185094118118286, + "logits/rejected": -3.110426902770996, + "logps/chosen": -264.80157470703125, + "logps/rejected": -484.2105407714844, + "loss": 0.3695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7075546383857727, + "rewards/margins": 2.027737617492676, + "rewards/rejected": -1.3201828002929688, + "step": 340 + }, + { + "epoch": 0.22, + "learning_rate": 9.111983821938607e-08, + "logits/chosen": -3.235243558883667, + "logits/rejected": -3.0881547927856445, + "logps/chosen": -277.45806884765625, + "logps/rejected": -789.052978515625, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.689678966999054, + "rewards/margins": 3.278555393218994, + "rewards/rejected": -2.588876485824585, + "step": 341 + }, + { + "epoch": 0.22, + "learning_rate": 9.106099653103727e-08, + "logits/chosen": -3.204653739929199, + "logits/rejected": -3.1299610137939453, + "logps/chosen": -289.53466796875, + "logps/rejected": -501.5799560546875, + "loss": 0.3424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6801445484161377, + "rewards/margins": 2.365464210510254, + "rewards/rejected": -1.6853195428848267, + "step": 342 + }, + { + "epoch": 0.22, + "learning_rate": 9.100197966828462e-08, + "logits/chosen": -3.218395709991455, + "logits/rejected": -3.185486078262329, + "logps/chosen": -228.04135131835938, + "logps/rejected": -483.9079895019531, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7289336919784546, + "rewards/margins": 2.4199647903442383, + "rewards/rejected": -1.6910309791564941, + "step": 343 + }, + { + "epoch": 0.22, + "learning_rate": 9.094278788290586e-08, + "logits/chosen": -3.2229182720184326, + "logits/rejected": -3.091583728790283, + "logps/chosen": -265.281494140625, + "logps/rejected": -513.0908203125, + "loss": 0.35, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7209388613700867, + "rewards/margins": 2.776557207107544, + "rewards/rejected": -2.0556182861328125, + "step": 344 + }, + { + "epoch": 0.22, + "learning_rate": 9.088342142742491e-08, + "logits/chosen": -3.2208762168884277, + "logits/rejected": -3.1516432762145996, + "logps/chosen": -294.239990234375, + "logps/rejected": -792.1239013671875, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6577285528182983, + "rewards/margins": 4.247169494628906, + "rewards/rejected": -3.5894408226013184, + "step": 345 + }, + { + "epoch": 0.22, + "learning_rate": 9.082388055511095e-08, + "logits/chosen": -3.217088460922241, + "logits/rejected": -3.162698984146118, + "logps/chosen": -290.21722412109375, + "logps/rejected": -640.5828247070312, + "loss": 0.3425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7292969226837158, + "rewards/margins": 2.9319701194763184, + "rewards/rejected": -2.2026734352111816, + "step": 346 + }, + { + "epoch": 0.22, + "learning_rate": 9.076416551997722e-08, + "logits/chosen": -3.2786407470703125, + "logits/rejected": -3.118931293487549, + "logps/chosen": -243.5674285888672, + "logps/rejected": -1481.0330810546875, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6451362371444702, + "rewards/margins": 5.191226959228516, + "rewards/rejected": -4.546090602874756, + "step": 347 + }, + { + "epoch": 0.22, + "learning_rate": 9.070427657677995e-08, + "logits/chosen": -3.216028928756714, + "logits/rejected": -3.093841075897217, + "logps/chosen": -277.3753662109375, + "logps/rejected": -635.6876220703125, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8178428411483765, + "rewards/margins": 3.1621453762054443, + "rewards/rejected": -2.3443024158477783, + "step": 348 + }, + { + "epoch": 0.22, + "learning_rate": 9.064421398101734e-08, + "logits/chosen": -3.2199392318725586, + "logits/rejected": -2.998866558074951, + "logps/chosen": -247.36972045898438, + "logps/rejected": -1249.1572265625, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7408393621444702, + "rewards/margins": 4.128366947174072, + "rewards/rejected": -3.3875274658203125, + "step": 349 + }, + { + "epoch": 0.22, + "learning_rate": 9.05839779889284e-08, + "logits/chosen": -3.216364860534668, + "logits/rejected": -3.1548614501953125, + "logps/chosen": -248.19879150390625, + "logps/rejected": -528.46923828125, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9280060529708862, + "rewards/margins": 2.5651047229766846, + "rewards/rejected": -1.637098789215088, + "step": 350 + }, + { + "epoch": 0.22, + "learning_rate": 9.05235688574919e-08, + "logits/chosen": -3.215554714202881, + "logits/rejected": -3.0828022956848145, + "logps/chosen": -251.91494750976562, + "logps/rejected": -351.83782958984375, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8440887928009033, + "rewards/margins": 2.216295003890991, + "rewards/rejected": -1.372206211090088, + "step": 351 + }, + { + "epoch": 0.22, + "learning_rate": 9.046298684442525e-08, + "logits/chosen": -3.22763991355896, + "logits/rejected": -3.165701389312744, + "logps/chosen": -226.66635131835938, + "logps/rejected": -654.2518920898438, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6946266293525696, + "rewards/margins": 3.5011115074157715, + "rewards/rejected": -2.806485176086426, + "step": 352 + }, + { + "epoch": 0.22, + "learning_rate": 9.04022322081834e-08, + "logits/chosen": -3.190377712249756, + "logits/rejected": -3.1403748989105225, + "logps/chosen": -303.6979064941406, + "logps/rejected": -800.9759521484375, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7573577761650085, + "rewards/margins": 3.194042921066284, + "rewards/rejected": -2.436685085296631, + "step": 353 + }, + { + "epoch": 0.23, + "learning_rate": 9.034130520795773e-08, + "logits/chosen": -3.2500481605529785, + "logits/rejected": -3.082373857498169, + "logps/chosen": -269.469970703125, + "logps/rejected": -972.9521484375, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.735185980796814, + "rewards/margins": 3.8661155700683594, + "rewards/rejected": -3.130929470062256, + "step": 354 + }, + { + "epoch": 0.23, + "learning_rate": 9.0280206103675e-08, + "logits/chosen": -3.1871085166931152, + "logits/rejected": -3.093799114227295, + "logps/chosen": -249.31173706054688, + "logps/rejected": -374.74993896484375, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7218459844589233, + "rewards/margins": 1.8700790405273438, + "rewards/rejected": -1.1482330560684204, + "step": 355 + }, + { + "epoch": 0.23, + "learning_rate": 9.021893515599618e-08, + "logits/chosen": -3.1970510482788086, + "logits/rejected": -3.098860740661621, + "logps/chosen": -244.59527587890625, + "logps/rejected": -616.4313354492188, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7581184506416321, + "rewards/margins": 2.627443790435791, + "rewards/rejected": -1.8693252801895142, + "step": 356 + }, + { + "epoch": 0.23, + "learning_rate": 9.015749262631535e-08, + "logits/chosen": -3.2535223960876465, + "logits/rejected": -3.1356654167175293, + "logps/chosen": -249.457763671875, + "logps/rejected": -743.7828979492188, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8275833129882812, + "rewards/margins": 3.459507942199707, + "rewards/rejected": -2.6319243907928467, + "step": 357 + }, + { + "epoch": 0.23, + "learning_rate": 9.009587877675862e-08, + "logits/chosen": -3.2686567306518555, + "logits/rejected": -3.149886131286621, + "logps/chosen": -275.83404541015625, + "logps/rejected": -832.1871337890625, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5736709833145142, + "rewards/margins": 3.3379197120666504, + "rewards/rejected": -2.764248847961426, + "step": 358 + }, + { + "epoch": 0.23, + "learning_rate": 9.003409387018293e-08, + "logits/chosen": -3.167177200317383, + "logits/rejected": -3.101713180541992, + "logps/chosen": -263.96697998046875, + "logps/rejected": -517.0513305664062, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7109786868095398, + "rewards/margins": 2.2329742908477783, + "rewards/rejected": -1.5219955444335938, + "step": 359 + }, + { + "epoch": 0.23, + "learning_rate": 8.997213817017506e-08, + "logits/chosen": -3.1873292922973633, + "logits/rejected": -3.0781044960021973, + "logps/chosen": -284.01373291015625, + "logps/rejected": -647.5608520507812, + "loss": 0.3503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8364441394805908, + "rewards/margins": 3.1610565185546875, + "rewards/rejected": -2.324612617492676, + "step": 360 + }, + { + "epoch": 0.23, + "learning_rate": 8.991001194105035e-08, + "logits/chosen": -3.1654863357543945, + "logits/rejected": -3.13787841796875, + "logps/chosen": -288.5287780761719, + "logps/rejected": -582.5919189453125, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7568420171737671, + "rewards/margins": 2.7191591262817383, + "rewards/rejected": -1.9623169898986816, + "step": 361 + }, + { + "epoch": 0.23, + "learning_rate": 8.984771544785171e-08, + "logits/chosen": -3.243988513946533, + "logits/rejected": -3.10575532913208, + "logps/chosen": -260.4977722167969, + "logps/rejected": -625.0126342773438, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6710929870605469, + "rewards/margins": 2.9800467491149902, + "rewards/rejected": -2.3089540004730225, + "step": 362 + }, + { + "epoch": 0.23, + "learning_rate": 8.978524895634842e-08, + "logits/chosen": -3.278714656829834, + "logits/rejected": -3.1301794052124023, + "logps/chosen": -289.4779357910156, + "logps/rejected": -800.6932373046875, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8048889636993408, + "rewards/margins": 3.6430788040161133, + "rewards/rejected": -2.8381896018981934, + "step": 363 + }, + { + "epoch": 0.23, + "learning_rate": 8.972261273303496e-08, + "logits/chosen": -3.2807798385620117, + "logits/rejected": -3.232177734375, + "logps/chosen": -238.85769653320312, + "logps/rejected": -380.865966796875, + "loss": 0.3747, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7640060186386108, + "rewards/margins": 2.1340742111206055, + "rewards/rejected": -1.3700683116912842, + "step": 364 + }, + { + "epoch": 0.23, + "learning_rate": 8.965980704513e-08, + "logits/chosen": -3.195586919784546, + "logits/rejected": -3.082764148712158, + "logps/chosen": -233.34149169921875, + "logps/rejected": -1001.0048828125, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7575027942657471, + "rewards/margins": 3.7204742431640625, + "rewards/rejected": -2.9629716873168945, + "step": 365 + }, + { + "epoch": 0.23, + "learning_rate": 8.959683216057511e-08, + "logits/chosen": -3.1962149143218994, + "logits/rejected": -3.0514750480651855, + "logps/chosen": -247.97271728515625, + "logps/rejected": -570.176025390625, + "loss": 0.3752, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6294350028038025, + "rewards/margins": 2.039919376373291, + "rewards/rejected": -1.4104843139648438, + "step": 366 + }, + { + "epoch": 0.23, + "learning_rate": 8.953368834803371e-08, + "logits/chosen": -3.29429030418396, + "logits/rejected": -3.0849897861480713, + "logps/chosen": -279.0885009765625, + "logps/rejected": -422.34954833984375, + "loss": 0.3635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7189041376113892, + "rewards/margins": 1.9954864978790283, + "rewards/rejected": -1.2765823602676392, + "step": 367 + }, + { + "epoch": 0.23, + "learning_rate": 8.947037587688991e-08, + "logits/chosen": -3.203547954559326, + "logits/rejected": -3.1182141304016113, + "logps/chosen": -264.18365478515625, + "logps/rejected": -649.7412109375, + "loss": 0.3111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9751068353652954, + "rewards/margins": 3.262591600418091, + "rewards/rejected": -2.287484645843506, + "step": 368 + }, + { + "epoch": 0.24, + "learning_rate": 8.940689501724736e-08, + "logits/chosen": -3.2018051147460938, + "logits/rejected": -3.156012535095215, + "logps/chosen": -246.51126098632812, + "logps/rejected": -560.9992065429688, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8572632074356079, + "rewards/margins": 2.4851608276367188, + "rewards/rejected": -1.6278977394104004, + "step": 369 + }, + { + "epoch": 0.24, + "learning_rate": 8.934324603992804e-08, + "logits/chosen": -3.2575292587280273, + "logits/rejected": -3.151705026626587, + "logps/chosen": -269.55352783203125, + "logps/rejected": -685.0694580078125, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.748577892780304, + "rewards/margins": 3.312548875808716, + "rewards/rejected": -2.5639710426330566, + "step": 370 + }, + { + "epoch": 0.24, + "learning_rate": 8.92794292164712e-08, + "logits/chosen": -3.166868209838867, + "logits/rejected": -2.9979491233825684, + "logps/chosen": -260.849609375, + "logps/rejected": -345.33453369140625, + "loss": 0.3641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6952407956123352, + "rewards/margins": 1.5657851696014404, + "rewards/rejected": -0.87054443359375, + "step": 371 + }, + { + "epoch": 0.24, + "learning_rate": 8.921544481913216e-08, + "logits/chosen": -3.294546604156494, + "logits/rejected": -3.108593463897705, + "logps/chosen": -236.22506713867188, + "logps/rejected": -394.5601806640625, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7789223194122314, + "rewards/margins": 1.9400367736816406, + "rewards/rejected": -1.1611144542694092, + "step": 372 + }, + { + "epoch": 0.24, + "learning_rate": 8.915129312088111e-08, + "logits/chosen": -3.2509450912475586, + "logits/rejected": -3.17104434967041, + "logps/chosen": -261.55029296875, + "logps/rejected": -633.017822265625, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6807190179824829, + "rewards/margins": 3.182870388031006, + "rewards/rejected": -2.5021514892578125, + "step": 373 + }, + { + "epoch": 0.24, + "learning_rate": 8.908697439540197e-08, + "logits/chosen": -3.2935218811035156, + "logits/rejected": -3.1586179733276367, + "logps/chosen": -267.66815185546875, + "logps/rejected": -483.009765625, + "loss": 0.3762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9364532232284546, + "rewards/margins": 2.562368869781494, + "rewards/rejected": -1.62591552734375, + "step": 374 + }, + { + "epoch": 0.24, + "learning_rate": 8.902248891709132e-08, + "logits/chosen": -3.192288398742676, + "logits/rejected": -3.157148838043213, + "logps/chosen": -282.722412109375, + "logps/rejected": -848.6507568359375, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7197601795196533, + "rewards/margins": 3.63372802734375, + "rewards/rejected": -2.913968086242676, + "step": 375 + }, + { + "epoch": 0.24, + "learning_rate": 8.895783696105704e-08, + "logits/chosen": -3.167567729949951, + "logits/rejected": -3.0863380432128906, + "logps/chosen": -265.71380615234375, + "logps/rejected": -732.1386108398438, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.818891167640686, + "rewards/margins": 3.0224783420562744, + "rewards/rejected": -2.203587293624878, + "step": 376 + }, + { + "epoch": 0.24, + "learning_rate": 8.889301880311724e-08, + "logits/chosen": -3.2317709922790527, + "logits/rejected": -3.015108108520508, + "logps/chosen": -239.68887329101562, + "logps/rejected": -988.4209594726562, + "loss": 0.3158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7400283813476562, + "rewards/margins": 4.190682888031006, + "rewards/rejected": -3.4506545066833496, + "step": 377 + }, + { + "epoch": 0.24, + "learning_rate": 8.882803471979916e-08, + "logits/chosen": -3.2304928302764893, + "logits/rejected": -3.0638890266418457, + "logps/chosen": -240.0576171875, + "logps/rejected": -1145.3619384765625, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7099090814590454, + "rewards/margins": 3.9495530128479004, + "rewards/rejected": -3.2396440505981445, + "step": 378 + }, + { + "epoch": 0.24, + "learning_rate": 8.876288498833786e-08, + "logits/chosen": -3.212398052215576, + "logits/rejected": -3.183454990386963, + "logps/chosen": -243.90054321289062, + "logps/rejected": -822.63720703125, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8066024780273438, + "rewards/margins": 4.253385543823242, + "rewards/rejected": -3.4467835426330566, + "step": 379 + }, + { + "epoch": 0.24, + "learning_rate": 8.869756988667508e-08, + "logits/chosen": -3.178816318511963, + "logits/rejected": -3.1586508750915527, + "logps/chosen": -257.7279052734375, + "logps/rejected": -431.5225524902344, + "loss": 0.3916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.594714343547821, + "rewards/margins": 2.108023166656494, + "rewards/rejected": -1.5133087635040283, + "step": 380 + }, + { + "epoch": 0.24, + "learning_rate": 8.863208969345809e-08, + "logits/chosen": -3.166484832763672, + "logits/rejected": -3.1178276538848877, + "logps/chosen": -260.15850830078125, + "logps/rejected": -468.8448181152344, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9732513427734375, + "rewards/margins": 2.3969130516052246, + "rewards/rejected": -1.4236618280410767, + "step": 381 + }, + { + "epoch": 0.24, + "learning_rate": 8.856644468803844e-08, + "logits/chosen": -3.215583086013794, + "logits/rejected": -3.0703601837158203, + "logps/chosen": -235.3519287109375, + "logps/rejected": -398.5813293457031, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.018384575843811, + "rewards/margins": 2.3256211280822754, + "rewards/rejected": -1.3072365522384644, + "step": 382 + }, + { + "epoch": 0.24, + "learning_rate": 8.850063515047084e-08, + "logits/chosen": -3.096285581588745, + "logits/rejected": -3.1058478355407715, + "logps/chosen": -280.532958984375, + "logps/rejected": -540.008056640625, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8499725461006165, + "rewards/margins": 2.8393630981445312, + "rewards/rejected": -1.9893906116485596, + "step": 383 + }, + { + "epoch": 0.24, + "learning_rate": 8.84346613615119e-08, + "logits/chosen": -3.2083616256713867, + "logits/rejected": -3.010822296142578, + "logps/chosen": -232.8785858154297, + "logps/rejected": -711.7432861328125, + "loss": 0.3885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7208054065704346, + "rewards/margins": 2.9845004081726074, + "rewards/rejected": -2.263695001602173, + "step": 384 + }, + { + "epoch": 0.25, + "learning_rate": 8.836852360261895e-08, + "logits/chosen": -3.242704391479492, + "logits/rejected": -3.1931989192962646, + "logps/chosen": -234.51011657714844, + "logps/rejected": -783.5361328125, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6545196771621704, + "rewards/margins": 3.9283246994018555, + "rewards/rejected": -3.2738051414489746, + "step": 385 + }, + { + "epoch": 0.25, + "learning_rate": 8.83022221559489e-08, + "logits/chosen": -3.248610258102417, + "logits/rejected": -3.066842794418335, + "logps/chosen": -264.201904296875, + "logps/rejected": -576.207763671875, + "loss": 0.336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7935714721679688, + "rewards/margins": 2.7300171852111816, + "rewards/rejected": -1.9364455938339233, + "step": 386 + }, + { + "epoch": 0.25, + "learning_rate": 8.823575730435693e-08, + "logits/chosen": -3.268435001373291, + "logits/rejected": -3.123642921447754, + "logps/chosen": -270.61248779296875, + "logps/rejected": -436.80645751953125, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.673291802406311, + "rewards/margins": 2.017559051513672, + "rewards/rejected": -1.3442672491073608, + "step": 387 + }, + { + "epoch": 0.25, + "learning_rate": 8.816912933139536e-08, + "logits/chosen": -3.2220840454101562, + "logits/rejected": -3.0730793476104736, + "logps/chosen": -242.43325805664062, + "logps/rejected": -1364.81494140625, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5646271109580994, + "rewards/margins": 4.4891815185546875, + "rewards/rejected": -3.9245543479919434, + "step": 388 + }, + { + "epoch": 0.25, + "learning_rate": 8.810233852131241e-08, + "logits/chosen": -3.1990320682525635, + "logits/rejected": -3.0111141204833984, + "logps/chosen": -262.7906494140625, + "logps/rejected": -325.3678894042969, + "loss": 0.3905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7866897583007812, + "rewards/margins": 1.8394227027893066, + "rewards/rejected": -1.0527328252792358, + "step": 389 + }, + { + "epoch": 0.25, + "learning_rate": 8.8035385159051e-08, + "logits/chosen": -3.211019515991211, + "logits/rejected": -3.135927200317383, + "logps/chosen": -240.24148559570312, + "logps/rejected": -304.78875732421875, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7428749203681946, + "rewards/margins": 1.739424228668213, + "rewards/rejected": -0.9965492486953735, + "step": 390 + }, + { + "epoch": 0.25, + "learning_rate": 8.796826953024757e-08, + "logits/chosen": -3.2936458587646484, + "logits/rejected": -3.151217222213745, + "logps/chosen": -254.5994110107422, + "logps/rejected": -780.1718139648438, + "loss": 0.3895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7813476324081421, + "rewards/margins": 3.343777656555176, + "rewards/rejected": -2.562429904937744, + "step": 391 + }, + { + "epoch": 0.25, + "learning_rate": 8.790099192123073e-08, + "logits/chosen": -3.301299810409546, + "logits/rejected": -3.1376090049743652, + "logps/chosen": -250.9512939453125, + "logps/rejected": -850.6777954101562, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7829033136367798, + "rewards/margins": 3.8416404724121094, + "rewards/rejected": -3.058737277984619, + "step": 392 + }, + { + "epoch": 0.25, + "learning_rate": 8.783355261902021e-08, + "logits/chosen": -3.217095136642456, + "logits/rejected": -3.0592756271362305, + "logps/chosen": -278.64990234375, + "logps/rejected": -1049.12158203125, + "loss": 0.3574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.710003674030304, + "rewards/margins": 3.332507371902466, + "rewards/rejected": -2.6225037574768066, + "step": 393 + }, + { + "epoch": 0.25, + "learning_rate": 8.776595191132553e-08, + "logits/chosen": -3.174435615539551, + "logits/rejected": -3.0744333267211914, + "logps/chosen": -263.8000183105469, + "logps/rejected": -710.4091796875, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8260078430175781, + "rewards/margins": 3.4907448291778564, + "rewards/rejected": -2.6647369861602783, + "step": 394 + }, + { + "epoch": 0.25, + "learning_rate": 8.76981900865448e-08, + "logits/chosen": -3.1630756855010986, + "logits/rejected": -3.034977912902832, + "logps/chosen": -270.9190673828125, + "logps/rejected": -508.86376953125, + "loss": 0.3526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9229980707168579, + "rewards/margins": 2.558772325515747, + "rewards/rejected": -1.6357742547988892, + "step": 395 + }, + { + "epoch": 0.25, + "learning_rate": 8.763026743376347e-08, + "logits/chosen": -3.3110837936401367, + "logits/rejected": -3.1384453773498535, + "logps/chosen": -255.42076110839844, + "logps/rejected": -659.4979858398438, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7589668035507202, + "rewards/margins": 3.077730655670166, + "rewards/rejected": -2.3187637329101562, + "step": 396 + }, + { + "epoch": 0.25, + "learning_rate": 8.756218424275315e-08, + "logits/chosen": -3.2910032272338867, + "logits/rejected": -3.0547873973846436, + "logps/chosen": -250.086669921875, + "logps/rejected": -1255.7503662109375, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7356468439102173, + "rewards/margins": 4.922637462615967, + "rewards/rejected": -4.186990737915039, + "step": 397 + }, + { + "epoch": 0.25, + "learning_rate": 8.74939408039703e-08, + "logits/chosen": -3.2073938846588135, + "logits/rejected": -3.113370895385742, + "logps/chosen": -246.10238647460938, + "logps/rejected": -761.0848388671875, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7637352347373962, + "rewards/margins": 4.07311487197876, + "rewards/rejected": -3.3093795776367188, + "step": 398 + }, + { + "epoch": 0.25, + "learning_rate": 8.742553740855505e-08, + "logits/chosen": -3.30670166015625, + "logits/rejected": -3.099625587463379, + "logps/chosen": -267.3409423828125, + "logps/rejected": -977.3062744140625, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8645179867744446, + "rewards/margins": 4.176316261291504, + "rewards/rejected": -3.311798095703125, + "step": 399 + }, + { + "epoch": 0.25, + "learning_rate": 8.735697434832994e-08, + "logits/chosen": -3.210127830505371, + "logits/rejected": -3.151423215866089, + "logps/chosen": -284.7542724609375, + "logps/rejected": -573.7997436523438, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7354339361190796, + "rewards/margins": 2.626612663269043, + "rewards/rejected": -1.891178846359253, + "step": 400 + }, + { + "epoch": 0.26, + "learning_rate": 8.728825191579865e-08, + "logits/chosen": -3.2510766983032227, + "logits/rejected": -3.0457003116607666, + "logps/chosen": -264.920654296875, + "logps/rejected": -539.9461059570312, + "loss": 0.3653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9035698175430298, + "rewards/margins": 2.336196184158325, + "rewards/rejected": -1.4326263666152954, + "step": 401 + }, + { + "epoch": 0.26, + "learning_rate": 8.72193704041448e-08, + "logits/chosen": -3.193321466445923, + "logits/rejected": -3.0577080249786377, + "logps/chosen": -261.11480712890625, + "logps/rejected": -559.1956787109375, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.782562255859375, + "rewards/margins": 2.3991732597351074, + "rewards/rejected": -1.6166107654571533, + "step": 402 + }, + { + "epoch": 0.26, + "learning_rate": 8.715033010723065e-08, + "logits/chosen": -3.207150936126709, + "logits/rejected": -2.9792118072509766, + "logps/chosen": -261.2318115234375, + "logps/rejected": -395.2323913574219, + "loss": 0.3597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6394241452217102, + "rewards/margins": 1.8775100708007812, + "rewards/rejected": -1.2380859851837158, + "step": 403 + }, + { + "epoch": 0.26, + "learning_rate": 8.708113131959592e-08, + "logits/chosen": -3.2573983669281006, + "logits/rejected": -3.1637139320373535, + "logps/chosen": -244.1874237060547, + "logps/rejected": -851.6396484375, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7609718441963196, + "rewards/margins": 4.116044044494629, + "rewards/rejected": -3.355072021484375, + "step": 404 + }, + { + "epoch": 0.26, + "learning_rate": 8.701177433645639e-08, + "logits/chosen": -3.252218008041382, + "logits/rejected": -3.1240439414978027, + "logps/chosen": -264.5416259765625, + "logps/rejected": -507.94921875, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8482261896133423, + "rewards/margins": 2.6937613487243652, + "rewards/rejected": -1.8455352783203125, + "step": 405 + }, + { + "epoch": 0.26, + "learning_rate": 8.694225945370282e-08, + "logits/chosen": -3.230679988861084, + "logits/rejected": -3.1045002937316895, + "logps/chosen": -231.31353759765625, + "logps/rejected": -472.2550048828125, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7418365478515625, + "rewards/margins": 2.304464817047119, + "rewards/rejected": -1.5626282691955566, + "step": 406 + }, + { + "epoch": 0.26, + "learning_rate": 8.687258696789957e-08, + "logits/chosen": -3.1538939476013184, + "logits/rejected": -3.1520371437072754, + "logps/chosen": -238.90211486816406, + "logps/rejected": -799.3304443359375, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6472259759902954, + "rewards/margins": 3.8867645263671875, + "rewards/rejected": -3.2395386695861816, + "step": 407 + }, + { + "epoch": 0.26, + "learning_rate": 8.680275717628336e-08, + "logits/chosen": -3.263150691986084, + "logits/rejected": -3.0867860317230225, + "logps/chosen": -249.26409912109375, + "logps/rejected": -1162.580810546875, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7352859973907471, + "rewards/margins": 4.925999641418457, + "rewards/rejected": -4.190713882446289, + "step": 408 + }, + { + "epoch": 0.26, + "learning_rate": 8.673277037676201e-08, + "logits/chosen": -3.299978256225586, + "logits/rejected": -3.1486237049102783, + "logps/chosen": -239.47021484375, + "logps/rejected": -987.0233154296875, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6662956476211548, + "rewards/margins": 3.53945255279541, + "rewards/rejected": -2.873156785964966, + "step": 409 + }, + { + "epoch": 0.26, + "learning_rate": 8.666262686791316e-08, + "logits/chosen": -3.1950998306274414, + "logits/rejected": -3.227604389190674, + "logps/chosen": -244.82861328125, + "logps/rejected": -767.97607421875, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8454079031944275, + "rewards/margins": 3.8676795959472656, + "rewards/rejected": -3.0222716331481934, + "step": 410 + }, + { + "epoch": 0.26, + "learning_rate": 8.659232694898307e-08, + "logits/chosen": -3.296271800994873, + "logits/rejected": -3.1282029151916504, + "logps/chosen": -242.1750030517578, + "logps/rejected": -1569.110595703125, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8090301752090454, + "rewards/margins": 5.683239936828613, + "rewards/rejected": -4.874209403991699, + "step": 411 + }, + { + "epoch": 0.26, + "learning_rate": 8.652187091988516e-08, + "logits/chosen": -3.2465109825134277, + "logits/rejected": -3.1299073696136475, + "logps/chosen": -256.95269775390625, + "logps/rejected": -663.001708984375, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7346442937850952, + "rewards/margins": 3.1538002490997314, + "rewards/rejected": -2.4191558361053467, + "step": 412 + }, + { + "epoch": 0.26, + "learning_rate": 8.645125908119892e-08, + "logits/chosen": -3.196105480194092, + "logits/rejected": -3.118927478790283, + "logps/chosen": -284.83319091796875, + "logps/rejected": -545.7821044921875, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7827240228652954, + "rewards/margins": 2.4996583461761475, + "rewards/rejected": -1.7169342041015625, + "step": 413 + }, + { + "epoch": 0.26, + "learning_rate": 8.638049173416855e-08, + "logits/chosen": -3.1731395721435547, + "logits/rejected": -3.048543930053711, + "logps/chosen": -246.98593139648438, + "logps/rejected": -437.69805908203125, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7571342587471008, + "rewards/margins": 2.2362723350524902, + "rewards/rejected": -1.4791382551193237, + "step": 414 + }, + { + "epoch": 0.26, + "learning_rate": 8.630956918070166e-08, + "logits/chosen": -3.241422653198242, + "logits/rejected": -3.1507039070129395, + "logps/chosen": -249.99676513671875, + "logps/rejected": -397.6800231933594, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8303818106651306, + "rewards/margins": 2.4590682983398438, + "rewards/rejected": -1.628686547279358, + "step": 415 + }, + { + "epoch": 0.27, + "learning_rate": 8.623849172336806e-08, + "logits/chosen": -3.182929515838623, + "logits/rejected": -3.0997331142425537, + "logps/chosen": -257.02569580078125, + "logps/rejected": -1414.1915283203125, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8289077877998352, + "rewards/margins": 5.6570940017700195, + "rewards/rejected": -4.82818603515625, + "step": 416 + }, + { + "epoch": 0.27, + "learning_rate": 8.61672596653983e-08, + "logits/chosen": -3.2732272148132324, + "logits/rejected": -3.084160804748535, + "logps/chosen": -250.91964721679688, + "logps/rejected": -839.9296875, + "loss": 0.3476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8075004816055298, + "rewards/margins": 4.142778396606445, + "rewards/rejected": -3.335278272628784, + "step": 417 + }, + { + "epoch": 0.27, + "learning_rate": 8.60958733106826e-08, + "logits/chosen": -3.3014349937438965, + "logits/rejected": -3.187865734100342, + "logps/chosen": -270.8141784667969, + "logps/rejected": -3991.976318359375, + "loss": 0.3479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9049652218818665, + "rewards/margins": 9.028076171875, + "rewards/rejected": -8.1231107711792, + "step": 418 + }, + { + "epoch": 0.27, + "learning_rate": 8.602433296376938e-08, + "logits/chosen": -3.197889804840088, + "logits/rejected": -3.0188138484954834, + "logps/chosen": -246.44969177246094, + "logps/rejected": -400.2145080566406, + "loss": 0.345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8497276306152344, + "rewards/margins": 2.1024270057678223, + "rewards/rejected": -1.252699375152588, + "step": 419 + }, + { + "epoch": 0.27, + "learning_rate": 8.595263892986402e-08, + "logits/chosen": -3.2945992946624756, + "logits/rejected": -3.115190267562866, + "logps/chosen": -277.45440673828125, + "logps/rejected": -1133.80859375, + "loss": 0.3088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.79168701171875, + "rewards/margins": 4.599569797515869, + "rewards/rejected": -3.807882785797119, + "step": 420 + }, + { + "epoch": 0.27, + "learning_rate": 8.588079151482756e-08, + "logits/chosen": -3.2179336547851562, + "logits/rejected": -3.061840534210205, + "logps/chosen": -262.49554443359375, + "logps/rejected": -642.206298828125, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.729626476764679, + "rewards/margins": 2.9988815784454346, + "rewards/rejected": -2.2692551612854004, + "step": 421 + }, + { + "epoch": 0.27, + "learning_rate": 8.580879102517547e-08, + "logits/chosen": -3.1695361137390137, + "logits/rejected": -3.0697474479675293, + "logps/chosen": -254.88230895996094, + "logps/rejected": -690.440185546875, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7701309323310852, + "rewards/margins": 2.707022190093994, + "rewards/rejected": -1.9368913173675537, + "step": 422 + }, + { + "epoch": 0.27, + "learning_rate": 8.573663776807614e-08, + "logits/chosen": -3.1680023670196533, + "logits/rejected": -3.1243128776550293, + "logps/chosen": -259.9607849121094, + "logps/rejected": -743.702392578125, + "loss": 0.3158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7195960879325867, + "rewards/margins": 3.5893259048461914, + "rewards/rejected": -2.869729518890381, + "step": 423 + }, + { + "epoch": 0.27, + "learning_rate": 8.56643320513498e-08, + "logits/chosen": -3.240809917449951, + "logits/rejected": -3.112069606781006, + "logps/chosen": -243.00375366210938, + "logps/rejected": -501.3087158203125, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9249046444892883, + "rewards/margins": 2.8876137733459473, + "rewards/rejected": -1.9627090692520142, + "step": 424 + }, + { + "epoch": 0.27, + "learning_rate": 8.559187418346703e-08, + "logits/chosen": -3.306999683380127, + "logits/rejected": -3.0954651832580566, + "logps/chosen": -242.01187133789062, + "logps/rejected": -719.659423828125, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6299186944961548, + "rewards/margins": 3.3323326110839844, + "rewards/rejected": -2.702414035797119, + "step": 425 + }, + { + "epoch": 0.27, + "learning_rate": 8.551926447354758e-08, + "logits/chosen": -3.196887969970703, + "logits/rejected": -3.1599221229553223, + "logps/chosen": -262.22882080078125, + "logps/rejected": -636.8026123046875, + "loss": 0.3369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.897351861000061, + "rewards/margins": 3.6628000736236572, + "rewards/rejected": -2.7654480934143066, + "step": 426 + }, + { + "epoch": 0.27, + "learning_rate": 8.544650323135896e-08, + "logits/chosen": -3.241366386413574, + "logits/rejected": -3.116771697998047, + "logps/chosen": -215.057861328125, + "logps/rejected": -614.6365966796875, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.708966851234436, + "rewards/margins": 3.3596839904785156, + "rewards/rejected": -2.650717258453369, + "step": 427 + }, + { + "epoch": 0.27, + "learning_rate": 8.537359076731512e-08, + "logits/chosen": -3.2675793170928955, + "logits/rejected": -3.1554713249206543, + "logps/chosen": -253.41812133789062, + "logps/rejected": -251.138671875, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0480735301971436, + "rewards/margins": 1.848813533782959, + "rewards/rejected": -0.8007400631904602, + "step": 428 + }, + { + "epoch": 0.27, + "learning_rate": 8.530052739247521e-08, + "logits/chosen": -3.2787487506866455, + "logits/rejected": -3.0496225357055664, + "logps/chosen": -278.8421936035156, + "logps/rejected": -453.8553466796875, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7828400135040283, + "rewards/margins": 2.4278182983398438, + "rewards/rejected": -1.644978404045105, + "step": 429 + }, + { + "epoch": 0.27, + "learning_rate": 8.522731341854215e-08, + "logits/chosen": -3.232790470123291, + "logits/rejected": -3.1057958602905273, + "logps/chosen": -237.82960510253906, + "logps/rejected": -1337.1163330078125, + "loss": 0.3158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8066993951797485, + "rewards/margins": 4.982611656188965, + "rewards/rejected": -4.175912380218506, + "step": 430 + }, + { + "epoch": 0.27, + "learning_rate": 8.515394915786135e-08, + "logits/chosen": -3.2021596431732178, + "logits/rejected": -3.1579177379608154, + "logps/chosen": -313.05389404296875, + "logps/rejected": -610.0084838867188, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6698868274688721, + "rewards/margins": 2.6247284412384033, + "rewards/rejected": -1.9548416137695312, + "step": 431 + }, + { + "epoch": 0.28, + "learning_rate": 8.508043492341943e-08, + "logits/chosen": -3.173976421356201, + "logits/rejected": -3.0203194618225098, + "logps/chosen": -289.51153564453125, + "logps/rejected": -796.1836547851562, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6921188831329346, + "rewards/margins": 2.7725448608398438, + "rewards/rejected": -2.080425977706909, + "step": 432 + }, + { + "epoch": 0.28, + "learning_rate": 8.500677102884273e-08, + "logits/chosen": -3.228431224822998, + "logits/rejected": -3.106738567352295, + "logps/chosen": -273.86065673828125, + "logps/rejected": -652.41796875, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8169891834259033, + "rewards/margins": 3.3840575218200684, + "rewards/rejected": -2.567068576812744, + "step": 433 + }, + { + "epoch": 0.28, + "learning_rate": 8.493295778839613e-08, + "logits/chosen": -3.191102981567383, + "logits/rejected": -3.1187872886657715, + "logps/chosen": -300.3963317871094, + "logps/rejected": -925.5484619140625, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7323501706123352, + "rewards/margins": 4.210020542144775, + "rewards/rejected": -3.477670192718506, + "step": 434 + }, + { + "epoch": 0.28, + "learning_rate": 8.485899551698166e-08, + "logits/chosen": -3.1820054054260254, + "logits/rejected": -3.0564568042755127, + "logps/chosen": -303.4354553222656, + "logps/rejected": -1129.7261962890625, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8050201535224915, + "rewards/margins": 4.979531764984131, + "rewards/rejected": -4.174511909484863, + "step": 435 + }, + { + "epoch": 0.28, + "learning_rate": 8.478488453013712e-08, + "logits/chosen": -3.309760093688965, + "logits/rejected": -3.1548991203308105, + "logps/chosen": -263.97796630859375, + "logps/rejected": -377.09185791015625, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8812347650527954, + "rewards/margins": 2.5561113357543945, + "rewards/rejected": -1.6748764514923096, + "step": 436 + }, + { + "epoch": 0.28, + "learning_rate": 8.471062514403478e-08, + "logits/chosen": -3.229891777038574, + "logits/rejected": -3.1461563110351562, + "logps/chosen": -257.82183837890625, + "logps/rejected": -629.8749389648438, + "loss": 0.3328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.689252495765686, + "rewards/margins": 2.8879952430725098, + "rewards/rejected": -2.198742628097534, + "step": 437 + }, + { + "epoch": 0.28, + "learning_rate": 8.463621767547997e-08, + "logits/chosen": -3.247783660888672, + "logits/rejected": -3.130075454711914, + "logps/chosen": -262.02545166015625, + "logps/rejected": -613.6490478515625, + "loss": 0.3472, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8932793140411377, + "rewards/margins": 3.441016435623169, + "rewards/rejected": -2.5477371215820312, + "step": 438 + }, + { + "epoch": 0.28, + "learning_rate": 8.456166244190981e-08, + "logits/chosen": -3.1902997493743896, + "logits/rejected": -3.066561222076416, + "logps/chosen": -253.83053588867188, + "logps/rejected": -1329.0164794921875, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8097556829452515, + "rewards/margins": 5.196324825286865, + "rewards/rejected": -4.386569499969482, + "step": 439 + }, + { + "epoch": 0.28, + "learning_rate": 8.44869597613918e-08, + "logits/chosen": -3.184349536895752, + "logits/rejected": -3.108776092529297, + "logps/chosen": -259.64178466796875, + "logps/rejected": -357.25640869140625, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8862556219100952, + "rewards/margins": 2.2680063247680664, + "rewards/rejected": -1.381750464439392, + "step": 440 + }, + { + "epoch": 0.28, + "learning_rate": 8.441210995262249e-08, + "logits/chosen": -3.194333553314209, + "logits/rejected": -3.113084316253662, + "logps/chosen": -251.19117736816406, + "logps/rejected": -491.60504150390625, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7198470830917358, + "rewards/margins": 2.4547410011291504, + "rewards/rejected": -1.734893798828125, + "step": 441 + }, + { + "epoch": 0.28, + "learning_rate": 8.433711333492607e-08, + "logits/chosen": -3.221284866333008, + "logits/rejected": -3.0981736183166504, + "logps/chosen": -260.03076171875, + "logps/rejected": -621.8023071289062, + "loss": 0.3143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9547179937362671, + "rewards/margins": 3.05328369140625, + "rewards/rejected": -2.0985655784606934, + "step": 442 + }, + { + "epoch": 0.28, + "learning_rate": 8.426197022825313e-08, + "logits/chosen": -3.1894426345825195, + "logits/rejected": -3.180811643600464, + "logps/chosen": -256.6485900878906, + "logps/rejected": -788.4493408203125, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9399467706680298, + "rewards/margins": 4.375832557678223, + "rewards/rejected": -3.4358856678009033, + "step": 443 + }, + { + "epoch": 0.28, + "learning_rate": 8.418668095317911e-08, + "logits/chosen": -3.3002443313598633, + "logits/rejected": -3.0366008281707764, + "logps/chosen": -275.41217041015625, + "logps/rejected": -682.3994140625, + "loss": 0.3381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8504234552383423, + "rewards/margins": 2.900270938873291, + "rewards/rejected": -2.049847364425659, + "step": 444 + }, + { + "epoch": 0.28, + "learning_rate": 8.411124583090308e-08, + "logits/chosen": -3.2018613815307617, + "logits/rejected": -3.05545711517334, + "logps/chosen": -270.8179016113281, + "logps/rejected": -461.0833435058594, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.762397050857544, + "rewards/margins": 2.586275577545166, + "rewards/rejected": -1.823878526687622, + "step": 445 + }, + { + "epoch": 0.28, + "learning_rate": 8.403566518324634e-08, + "logits/chosen": -3.235353469848633, + "logits/rejected": -3.183715343475342, + "logps/chosen": -265.96368408203125, + "logps/rejected": -434.42803955078125, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9737045764923096, + "rewards/margins": 2.948927402496338, + "rewards/rejected": -1.9752228260040283, + "step": 446 + }, + { + "epoch": 0.28, + "learning_rate": 8.395993933265101e-08, + "logits/chosen": -3.1960673332214355, + "logits/rejected": -3.0767056941986084, + "logps/chosen": -289.47900390625, + "logps/rejected": -841.3126220703125, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0201020240783691, + "rewards/margins": 4.1690826416015625, + "rewards/rejected": -3.1489806175231934, + "step": 447 + }, + { + "epoch": 0.29, + "learning_rate": 8.388406860217867e-08, + "logits/chosen": -3.2029876708984375, + "logits/rejected": -3.1111903190612793, + "logps/chosen": -278.0013427734375, + "logps/rejected": -381.63177490234375, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.84912109375, + "rewards/margins": 2.2084107398986816, + "rewards/rejected": -1.359289526939392, + "step": 448 + }, + { + "epoch": 0.29, + "learning_rate": 8.3808053315509e-08, + "logits/chosen": -3.2464406490325928, + "logits/rejected": -3.101053237915039, + "logps/chosen": -288.3204345703125, + "logps/rejected": -877.0452270507812, + "loss": 0.2836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7808640003204346, + "rewards/margins": 3.866581916809082, + "rewards/rejected": -3.0857176780700684, + "step": 449 + }, + { + "epoch": 0.29, + "learning_rate": 8.373189379693837e-08, + "logits/chosen": -3.192490816116333, + "logits/rejected": -3.0586307048797607, + "logps/chosen": -256.4227600097656, + "logps/rejected": -631.013427734375, + "loss": 0.3366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.92803955078125, + "rewards/margins": 3.5205445289611816, + "rewards/rejected": -2.5925049781799316, + "step": 450 + }, + { + "epoch": 0.29, + "learning_rate": 8.36555903713785e-08, + "logits/chosen": -3.204375743865967, + "logits/rejected": -3.119520425796509, + "logps/chosen": -260.7015380859375, + "logps/rejected": -647.12744140625, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7936310172080994, + "rewards/margins": 3.354217529296875, + "rewards/rejected": -2.560586452484131, + "step": 451 + }, + { + "epoch": 0.29, + "learning_rate": 8.357914336435503e-08, + "logits/chosen": -3.246866226196289, + "logits/rejected": -3.110795497894287, + "logps/chosen": -271.98876953125, + "logps/rejected": -646.5123291015625, + "loss": 0.326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.891345202922821, + "rewards/margins": 3.591885566711426, + "rewards/rejected": -2.700540065765381, + "step": 452 + }, + { + "epoch": 0.29, + "learning_rate": 8.350255310200611e-08, + "logits/chosen": -3.2070422172546387, + "logits/rejected": -3.0950262546539307, + "logps/chosen": -264.67376708984375, + "logps/rejected": -1589.4371337890625, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9510040283203125, + "rewards/margins": 6.422009468078613, + "rewards/rejected": -5.471005439758301, + "step": 453 + }, + { + "epoch": 0.29, + "learning_rate": 8.342581991108112e-08, + "logits/chosen": -3.3065876960754395, + "logits/rejected": -3.044698715209961, + "logps/chosen": -262.2351989746094, + "logps/rejected": -962.0645751953125, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8951881527900696, + "rewards/margins": 3.804574728012085, + "rewards/rejected": -2.909386396408081, + "step": 454 + }, + { + "epoch": 0.29, + "learning_rate": 8.334894411893913e-08, + "logits/chosen": -3.2423715591430664, + "logits/rejected": -3.1715500354766846, + "logps/chosen": -264.6112365722656, + "logps/rejected": -1287.2357177734375, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7900505065917969, + "rewards/margins": 6.01552677154541, + "rewards/rejected": -5.225476264953613, + "step": 455 + }, + { + "epoch": 0.29, + "learning_rate": 8.327192605354765e-08, + "logits/chosen": -3.22629976272583, + "logits/rejected": -3.02327299118042, + "logps/chosen": -252.02767944335938, + "logps/rejected": -1246.688720703125, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7259575128555298, + "rewards/margins": 5.431679725646973, + "rewards/rejected": -4.705721855163574, + "step": 456 + }, + { + "epoch": 0.29, + "learning_rate": 8.319476604348106e-08, + "logits/chosen": -3.2154674530029297, + "logits/rejected": -3.194767951965332, + "logps/chosen": -291.2386474609375, + "logps/rejected": -753.2738647460938, + "loss": 0.3129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9142929315567017, + "rewards/margins": 3.887392044067383, + "rewards/rejected": -2.9730987548828125, + "step": 457 + }, + { + "epoch": 0.29, + "learning_rate": 8.31174644179194e-08, + "logits/chosen": -3.2466516494750977, + "logits/rejected": -3.162430763244629, + "logps/chosen": -228.81231689453125, + "logps/rejected": -449.1938171386719, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.906354546546936, + "rewards/margins": 2.6830482482910156, + "rewards/rejected": -1.7766938209533691, + "step": 458 + }, + { + "epoch": 0.29, + "learning_rate": 8.304002150664682e-08, + "logits/chosen": -3.23117995262146, + "logits/rejected": -3.1676158905029297, + "logps/chosen": -261.32379150390625, + "logps/rejected": -786.8329467773438, + "loss": 0.3394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8769447207450867, + "rewards/margins": 4.23927116394043, + "rewards/rejected": -3.362326145172119, + "step": 459 + }, + { + "epoch": 0.29, + "learning_rate": 8.296243764005022e-08, + "logits/chosen": -3.247532606124878, + "logits/rejected": -3.0493321418762207, + "logps/chosen": -222.93081665039062, + "logps/rejected": -815.806884765625, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0441231727600098, + "rewards/margins": 3.6818885803222656, + "rewards/rejected": -2.637765407562256, + "step": 460 + }, + { + "epoch": 0.29, + "learning_rate": 8.288471314911786e-08, + "logits/chosen": -3.2415523529052734, + "logits/rejected": -3.2095417976379395, + "logps/chosen": -236.67739868164062, + "logps/rejected": -961.5128173828125, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7469062805175781, + "rewards/margins": 4.685535430908203, + "rewards/rejected": -3.938629150390625, + "step": 461 + }, + { + "epoch": 0.29, + "learning_rate": 8.280684836543793e-08, + "logits/chosen": -3.276689052581787, + "logits/rejected": -3.158508777618408, + "logps/chosen": -278.25634765625, + "logps/rejected": -435.9942932128906, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9819602966308594, + "rewards/margins": 2.5445122718811035, + "rewards/rejected": -1.5625518560409546, + "step": 462 + }, + { + "epoch": 0.3, + "learning_rate": 8.272884362119711e-08, + "logits/chosen": -3.2962234020233154, + "logits/rejected": -3.1869232654571533, + "logps/chosen": -309.333251953125, + "logps/rejected": -686.3385620117188, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9538818597793579, + "rewards/margins": 3.591203451156616, + "rewards/rejected": -2.6373214721679688, + "step": 463 + }, + { + "epoch": 0.3, + "learning_rate": 8.265069924917925e-08, + "logits/chosen": -3.2021660804748535, + "logits/rejected": -3.0500435829162598, + "logps/chosen": -287.169677734375, + "logps/rejected": -406.40191650390625, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8145347833633423, + "rewards/margins": 2.116069793701172, + "rewards/rejected": -1.3015351295471191, + "step": 464 + }, + { + "epoch": 0.3, + "learning_rate": 8.257241558276379e-08, + "logits/chosen": -3.1344170570373535, + "logits/rejected": -3.102492332458496, + "logps/chosen": -240.76426696777344, + "logps/rejected": -482.81207275390625, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9709281921386719, + "rewards/margins": 2.994243621826172, + "rewards/rejected": -2.0233154296875, + "step": 465 + }, + { + "epoch": 0.3, + "learning_rate": 8.24939929559245e-08, + "logits/chosen": -3.2346742153167725, + "logits/rejected": -3.0518195629119873, + "logps/chosen": -294.3301086425781, + "logps/rejected": -552.576904296875, + "loss": 0.3328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7968429327011108, + "rewards/margins": 2.9558136463165283, + "rewards/rejected": -2.158970594406128, + "step": 466 + }, + { + "epoch": 0.3, + "learning_rate": 8.241543170322793e-08, + "logits/chosen": -3.242182731628418, + "logits/rejected": -3.1231961250305176, + "logps/chosen": -241.02255249023438, + "logps/rejected": -435.5545349121094, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8392174243927002, + "rewards/margins": 2.4854698181152344, + "rewards/rejected": -1.6462523937225342, + "step": 467 + }, + { + "epoch": 0.3, + "learning_rate": 8.233673215983206e-08, + "logits/chosen": -3.2572336196899414, + "logits/rejected": -3.1619043350219727, + "logps/chosen": -280.80047607421875, + "logps/rejected": -948.4092407226562, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9132782220840454, + "rewards/margins": 5.309811592102051, + "rewards/rejected": -4.396533012390137, + "step": 468 + }, + { + "epoch": 0.3, + "learning_rate": 8.225789466148486e-08, + "logits/chosen": -3.197516918182373, + "logits/rejected": -3.0661447048187256, + "logps/chosen": -260.4925537109375, + "logps/rejected": -783.745849609375, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8942168951034546, + "rewards/margins": 3.556657314300537, + "rewards/rejected": -2.662440538406372, + "step": 469 + }, + { + "epoch": 0.3, + "learning_rate": 8.217891954452281e-08, + "logits/chosen": -3.2294301986694336, + "logits/rejected": -3.104641914367676, + "logps/chosen": -279.87933349609375, + "logps/rejected": -481.083251953125, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7941192388534546, + "rewards/margins": 2.634103298187256, + "rewards/rejected": -1.8399841785430908, + "step": 470 + }, + { + "epoch": 0.3, + "learning_rate": 8.209980714586954e-08, + "logits/chosen": -3.1802966594696045, + "logits/rejected": -3.0910696983337402, + "logps/chosen": -298.6252746582031, + "logps/rejected": -585.399169921875, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8335800170898438, + "rewards/margins": 2.9357709884643555, + "rewards/rejected": -2.102191209793091, + "step": 471 + }, + { + "epoch": 0.3, + "learning_rate": 8.202055780303431e-08, + "logits/chosen": -3.2273144721984863, + "logits/rejected": -3.120029926300049, + "logps/chosen": -277.7674255371094, + "logps/rejected": -721.1399536132812, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8239395618438721, + "rewards/margins": 4.166945457458496, + "rewards/rejected": -3.343005657196045, + "step": 472 + }, + { + "epoch": 0.3, + "learning_rate": 8.194117185411063e-08, + "logits/chosen": -3.2441070079803467, + "logits/rejected": -3.1841607093811035, + "logps/chosen": -268.97698974609375, + "logps/rejected": -624.8046875, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9642196893692017, + "rewards/margins": 3.7098617553710938, + "rewards/rejected": -2.7456421852111816, + "step": 473 + }, + { + "epoch": 0.3, + "learning_rate": 8.18616496377748e-08, + "logits/chosen": -3.268141031265259, + "logits/rejected": -3.10306715965271, + "logps/chosen": -243.2742919921875, + "logps/rejected": -1111.745361328125, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0710029602050781, + "rewards/margins": 5.014632225036621, + "rewards/rejected": -3.9436287879943848, + "step": 474 + }, + { + "epoch": 0.3, + "learning_rate": 8.178199149328446e-08, + "logits/chosen": -3.218759775161743, + "logits/rejected": -3.1685662269592285, + "logps/chosen": -235.4051971435547, + "logps/rejected": -625.091064453125, + "loss": 0.3362, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0223854780197144, + "rewards/margins": 3.9470527172088623, + "rewards/rejected": -2.9246673583984375, + "step": 475 + }, + { + "epoch": 0.3, + "learning_rate": 8.170219776047715e-08, + "logits/chosen": -3.222189426422119, + "logits/rejected": -3.130654811859131, + "logps/chosen": -254.97869873046875, + "logps/rejected": -602.4218139648438, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8626807928085327, + "rewards/margins": 3.289414405822754, + "rewards/rejected": -2.4267334938049316, + "step": 476 + }, + { + "epoch": 0.3, + "learning_rate": 8.162226877976885e-08, + "logits/chosen": -3.1753172874450684, + "logits/rejected": -3.0904133319854736, + "logps/chosen": -289.74017333984375, + "logps/rejected": -840.0755615234375, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0565879344940186, + "rewards/margins": 4.538838863372803, + "rewards/rejected": -3.482250928878784, + "step": 477 + }, + { + "epoch": 0.3, + "learning_rate": 8.154220489215255e-08, + "logits/chosen": -3.2768478393554688, + "logits/rejected": -3.1496829986572266, + "logps/chosen": -226.74588012695312, + "logps/rejected": -746.61572265625, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9018638730049133, + "rewards/margins": 4.107720375061035, + "rewards/rejected": -3.2058563232421875, + "step": 478 + }, + { + "epoch": 0.31, + "learning_rate": 8.146200643919676e-08, + "logits/chosen": -3.2825725078582764, + "logits/rejected": -3.1205248832702637, + "logps/chosen": -247.45794677734375, + "logps/rejected": -655.0791625976562, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9103172421455383, + "rewards/margins": 3.6316308975219727, + "rewards/rejected": -2.7213134765625, + "step": 479 + }, + { + "epoch": 0.31, + "learning_rate": 8.13816737630441e-08, + "logits/chosen": -3.26187801361084, + "logits/rejected": -3.106823444366455, + "logps/chosen": -299.3409423828125, + "logps/rejected": -616.8367919921875, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9206116199493408, + "rewards/margins": 3.582089424133301, + "rewards/rejected": -2.661477565765381, + "step": 480 + }, + { + "epoch": 0.31, + "learning_rate": 8.130120720640976e-08, + "logits/chosen": -3.28501558303833, + "logits/rejected": -3.1210646629333496, + "logps/chosen": -211.93890380859375, + "logps/rejected": -842.0263061523438, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0528221130371094, + "rewards/margins": 4.836675643920898, + "rewards/rejected": -3.783853054046631, + "step": 481 + }, + { + "epoch": 0.31, + "learning_rate": 8.122060711258017e-08, + "logits/chosen": -3.229231834411621, + "logits/rejected": -3.103806495666504, + "logps/chosen": -251.47073364257812, + "logps/rejected": -710.119873046875, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8037475943565369, + "rewards/margins": 4.036764621734619, + "rewards/rejected": -3.2330169677734375, + "step": 482 + }, + { + "epoch": 0.31, + "learning_rate": 8.113987382541136e-08, + "logits/chosen": -3.202422857284546, + "logits/rejected": -3.1331162452697754, + "logps/chosen": -265.6534118652344, + "logps/rejected": -567.2164306640625, + "loss": 0.3046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.007502794265747, + "rewards/margins": 3.086879014968872, + "rewards/rejected": -2.079376220703125, + "step": 483 + }, + { + "epoch": 0.31, + "learning_rate": 8.105900768932768e-08, + "logits/chosen": -3.2090253829956055, + "logits/rejected": -3.129042148590088, + "logps/chosen": -297.344482421875, + "logps/rejected": -751.6956787109375, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0113861560821533, + "rewards/margins": 3.800097703933716, + "rewards/rejected": -2.7887115478515625, + "step": 484 + }, + { + "epoch": 0.31, + "learning_rate": 8.097800904932018e-08, + "logits/chosen": -3.2018394470214844, + "logits/rejected": -3.1831095218658447, + "logps/chosen": -249.44876098632812, + "logps/rejected": -721.654541015625, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.930389404296875, + "rewards/margins": 4.559140205383301, + "rewards/rejected": -3.628750801086426, + "step": 485 + }, + { + "epoch": 0.31, + "learning_rate": 8.089687825094523e-08, + "logits/chosen": -3.282383918762207, + "logits/rejected": -3.104196071624756, + "logps/chosen": -253.09165954589844, + "logps/rejected": -507.15948486328125, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8521378040313721, + "rewards/margins": 2.761441230773926, + "rewards/rejected": -1.9093034267425537, + "step": 486 + }, + { + "epoch": 0.31, + "learning_rate": 8.081561564032302e-08, + "logits/chosen": -3.2517850399017334, + "logits/rejected": -3.1439132690429688, + "logps/chosen": -257.7375793457031, + "logps/rejected": -995.4550170898438, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8721611499786377, + "rewards/margins": 4.807686805725098, + "rewards/rejected": -3.93552565574646, + "step": 487 + }, + { + "epoch": 0.31, + "learning_rate": 8.073422156413603e-08, + "logits/chosen": -3.223468065261841, + "logits/rejected": -3.039419174194336, + "logps/chosen": -245.85337829589844, + "logps/rejected": -554.776123046875, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.89556884765625, + "rewards/margins": 2.572850227355957, + "rewards/rejected": -1.6772812604904175, + "step": 488 + }, + { + "epoch": 0.31, + "learning_rate": 8.065269636962764e-08, + "logits/chosen": -3.256718635559082, + "logits/rejected": -3.1716885566711426, + "logps/chosen": -232.14389038085938, + "logps/rejected": -499.91485595703125, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8259071707725525, + "rewards/margins": 3.18643856048584, + "rewards/rejected": -2.3605315685272217, + "step": 489 + }, + { + "epoch": 0.31, + "learning_rate": 8.057104040460061e-08, + "logits/chosen": -3.236072063446045, + "logits/rejected": -3.0779943466186523, + "logps/chosen": -246.17271423339844, + "logps/rejected": -676.7431640625, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.82879638671875, + "rewards/margins": 3.058074951171875, + "rewards/rejected": -2.229278564453125, + "step": 490 + }, + { + "epoch": 0.31, + "learning_rate": 8.048925401741555e-08, + "logits/chosen": -3.27371883392334, + "logits/rejected": -3.1650474071502686, + "logps/chosen": -261.25860595703125, + "logps/rejected": -534.2677612304688, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0123367309570312, + "rewards/margins": 3.272369384765625, + "rewards/rejected": -2.2600326538085938, + "step": 491 + }, + { + "epoch": 0.31, + "learning_rate": 8.040733755698953e-08, + "logits/chosen": -3.2504160404205322, + "logits/rejected": -3.1866726875305176, + "logps/chosen": -266.3553161621094, + "logps/rejected": -666.4720458984375, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8463989496231079, + "rewards/margins": 3.920635938644409, + "rewards/rejected": -3.074237108230591, + "step": 492 + }, + { + "epoch": 0.31, + "learning_rate": 8.032529137279451e-08, + "logits/chosen": -3.2457828521728516, + "logits/rejected": -3.0227434635162354, + "logps/chosen": -267.01214599609375, + "logps/rejected": -1183.56787109375, + "loss": 0.297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8538970947265625, + "rewards/margins": 4.626486301422119, + "rewards/rejected": -3.7725892066955566, + "step": 493 + }, + { + "epoch": 0.31, + "learning_rate": 8.024311581485588e-08, + "logits/chosen": -3.251615047454834, + "logits/rejected": -3.143819570541382, + "logps/chosen": -271.641845703125, + "logps/rejected": -651.0635375976562, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.004213809967041, + "rewards/margins": 3.6586756706237793, + "rewards/rejected": -2.6544618606567383, + "step": 494 + }, + { + "epoch": 0.32, + "learning_rate": 8.016081123375097e-08, + "logits/chosen": -3.213390350341797, + "logits/rejected": -3.0982372760772705, + "logps/chosen": -243.90516662597656, + "logps/rejected": -595.5830688476562, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6703781485557556, + "rewards/margins": 2.975332736968994, + "rewards/rejected": -2.3049545288085938, + "step": 495 + }, + { + "epoch": 0.32, + "learning_rate": 8.007837798060753e-08, + "logits/chosen": -3.170309543609619, + "logits/rejected": -3.088820457458496, + "logps/chosen": -238.2137908935547, + "logps/rejected": -591.6632080078125, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9846878051757812, + "rewards/margins": 3.2795228958129883, + "rewards/rejected": -2.294834852218628, + "step": 496 + }, + { + "epoch": 0.32, + "learning_rate": 7.999581640710229e-08, + "logits/chosen": -3.2185304164886475, + "logits/rejected": -3.0916738510131836, + "logps/chosen": -264.068603515625, + "logps/rejected": -591.3475341796875, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7669563293457031, + "rewards/margins": 3.016934871673584, + "rewards/rejected": -2.249978542327881, + "step": 497 + }, + { + "epoch": 0.32, + "learning_rate": 7.991312686545937e-08, + "logits/chosen": -3.1700499057769775, + "logits/rejected": -3.1137561798095703, + "logps/chosen": -247.02880859375, + "logps/rejected": -497.03985595703125, + "loss": 0.3135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9556908011436462, + "rewards/margins": 3.05043363571167, + "rewards/rejected": -2.0947425365448, + "step": 498 + }, + { + "epoch": 0.32, + "learning_rate": 7.983030970844886e-08, + "logits/chosen": -3.2018017768859863, + "logits/rejected": -3.144883632659912, + "logps/chosen": -275.4613037109375, + "logps/rejected": -634.5153198242188, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9418861269950867, + "rewards/margins": 3.9860846996307373, + "rewards/rejected": -3.044198513031006, + "step": 499 + }, + { + "epoch": 0.32, + "learning_rate": 7.97473652893853e-08, + "logits/chosen": -3.243269443511963, + "logits/rejected": -3.110860824584961, + "logps/chosen": -282.032958984375, + "logps/rejected": -372.31201171875, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9730972051620483, + "rewards/margins": 2.1806230545043945, + "rewards/rejected": -1.2075257301330566, + "step": 500 + }, + { + "epoch": 0.32, + "learning_rate": 7.96642939621261e-08, + "logits/chosen": -3.238095760345459, + "logits/rejected": -3.021078586578369, + "logps/chosen": -213.09469604492188, + "logps/rejected": -1389.2373046875, + "loss": 0.332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8247085809707642, + "rewards/margins": 5.242323398590088, + "rewards/rejected": -4.417614936828613, + "step": 501 + }, + { + "epoch": 0.32, + "learning_rate": 7.958109608107013e-08, + "logits/chosen": -3.191554069519043, + "logits/rejected": -3.17812442779541, + "logps/chosen": -259.2890625, + "logps/rejected": -1050.0577392578125, + "loss": 0.3285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8380440473556519, + "rewards/margins": 5.476309776306152, + "rewards/rejected": -4.638266086578369, + "step": 502 + }, + { + "epoch": 0.32, + "learning_rate": 7.949777200115616e-08, + "logits/chosen": -3.1802639961242676, + "logits/rejected": -3.090729236602783, + "logps/chosen": -250.95114135742188, + "logps/rejected": -867.0344848632812, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8933937549591064, + "rewards/margins": 4.24549674987793, + "rewards/rejected": -3.352102756500244, + "step": 503 + }, + { + "epoch": 0.32, + "learning_rate": 7.94143220778613e-08, + "logits/chosen": -3.2129406929016113, + "logits/rejected": -3.139857769012451, + "logps/chosen": -276.60595703125, + "logps/rejected": -626.4881591796875, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9139518737792969, + "rewards/margins": 3.888213634490967, + "rewards/rejected": -2.974261522293091, + "step": 504 + }, + { + "epoch": 0.32, + "learning_rate": 7.933074666719961e-08, + "logits/chosen": -3.2034687995910645, + "logits/rejected": -3.1445045471191406, + "logps/chosen": -271.6267395019531, + "logps/rejected": -493.17962646484375, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0722236633300781, + "rewards/margins": 2.892937660217285, + "rewards/rejected": -1.820713758468628, + "step": 505 + }, + { + "epoch": 0.32, + "learning_rate": 7.924704612572047e-08, + "logits/chosen": -3.253660202026367, + "logits/rejected": -3.0539746284484863, + "logps/chosen": -331.7495422363281, + "logps/rejected": -880.5789184570312, + "loss": 0.344, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.031732201576233, + "rewards/margins": 4.699579238891602, + "rewards/rejected": -3.6678466796875, + "step": 506 + }, + { + "epoch": 0.32, + "learning_rate": 7.916322081050708e-08, + "logits/chosen": -3.2222445011138916, + "logits/rejected": -3.126828193664551, + "logps/chosen": -260.93768310546875, + "logps/rejected": -771.7396240234375, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9749466180801392, + "rewards/margins": 4.40568733215332, + "rewards/rejected": -3.4307403564453125, + "step": 507 + }, + { + "epoch": 0.32, + "learning_rate": 7.907927107917494e-08, + "logits/chosen": -3.261340618133545, + "logits/rejected": -3.13132381439209, + "logps/chosen": -252.41168212890625, + "logps/rejected": -226.0602264404297, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0545905828475952, + "rewards/margins": 1.6815086603164673, + "rewards/rejected": -0.6269180774688721, + "step": 508 + }, + { + "epoch": 0.32, + "learning_rate": 7.89951972898704e-08, + "logits/chosen": -3.223916530609131, + "logits/rejected": -3.1024391651153564, + "logps/chosen": -274.6551513671875, + "logps/rejected": -1106.889892578125, + "loss": 0.3043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9876724481582642, + "rewards/margins": 4.968507766723633, + "rewards/rejected": -3.9808349609375, + "step": 509 + }, + { + "epoch": 0.33, + "learning_rate": 7.891099980126898e-08, + "logits/chosen": -3.1831958293914795, + "logits/rejected": -3.1031322479248047, + "logps/chosen": -271.98809814453125, + "logps/rejected": -439.129638671875, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0244492292404175, + "rewards/margins": 2.6512513160705566, + "rewards/rejected": -1.6268019676208496, + "step": 510 + }, + { + "epoch": 0.33, + "learning_rate": 7.882667897257398e-08, + "logits/chosen": -3.2354612350463867, + "logits/rejected": -3.2139689922332764, + "logps/chosen": -203.7930450439453, + "logps/rejected": -603.6524658203125, + "loss": 0.2993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8609962463378906, + "rewards/margins": 3.589308977127075, + "rewards/rejected": -2.7283127307891846, + "step": 511 + }, + { + "epoch": 0.33, + "learning_rate": 7.874223516351486e-08, + "logits/chosen": -3.255350112915039, + "logits/rejected": -3.0594959259033203, + "logps/chosen": -244.60446166992188, + "logps/rejected": -343.5167541503906, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0141228437423706, + "rewards/margins": 2.320300340652466, + "rewards/rejected": -1.3061774969100952, + "step": 512 + }, + { + "epoch": 0.33, + "learning_rate": 7.86576687343458e-08, + "logits/chosen": -3.2931485176086426, + "logits/rejected": -3.079301357269287, + "logps/chosen": -313.26800537109375, + "logps/rejected": -821.185302734375, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9140838384628296, + "rewards/margins": 4.410525321960449, + "rewards/rejected": -3.4964418411254883, + "step": 513 + }, + { + "epoch": 0.33, + "learning_rate": 7.857298004584402e-08, + "logits/chosen": -3.225327253341675, + "logits/rejected": -3.1312131881713867, + "logps/chosen": -278.90338134765625, + "logps/rejected": -1359.2119140625, + "loss": 0.3129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8767288327217102, + "rewards/margins": 6.5525102615356445, + "rewards/rejected": -5.67578125, + "step": 514 + }, + { + "epoch": 0.33, + "learning_rate": 7.84881694593084e-08, + "logits/chosen": -3.2163009643554688, + "logits/rejected": -3.1953296661376953, + "logps/chosen": -266.98992919921875, + "logps/rejected": -526.9954833984375, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0698410272598267, + "rewards/margins": 3.489968776702881, + "rewards/rejected": -2.4201278686523438, + "step": 515 + }, + { + "epoch": 0.33, + "learning_rate": 7.840323733655779e-08, + "logits/chosen": -3.202148199081421, + "logits/rejected": -3.0696911811828613, + "logps/chosen": -237.3846893310547, + "logps/rejected": -436.80841064453125, + "loss": 0.3554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7136596441268921, + "rewards/margins": 2.6082634925842285, + "rewards/rejected": -1.8946038484573364, + "step": 516 + }, + { + "epoch": 0.33, + "learning_rate": 7.831818403992958e-08, + "logits/chosen": -3.2433905601501465, + "logits/rejected": -3.1749887466430664, + "logps/chosen": -249.56414794921875, + "logps/rejected": -1000.6143798828125, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8143547177314758, + "rewards/margins": 4.475405216217041, + "rewards/rejected": -3.661050319671631, + "step": 517 + }, + { + "epoch": 0.33, + "learning_rate": 7.823300993227811e-08, + "logits/chosen": -3.284581184387207, + "logits/rejected": -3.101226568222046, + "logps/chosen": -286.3742980957031, + "logps/rejected": -248.85403442382812, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9223358631134033, + "rewards/margins": 1.6692558526992798, + "rewards/rejected": -0.7469199895858765, + "step": 518 + }, + { + "epoch": 0.33, + "learning_rate": 7.814771537697312e-08, + "logits/chosen": -3.2308201789855957, + "logits/rejected": -3.163773536682129, + "logps/chosen": -289.88922119140625, + "logps/rejected": -625.2737426757812, + "loss": 0.3243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8914642333984375, + "rewards/margins": 3.266770839691162, + "rewards/rejected": -2.3753066062927246, + "step": 519 + }, + { + "epoch": 0.33, + "learning_rate": 7.806230073789818e-08, + "logits/chosen": -3.196857213973999, + "logits/rejected": -3.042463779449463, + "logps/chosen": -274.9909973144531, + "logps/rejected": -389.6133117675781, + "loss": 0.3255, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0909926891326904, + "rewards/margins": 2.4822585582733154, + "rewards/rejected": -1.391265869140625, + "step": 520 + }, + { + "epoch": 0.33, + "learning_rate": 7.797676637944921e-08, + "logits/chosen": -3.185598850250244, + "logits/rejected": -3.0881400108337402, + "logps/chosen": -246.84002685546875, + "logps/rejected": -753.8795166015625, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9506263732910156, + "rewards/margins": 4.3205180168151855, + "rewards/rejected": -3.369891405105591, + "step": 521 + }, + { + "epoch": 0.33, + "learning_rate": 7.789111266653284e-08, + "logits/chosen": -3.207249641418457, + "logits/rejected": -3.123384714126587, + "logps/chosen": -275.8622131347656, + "logps/rejected": -709.1240844726562, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9510605335235596, + "rewards/margins": 4.087236404418945, + "rewards/rejected": -3.1361756324768066, + "step": 522 + }, + { + "epoch": 0.33, + "learning_rate": 7.780533996456488e-08, + "logits/chosen": -3.2303261756896973, + "logits/rejected": -3.1953885555267334, + "logps/chosen": -255.74195861816406, + "logps/rejected": -754.6007080078125, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8502578735351562, + "rewards/margins": 4.2587690353393555, + "rewards/rejected": -3.4085114002227783, + "step": 523 + }, + { + "epoch": 0.33, + "learning_rate": 7.771944863946883e-08, + "logits/chosen": -3.277351140975952, + "logits/rejected": -3.0773544311523438, + "logps/chosen": -232.7286376953125, + "logps/rejected": -330.1259765625, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7727683782577515, + "rewards/margins": 2.2333884239196777, + "rewards/rejected": -1.4606201648712158, + "step": 524 + }, + { + "epoch": 0.33, + "learning_rate": 7.763343905767419e-08, + "logits/chosen": -3.2685327529907227, + "logits/rejected": -3.1683177947998047, + "logps/chosen": -258.52374267578125, + "logps/rejected": -523.81103515625, + "loss": 0.3, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8826805353164673, + "rewards/margins": 3.3310234546661377, + "rewards/rejected": -2.448342800140381, + "step": 525 + }, + { + "epoch": 0.34, + "learning_rate": 7.754731158611499e-08, + "logits/chosen": -3.2386717796325684, + "logits/rejected": -3.0017786026000977, + "logps/chosen": -235.36598205566406, + "logps/rejected": -614.541259765625, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8612732291221619, + "rewards/margins": 3.4891862869262695, + "rewards/rejected": -2.627912998199463, + "step": 526 + }, + { + "epoch": 0.34, + "learning_rate": 7.746106659222823e-08, + "logits/chosen": -3.271449327468872, + "logits/rejected": -3.1960480213165283, + "logps/chosen": -279.25634765625, + "logps/rejected": -1171.072021484375, + "loss": 0.3135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9560439586639404, + "rewards/margins": 5.4049577713012695, + "rewards/rejected": -4.44891357421875, + "step": 527 + }, + { + "epoch": 0.34, + "learning_rate": 7.737470444395227e-08, + "logits/chosen": -3.2241740226745605, + "logits/rejected": -3.098158597946167, + "logps/chosen": -243.517822265625, + "logps/rejected": -806.861083984375, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9945533871650696, + "rewards/margins": 3.8680624961853027, + "rewards/rejected": -2.873509168624878, + "step": 528 + }, + { + "epoch": 0.34, + "learning_rate": 7.728822550972523e-08, + "logits/chosen": -3.219564914703369, + "logits/rejected": -3.0366411209106445, + "logps/chosen": -293.4433898925781, + "logps/rejected": -467.7142028808594, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0325171947479248, + "rewards/margins": 2.6296441555023193, + "rewards/rejected": -1.597126841545105, + "step": 529 + }, + { + "epoch": 0.34, + "learning_rate": 7.720163015848352e-08, + "logits/chosen": -3.3002912998199463, + "logits/rejected": -3.0603349208831787, + "logps/chosen": -226.92588806152344, + "logps/rejected": -285.59979248046875, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9630096554756165, + "rewards/margins": 2.0594048500061035, + "rewards/rejected": -1.0963951349258423, + "step": 530 + }, + { + "epoch": 0.34, + "learning_rate": 7.711491875966018e-08, + "logits/chosen": -3.1769144535064697, + "logits/rejected": -3.126075267791748, + "logps/chosen": -255.199951171875, + "logps/rejected": -911.9974975585938, + "loss": 0.3387, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0942389965057373, + "rewards/margins": 4.427637577056885, + "rewards/rejected": -3.3333983421325684, + "step": 531 + }, + { + "epoch": 0.34, + "learning_rate": 7.702809168318336e-08, + "logits/chosen": -3.212040901184082, + "logits/rejected": -3.083077907562256, + "logps/chosen": -266.5721740722656, + "logps/rejected": -464.98822021484375, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8867180347442627, + "rewards/margins": 3.1038825511932373, + "rewards/rejected": -2.2171645164489746, + "step": 532 + }, + { + "epoch": 0.34, + "learning_rate": 7.694114929947468e-08, + "logits/chosen": -3.192098617553711, + "logits/rejected": -3.112919330596924, + "logps/chosen": -278.9899597167969, + "logps/rejected": -545.1864013671875, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.062718152999878, + "rewards/margins": 3.303959846496582, + "rewards/rejected": -2.241241455078125, + "step": 533 + }, + { + "epoch": 0.34, + "learning_rate": 7.685409197944768e-08, + "logits/chosen": -3.175856113433838, + "logits/rejected": -3.1158323287963867, + "logps/chosen": -273.540283203125, + "logps/rejected": -471.7594299316406, + "loss": 0.3093, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9835830926895142, + "rewards/margins": 3.1268861293792725, + "rewards/rejected": -2.1433029174804688, + "step": 534 + }, + { + "epoch": 0.34, + "learning_rate": 7.676692009450626e-08, + "logits/chosen": -3.288609504699707, + "logits/rejected": -3.1670100688934326, + "logps/chosen": -281.92999267578125, + "logps/rejected": -331.199462890625, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.125396728515625, + "rewards/margins": 2.5064194202423096, + "rewards/rejected": -1.3810226917266846, + "step": 535 + }, + { + "epoch": 0.34, + "learning_rate": 7.667963401654309e-08, + "logits/chosen": -3.213179588317871, + "logits/rejected": -3.1590867042541504, + "logps/chosen": -318.1152038574219, + "logps/rejected": -707.057861328125, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9125893115997314, + "rewards/margins": 3.8859305381774902, + "rewards/rejected": -2.973341464996338, + "step": 536 + }, + { + "epoch": 0.34, + "learning_rate": 7.659223411793799e-08, + "logits/chosen": -3.180203914642334, + "logits/rejected": -3.0644922256469727, + "logps/chosen": -228.343505859375, + "logps/rejected": -1057.006103515625, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8117141723632812, + "rewards/margins": 4.458503723144531, + "rewards/rejected": -3.64678955078125, + "step": 537 + }, + { + "epoch": 0.34, + "learning_rate": 7.650472077155634e-08, + "logits/chosen": -3.128978729248047, + "logits/rejected": -3.0452778339385986, + "logps/chosen": -256.897216796875, + "logps/rejected": -1325.552978515625, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.046582818031311, + "rewards/margins": 6.088727951049805, + "rewards/rejected": -5.042144775390625, + "step": 538 + }, + { + "epoch": 0.34, + "learning_rate": 7.64170943507476e-08, + "logits/chosen": -3.2413363456726074, + "logits/rejected": -3.054713249206543, + "logps/chosen": -276.5840759277344, + "logps/rejected": -339.455810546875, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0107789039611816, + "rewards/margins": 2.3578994274139404, + "rewards/rejected": -1.3471206426620483, + "step": 539 + }, + { + "epoch": 0.34, + "learning_rate": 7.63293552293435e-08, + "logits/chosen": -3.229644775390625, + "logits/rejected": -3.0996861457824707, + "logps/chosen": -292.22576904296875, + "logps/rejected": -789.3916015625, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8712921142578125, + "rewards/margins": 4.185272216796875, + "rewards/rejected": -3.3139801025390625, + "step": 540 + }, + { + "epoch": 0.34, + "learning_rate": 7.624150378165664e-08, + "logits/chosen": -3.1908669471740723, + "logits/rejected": -3.121490955352783, + "logps/chosen": -270.9578552246094, + "logps/rejected": -832.6439208984375, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1047019958496094, + "rewards/margins": 3.93791127204895, + "rewards/rejected": -2.833209276199341, + "step": 541 + }, + { + "epoch": 0.35, + "learning_rate": 7.615354038247887e-08, + "logits/chosen": -3.1560287475585938, + "logits/rejected": -3.0833191871643066, + "logps/chosen": -246.46237182617188, + "logps/rejected": -834.5211791992188, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.835211992263794, + "rewards/margins": 4.240448951721191, + "rewards/rejected": -3.4052369594573975, + "step": 542 + }, + { + "epoch": 0.35, + "learning_rate": 7.606546540707959e-08, + "logits/chosen": -3.300374984741211, + "logits/rejected": -3.1296167373657227, + "logps/chosen": -249.1905975341797, + "logps/rejected": -981.2171630859375, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9841889142990112, + "rewards/margins": 4.871307373046875, + "rewards/rejected": -3.8871185779571533, + "step": 543 + }, + { + "epoch": 0.35, + "learning_rate": 7.597727923120418e-08, + "logits/chosen": -3.2233457565307617, + "logits/rejected": -3.11433744430542, + "logps/chosen": -263.4425048828125, + "logps/rejected": -820.54248046875, + "loss": 0.3034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8174057006835938, + "rewards/margins": 4.756034851074219, + "rewards/rejected": -3.938629150390625, + "step": 544 + }, + { + "epoch": 0.35, + "learning_rate": 7.58889822310725e-08, + "logits/chosen": -3.2361693382263184, + "logits/rejected": -3.173527240753174, + "logps/chosen": -287.74041748046875, + "logps/rejected": -778.7296142578125, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.90179443359375, + "rewards/margins": 4.583609104156494, + "rewards/rejected": -3.681814670562744, + "step": 545 + }, + { + "epoch": 0.35, + "learning_rate": 7.580057478337716e-08, + "logits/chosen": -3.2768335342407227, + "logits/rejected": -3.1902031898498535, + "logps/chosen": -268.63330078125, + "logps/rejected": -850.83056640625, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.931965708732605, + "rewards/margins": 5.2538347244262695, + "rewards/rejected": -4.321868896484375, + "step": 546 + }, + { + "epoch": 0.35, + "learning_rate": 7.571205726528196e-08, + "logits/chosen": -3.2698893547058105, + "logits/rejected": -3.1701438426971436, + "logps/chosen": -277.3632507324219, + "logps/rejected": -626.2628173828125, + "loss": 0.3285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9351135492324829, + "rewards/margins": 3.4604904651641846, + "rewards/rejected": -2.525377035140991, + "step": 547 + }, + { + "epoch": 0.35, + "learning_rate": 7.562343005442032e-08, + "logits/chosen": -3.244777202606201, + "logits/rejected": -3.0654757022857666, + "logps/chosen": -249.9378662109375, + "logps/rejected": -531.30126953125, + "loss": 0.305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0633888244628906, + "rewards/margins": 3.4704430103302, + "rewards/rejected": -2.4070541858673096, + "step": 548 + }, + { + "epoch": 0.35, + "learning_rate": 7.553469352889356e-08, + "logits/chosen": -3.2529807090759277, + "logits/rejected": -3.0855963230133057, + "logps/chosen": -257.1922607421875, + "logps/rejected": -341.4070739746094, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.009547472000122, + "rewards/margins": 2.1600663661956787, + "rewards/rejected": -1.1505188941955566, + "step": 549 + }, + { + "epoch": 0.35, + "learning_rate": 7.544584806726944e-08, + "logits/chosen": -3.258683204650879, + "logits/rejected": -3.157440662384033, + "logps/chosen": -285.22637939453125, + "logps/rejected": -851.9083251953125, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.918965220451355, + "rewards/margins": 4.111351013183594, + "rewards/rejected": -3.1923859119415283, + "step": 550 + }, + { + "epoch": 0.35, + "learning_rate": 7.535689404858041e-08, + "logits/chosen": -3.2518091201782227, + "logits/rejected": -3.1322784423828125, + "logps/chosen": -243.38546752929688, + "logps/rejected": -774.3284301757812, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9482170343399048, + "rewards/margins": 4.541564464569092, + "rewards/rejected": -3.5933473110198975, + "step": 551 + }, + { + "epoch": 0.35, + "learning_rate": 7.526783185232208e-08, + "logits/chosen": -3.1920366287231445, + "logits/rejected": -3.024285316467285, + "logps/chosen": -258.79150390625, + "logps/rejected": -773.6990356445312, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3482422828674316, + "rewards/margins": 4.470239162445068, + "rewards/rejected": -3.121997117996216, + "step": 552 + }, + { + "epoch": 0.35, + "learning_rate": 7.517866185845152e-08, + "logits/chosen": -3.2930712699890137, + "logits/rejected": -3.0887656211853027, + "logps/chosen": -248.48147583007812, + "logps/rejected": -762.131591796875, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9405449032783508, + "rewards/margins": 3.6554207801818848, + "rewards/rejected": -2.7148759365081787, + "step": 553 + }, + { + "epoch": 0.35, + "learning_rate": 7.508938444738575e-08, + "logits/chosen": -3.2456908226013184, + "logits/rejected": -3.139986515045166, + "logps/chosen": -309.0167236328125, + "logps/rejected": -766.8070678710938, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9998580813407898, + "rewards/margins": 3.9502973556518555, + "rewards/rejected": -2.950439453125, + "step": 554 + }, + { + "epoch": 0.35, + "learning_rate": 7.5e-08, + "logits/chosen": -3.202260971069336, + "logits/rejected": -3.0787644386291504, + "logps/chosen": -243.5330047607422, + "logps/rejected": -578.5911254882812, + "loss": 0.2919, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9429168701171875, + "rewards/margins": 3.4295411109924316, + "rewards/rejected": -2.486624240875244, + "step": 555 + }, + { + "epoch": 0.35, + "learning_rate": 7.491050889762615e-08, + "logits/chosen": -3.215759754180908, + "logits/rejected": -3.07102108001709, + "logps/chosen": -281.59307861328125, + "logps/rejected": -762.1459350585938, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.079034447669983, + "rewards/margins": 3.8826186656951904, + "rewards/rejected": -2.803584337234497, + "step": 556 + }, + { + "epoch": 0.36, + "learning_rate": 7.482091152205112e-08, + "logits/chosen": -3.2519893646240234, + "logits/rejected": -3.180961847305298, + "logps/chosen": -266.01708984375, + "logps/rejected": -717.3250732421875, + "loss": 0.3027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0319702625274658, + "rewards/margins": 3.920915365219116, + "rewards/rejected": -2.8889451026916504, + "step": 557 + }, + { + "epoch": 0.36, + "learning_rate": 7.473120825551516e-08, + "logits/chosen": -3.1822965145111084, + "logits/rejected": -3.1443777084350586, + "logps/chosen": -297.09979248046875, + "logps/rejected": -1066.591064453125, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.005242943763733, + "rewards/margins": 6.125885009765625, + "rewards/rejected": -5.120642185211182, + "step": 558 + }, + { + "epoch": 0.36, + "learning_rate": 7.464139948071033e-08, + "logits/chosen": -3.2384817600250244, + "logits/rejected": -3.129207134246826, + "logps/chosen": -235.37171936035156, + "logps/rejected": -669.3438720703125, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8142585754394531, + "rewards/margins": 4.432706832885742, + "rewards/rejected": -3.61844801902771, + "step": 559 + }, + { + "epoch": 0.36, + "learning_rate": 7.455148558077874e-08, + "logits/chosen": -3.1712162494659424, + "logits/rejected": -3.1303887367248535, + "logps/chosen": -266.2243347167969, + "logps/rejected": -840.3394165039062, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9727394580841064, + "rewards/margins": 5.184085845947266, + "rewards/rejected": -4.211346626281738, + "step": 560 + }, + { + "epoch": 0.36, + "learning_rate": 7.44614669393111e-08, + "logits/chosen": -3.1982386112213135, + "logits/rejected": -3.022965669631958, + "logps/chosen": -285.05914306640625, + "logps/rejected": -1130.00390625, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.946576714515686, + "rewards/margins": 4.884974002838135, + "rewards/rejected": -3.9383974075317383, + "step": 561 + }, + { + "epoch": 0.36, + "learning_rate": 7.437134394034486e-08, + "logits/chosen": -3.2670094966888428, + "logits/rejected": -3.2284555435180664, + "logps/chosen": -274.2417297363281, + "logps/rejected": -542.5322265625, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8457671999931335, + "rewards/margins": 3.6270689964294434, + "rewards/rejected": -2.781301975250244, + "step": 562 + }, + { + "epoch": 0.36, + "learning_rate": 7.428111696836267e-08, + "logits/chosen": -3.267585277557373, + "logits/rejected": -3.048682451248169, + "logps/chosen": -269.5850830078125, + "logps/rejected": -670.1843872070312, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.006605625152588, + "rewards/margins": 3.861241340637207, + "rewards/rejected": -2.854635715484619, + "step": 563 + }, + { + "epoch": 0.36, + "learning_rate": 7.419078640829087e-08, + "logits/chosen": -3.2298460006713867, + "logits/rejected": -3.083395004272461, + "logps/chosen": -270.0779724121094, + "logps/rejected": -964.2897338867188, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0066986083984375, + "rewards/margins": 4.772946357727051, + "rewards/rejected": -3.766247510910034, + "step": 564 + }, + { + "epoch": 0.36, + "learning_rate": 7.41003526454976e-08, + "logits/chosen": -3.233124256134033, + "logits/rejected": -3.042189836502075, + "logps/chosen": -276.3241882324219, + "logps/rejected": -1605.2440185546875, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0332977771759033, + "rewards/margins": 7.3084259033203125, + "rewards/rejected": -6.275128364562988, + "step": 565 + }, + { + "epoch": 0.36, + "learning_rate": 7.400981606579137e-08, + "logits/chosen": -3.2239174842834473, + "logits/rejected": -3.135870933532715, + "logps/chosen": -258.8566589355469, + "logps/rejected": -483.2406921386719, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9120720028877258, + "rewards/margins": 3.10427188873291, + "rewards/rejected": -2.19219970703125, + "step": 566 + }, + { + "epoch": 0.36, + "learning_rate": 7.391917705541926e-08, + "logits/chosen": -3.241091251373291, + "logits/rejected": -3.100034713745117, + "logps/chosen": -226.27577209472656, + "logps/rejected": -354.60919189453125, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0290184020996094, + "rewards/margins": 2.5677528381347656, + "rewards/rejected": -1.5387344360351562, + "step": 567 + }, + { + "epoch": 0.36, + "learning_rate": 7.382843600106538e-08, + "logits/chosen": -3.1979174613952637, + "logits/rejected": -3.1357054710388184, + "logps/chosen": -291.8221130371094, + "logps/rejected": -405.7033996582031, + "loss": 0.3271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9934875965118408, + "rewards/margins": 2.742684841156006, + "rewards/rejected": -1.7491974830627441, + "step": 568 + }, + { + "epoch": 0.36, + "learning_rate": 7.373759328984921e-08, + "logits/chosen": -3.277475357055664, + "logits/rejected": -3.1722488403320312, + "logps/chosen": -278.5740661621094, + "logps/rejected": -454.4758605957031, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8931427001953125, + "rewards/margins": 3.191092014312744, + "rewards/rejected": -2.2979493141174316, + "step": 569 + }, + { + "epoch": 0.36, + "learning_rate": 7.364664930932384e-08, + "logits/chosen": -3.250098705291748, + "logits/rejected": -3.056550979614258, + "logps/chosen": -246.18768310546875, + "logps/rejected": -852.09228515625, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8675544857978821, + "rewards/margins": 3.6870484352111816, + "rewards/rejected": -2.8194940090179443, + "step": 570 + }, + { + "epoch": 0.36, + "learning_rate": 7.355560444747444e-08, + "logits/chosen": -3.238677740097046, + "logits/rejected": -3.0994887351989746, + "logps/chosen": -295.93450927734375, + "logps/rejected": -1197.189697265625, + "loss": 0.3322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9702895879745483, + "rewards/margins": 5.817939758300781, + "rewards/rejected": -4.847650527954102, + "step": 571 + }, + { + "epoch": 0.36, + "learning_rate": 7.346445909271658e-08, + "logits/chosen": -3.2072606086730957, + "logits/rejected": -3.1859607696533203, + "logps/chosen": -248.1750030517578, + "logps/rejected": -682.6871337890625, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0626342296600342, + "rewards/margins": 4.517904758453369, + "rewards/rejected": -3.455270290374756, + "step": 572 + }, + { + "epoch": 0.37, + "learning_rate": 7.337321363389452e-08, + "logits/chosen": -3.204981803894043, + "logits/rejected": -3.0124082565307617, + "logps/chosen": -246.53604125976562, + "logps/rejected": -1090.1898193359375, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0177345275878906, + "rewards/margins": 4.700369834899902, + "rewards/rejected": -3.68263578414917, + "step": 573 + }, + { + "epoch": 0.37, + "learning_rate": 7.328186846027957e-08, + "logits/chosen": -3.2427520751953125, + "logits/rejected": -3.1040732860565186, + "logps/chosen": -254.3851318359375, + "logps/rejected": -455.850341796875, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9431266784667969, + "rewards/margins": 3.0284905433654785, + "rewards/rejected": -2.0853638648986816, + "step": 574 + }, + { + "epoch": 0.37, + "learning_rate": 7.319042396156849e-08, + "logits/chosen": -3.190737724304199, + "logits/rejected": -3.0639896392822266, + "logps/chosen": -251.7333221435547, + "logps/rejected": -465.1044616699219, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9745819568634033, + "rewards/margins": 2.7347540855407715, + "rewards/rejected": -1.7601722478866577, + "step": 575 + }, + { + "epoch": 0.37, + "learning_rate": 7.309888052788175e-08, + "logits/chosen": -3.2380313873291016, + "logits/rejected": -3.154467821121216, + "logps/chosen": -231.68272399902344, + "logps/rejected": -616.6759643554688, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2216888666152954, + "rewards/margins": 4.151281833648682, + "rewards/rejected": -2.9295928478240967, + "step": 576 + }, + { + "epoch": 0.37, + "learning_rate": 7.300723854976191e-08, + "logits/chosen": -3.267435073852539, + "logits/rejected": -3.123250722885132, + "logps/chosen": -244.5547637939453, + "logps/rejected": -609.3018188476562, + "loss": 0.2792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0626640319824219, + "rewards/margins": 4.044859886169434, + "rewards/rejected": -2.982196092605591, + "step": 577 + }, + { + "epoch": 0.37, + "learning_rate": 7.291549841817192e-08, + "logits/chosen": -3.249864339828491, + "logits/rejected": -3.1058783531188965, + "logps/chosen": -232.06118774414062, + "logps/rejected": -510.1038818359375, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0343315601348877, + "rewards/margins": 3.3305487632751465, + "rewards/rejected": -2.296217441558838, + "step": 578 + }, + { + "epoch": 0.37, + "learning_rate": 7.28236605244935e-08, + "logits/chosen": -3.2454652786254883, + "logits/rejected": -3.0764193534851074, + "logps/chosen": -259.77874755859375, + "logps/rejected": -618.8648071289062, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9618469476699829, + "rewards/margins": 3.8037521839141846, + "rewards/rejected": -2.841905117034912, + "step": 579 + }, + { + "epoch": 0.37, + "learning_rate": 7.273172526052542e-08, + "logits/chosen": -3.2537217140197754, + "logits/rejected": -3.067190170288086, + "logps/chosen": -282.48974609375, + "logps/rejected": -899.1914672851562, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1138931512832642, + "rewards/margins": 5.527357578277588, + "rewards/rejected": -4.413464546203613, + "step": 580 + }, + { + "epoch": 0.37, + "learning_rate": 7.263969301848187e-08, + "logits/chosen": -3.2326221466064453, + "logits/rejected": -3.0572009086608887, + "logps/chosen": -260.99542236328125, + "logps/rejected": -685.5864868164062, + "loss": 0.2807, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0414643287658691, + "rewards/margins": 4.590863227844238, + "rewards/rejected": -3.549398899078369, + "step": 581 + }, + { + "epoch": 0.37, + "learning_rate": 7.254756419099072e-08, + "logits/chosen": -3.2493860721588135, + "logits/rejected": -3.085744857788086, + "logps/chosen": -231.8390350341797, + "logps/rejected": -732.6318969726562, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8251708745956421, + "rewards/margins": 4.232324600219727, + "rewards/rejected": -3.407153367996216, + "step": 582 + }, + { + "epoch": 0.37, + "learning_rate": 7.245533917109198e-08, + "logits/chosen": -3.2183117866516113, + "logits/rejected": -3.1352226734161377, + "logps/chosen": -247.2845916748047, + "logps/rejected": -521.9276733398438, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0483665466308594, + "rewards/margins": 3.558112382888794, + "rewards/rejected": -2.5097458362579346, + "step": 583 + }, + { + "epoch": 0.37, + "learning_rate": 7.236301835223597e-08, + "logits/chosen": -3.2868640422821045, + "logits/rejected": -3.134815216064453, + "logps/chosen": -309.5486145019531, + "logps/rejected": -492.8269348144531, + "loss": 0.3318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1907715797424316, + "rewards/margins": 3.275601387023926, + "rewards/rejected": -2.084829807281494, + "step": 584 + }, + { + "epoch": 0.37, + "learning_rate": 7.22706021282817e-08, + "logits/chosen": -3.2444372177124023, + "logits/rejected": -2.973484992980957, + "logps/chosen": -266.84503173828125, + "logps/rejected": -697.4600830078125, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0214996337890625, + "rewards/margins": 3.7215514183044434, + "rewards/rejected": -2.700051784515381, + "step": 585 + }, + { + "epoch": 0.37, + "learning_rate": 7.217809089349524e-08, + "logits/chosen": -3.2985267639160156, + "logits/rejected": -3.1664862632751465, + "logps/chosen": -250.55770874023438, + "logps/rejected": -554.0389404296875, + "loss": 0.3088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9442833662033081, + "rewards/margins": 3.304079532623291, + "rewards/rejected": -2.3597960472106934, + "step": 586 + }, + { + "epoch": 0.37, + "learning_rate": 7.208548504254799e-08, + "logits/chosen": -3.1679039001464844, + "logits/rejected": -3.0742650032043457, + "logps/chosen": -257.1238708496094, + "logps/rejected": -530.1860961914062, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9620979428291321, + "rewards/margins": 3.13710880279541, + "rewards/rejected": -2.1750106811523438, + "step": 587 + }, + { + "epoch": 0.37, + "learning_rate": 7.199278497051497e-08, + "logits/chosen": -3.223172903060913, + "logits/rejected": -3.0425493717193604, + "logps/chosen": -299.3709411621094, + "logps/rejected": -1521.0225830078125, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1704604625701904, + "rewards/margins": 7.093013286590576, + "rewards/rejected": -5.922552585601807, + "step": 588 + }, + { + "epoch": 0.38, + "learning_rate": 7.189999107287317e-08, + "logits/chosen": -3.2589914798736572, + "logits/rejected": -3.1220765113830566, + "logps/chosen": -288.7358093261719, + "logps/rejected": -461.33245849609375, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.206536054611206, + "rewards/margins": 3.0783562660217285, + "rewards/rejected": -1.871820092201233, + "step": 589 + }, + { + "epoch": 0.38, + "learning_rate": 7.18071037454999e-08, + "logits/chosen": -3.2582921981811523, + "logits/rejected": -3.0957067012786865, + "logps/chosen": -314.29046630859375, + "logps/rejected": -629.2083740234375, + "loss": 0.3217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9714508056640625, + "rewards/margins": 3.861135721206665, + "rewards/rejected": -2.8896851539611816, + "step": 590 + }, + { + "epoch": 0.38, + "learning_rate": 7.1714123384671e-08, + "logits/chosen": -3.209165096282959, + "logits/rejected": -3.1809401512145996, + "logps/chosen": -228.6396026611328, + "logps/rejected": -911.9054565429688, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8879745602607727, + "rewards/margins": 4.593623161315918, + "rewards/rejected": -3.705648899078369, + "step": 591 + }, + { + "epoch": 0.38, + "learning_rate": 7.162105038705926e-08, + "logits/chosen": -3.2360496520996094, + "logits/rejected": -3.0909652709960938, + "logps/chosen": -262.6427917480469, + "logps/rejected": -930.4161376953125, + "loss": 0.3111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0865097045898438, + "rewards/margins": 4.596693515777588, + "rewards/rejected": -3.510183811187744, + "step": 592 + }, + { + "epoch": 0.38, + "learning_rate": 7.152788514973267e-08, + "logits/chosen": -3.24783992767334, + "logits/rejected": -3.184368133544922, + "logps/chosen": -262.23760986328125, + "logps/rejected": -654.1806640625, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.774976372718811, + "rewards/margins": 4.03399133682251, + "rewards/rejected": -3.2590150833129883, + "step": 593 + }, + { + "epoch": 0.38, + "learning_rate": 7.14346280701527e-08, + "logits/chosen": -3.2491605281829834, + "logits/rejected": -3.1238346099853516, + "logps/chosen": -242.2777099609375, + "logps/rejected": -601.8994140625, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.099016547203064, + "rewards/margins": 4.104952335357666, + "rewards/rejected": -3.0059356689453125, + "step": 594 + }, + { + "epoch": 0.38, + "learning_rate": 7.134127954617268e-08, + "logits/chosen": -3.2261037826538086, + "logits/rejected": -3.137010335922241, + "logps/chosen": -252.3749237060547, + "logps/rejected": -855.6361694335938, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8561241626739502, + "rewards/margins": 4.87642765045166, + "rewards/rejected": -4.020303249359131, + "step": 595 + }, + { + "epoch": 0.38, + "learning_rate": 7.1247839976036e-08, + "logits/chosen": -3.2766354084014893, + "logits/rejected": -3.1351873874664307, + "logps/chosen": -252.38705444335938, + "logps/rejected": -747.5341796875, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9950500726699829, + "rewards/margins": 4.2521820068359375, + "rewards/rejected": -3.257132053375244, + "step": 596 + }, + { + "epoch": 0.38, + "learning_rate": 7.115430975837456e-08, + "logits/chosen": -3.220630168914795, + "logits/rejected": -3.0968284606933594, + "logps/chosen": -282.58905029296875, + "logps/rejected": -688.4778442382812, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9710372686386108, + "rewards/margins": 3.6422929763793945, + "rewards/rejected": -2.671255588531494, + "step": 597 + }, + { + "epoch": 0.38, + "learning_rate": 7.10606892922069e-08, + "logits/chosen": -3.2353811264038086, + "logits/rejected": -3.0621280670166016, + "logps/chosen": -243.8739013671875, + "logps/rejected": -462.1775207519531, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.144239068031311, + "rewards/margins": 3.033902645111084, + "rewards/rejected": -1.8896636962890625, + "step": 598 + }, + { + "epoch": 0.38, + "learning_rate": 7.096697897693661e-08, + "logits/chosen": -3.2531919479370117, + "logits/rejected": -3.127220869064331, + "logps/chosen": -231.37591552734375, + "logps/rejected": -806.760986328125, + "loss": 0.2821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9551445245742798, + "rewards/margins": 4.537301063537598, + "rewards/rejected": -3.5821564197540283, + "step": 599 + }, + { + "epoch": 0.38, + "learning_rate": 7.087317921235058e-08, + "logits/chosen": -3.233673572540283, + "logits/rejected": -3.0670156478881836, + "logps/chosen": -260.9984436035156, + "logps/rejected": -933.2327880859375, + "loss": 0.2934, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1190239191055298, + "rewards/margins": 5.137862682342529, + "rewards/rejected": -4.018838882446289, + "step": 600 + }, + { + "epoch": 0.38, + "learning_rate": 7.077929039861737e-08, + "logits/chosen": -3.2936198711395264, + "logits/rejected": -3.211731433868408, + "logps/chosen": -284.2884216308594, + "logps/rejected": -603.756103515625, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9645324945449829, + "rewards/margins": 3.8896636962890625, + "rewards/rejected": -2.925131320953369, + "step": 601 + }, + { + "epoch": 0.38, + "learning_rate": 7.068531293628533e-08, + "logits/chosen": -3.151669979095459, + "logits/rejected": -3.065850257873535, + "logps/chosen": -244.0352783203125, + "logps/rejected": -644.3038330078125, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0903176069259644, + "rewards/margins": 3.5531013011932373, + "rewards/rejected": -2.4627838134765625, + "step": 602 + }, + { + "epoch": 0.38, + "learning_rate": 7.059124722628112e-08, + "logits/chosen": -3.179324150085449, + "logits/rejected": -3.1088316440582275, + "logps/chosen": -251.5950927734375, + "logps/rejected": -243.86207580566406, + "loss": 0.3295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.086281657218933, + "rewards/margins": 2.1912760734558105, + "rewards/rejected": -1.104994297027588, + "step": 603 + }, + { + "epoch": 0.38, + "learning_rate": 7.049709366990778e-08, + "logits/chosen": -3.234683036804199, + "logits/rejected": -3.1036996841430664, + "logps/chosen": -267.5047607421875, + "logps/rejected": -438.36322021484375, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9030655026435852, + "rewards/margins": 2.9875216484069824, + "rewards/rejected": -2.084455966949463, + "step": 604 + }, + { + "epoch": 0.39, + "learning_rate": 7.040285266884319e-08, + "logits/chosen": -3.1380672454833984, + "logits/rejected": -3.0925345420837402, + "logps/chosen": -291.91217041015625, + "logps/rejected": -551.72705078125, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1675186157226562, + "rewards/margins": 3.670243978500366, + "rewards/rejected": -2.502725124359131, + "step": 605 + }, + { + "epoch": 0.39, + "learning_rate": 7.030852462513826e-08, + "logits/chosen": -3.282935619354248, + "logits/rejected": -3.120927333831787, + "logps/chosen": -233.52476501464844, + "logps/rejected": -398.0768737792969, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0715911388397217, + "rewards/margins": 2.6992392539978027, + "rewards/rejected": -1.627648115158081, + "step": 606 + }, + { + "epoch": 0.39, + "learning_rate": 7.021410994121524e-08, + "logits/chosen": -3.2209079265594482, + "logits/rejected": -3.1094963550567627, + "logps/chosen": -247.76895141601562, + "logps/rejected": -999.6246948242188, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2232933044433594, + "rewards/margins": 5.707376956939697, + "rewards/rejected": -4.484083652496338, + "step": 607 + }, + { + "epoch": 0.39, + "learning_rate": 7.011960901986604e-08, + "logits/chosen": -3.3166732788085938, + "logits/rejected": -3.110151767730713, + "logps/chosen": -250.36026000976562, + "logps/rejected": -520.3017578125, + "loss": 0.3389, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1883735656738281, + "rewards/margins": 3.4843971729278564, + "rewards/rejected": -2.2960236072540283, + "step": 608 + }, + { + "epoch": 0.39, + "learning_rate": 7.002502226425041e-08, + "logits/chosen": -3.2837576866149902, + "logits/rejected": -3.0622220039367676, + "logps/chosen": -268.04266357421875, + "logps/rejected": -552.4852294921875, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.149623155593872, + "rewards/margins": 3.395148277282715, + "rewards/rejected": -2.2455251216888428, + "step": 609 + }, + { + "epoch": 0.39, + "learning_rate": 6.993035007789434e-08, + "logits/chosen": -3.254826068878174, + "logits/rejected": -3.1681571006774902, + "logps/chosen": -229.6615753173828, + "logps/rejected": -676.25537109375, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0486778020858765, + "rewards/margins": 3.923278331756592, + "rewards/rejected": -2.874600410461426, + "step": 610 + }, + { + "epoch": 0.39, + "learning_rate": 6.983559286468826e-08, + "logits/chosen": -3.226693630218506, + "logits/rejected": -3.035413980484009, + "logps/chosen": -274.842041015625, + "logps/rejected": -1132.2021484375, + "loss": 0.2985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9763091802597046, + "rewards/margins": 4.996530055999756, + "rewards/rejected": -4.020220756530762, + "step": 611 + }, + { + "epoch": 0.39, + "learning_rate": 6.974075102888534e-08, + "logits/chosen": -3.1974687576293945, + "logits/rejected": -3.1221437454223633, + "logps/chosen": -297.755126953125, + "logps/rejected": -583.0824584960938, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9562729597091675, + "rewards/margins": 3.445997714996338, + "rewards/rejected": -2.489724636077881, + "step": 612 + }, + { + "epoch": 0.39, + "learning_rate": 6.96458249750998e-08, + "logits/chosen": -3.2866928577423096, + "logits/rejected": -3.1627283096313477, + "logps/chosen": -259.12677001953125, + "logps/rejected": -341.47412109375, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.925434947013855, + "rewards/margins": 2.604444980621338, + "rewards/rejected": -1.679010033607483, + "step": 613 + }, + { + "epoch": 0.39, + "learning_rate": 6.955081510830509e-08, + "logits/chosen": -3.2361416816711426, + "logits/rejected": -3.208250045776367, + "logps/chosen": -262.73358154296875, + "logps/rejected": -813.87451171875, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0593665838241577, + "rewards/margins": 5.406522750854492, + "rewards/rejected": -4.347156047821045, + "step": 614 + }, + { + "epoch": 0.39, + "learning_rate": 6.945572183383229e-08, + "logits/chosen": -3.233001232147217, + "logits/rejected": -3.2253613471984863, + "logps/chosen": -267.8673095703125, + "logps/rejected": -590.981689453125, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8996612429618835, + "rewards/margins": 3.9885740280151367, + "rewards/rejected": -3.0889129638671875, + "step": 615 + }, + { + "epoch": 0.39, + "learning_rate": 6.936054555736826e-08, + "logits/chosen": -3.256013870239258, + "logits/rejected": -3.1587815284729004, + "logps/chosen": -267.91961669921875, + "logps/rejected": -918.553466796875, + "loss": 0.2794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.081109642982483, + "rewards/margins": 5.261187553405762, + "rewards/rejected": -4.180078506469727, + "step": 616 + }, + { + "epoch": 0.39, + "learning_rate": 6.9265286684954e-08, + "logits/chosen": -3.177145004272461, + "logits/rejected": -3.1192638874053955, + "logps/chosen": -252.241943359375, + "logps/rejected": -376.3951416015625, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1167160272598267, + "rewards/margins": 2.572648525238037, + "rewards/rejected": -1.4559326171875, + "step": 617 + }, + { + "epoch": 0.39, + "learning_rate": 6.916994562298285e-08, + "logits/chosen": -3.2498884201049805, + "logits/rejected": -3.0509307384490967, + "logps/chosen": -279.14642333984375, + "logps/rejected": -766.4974365234375, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9282455444335938, + "rewards/margins": 4.399131774902344, + "rewards/rejected": -3.47088623046875, + "step": 618 + }, + { + "epoch": 0.39, + "learning_rate": 6.907452277819883e-08, + "logits/chosen": -3.255805730819702, + "logits/rejected": -3.156419277191162, + "logps/chosen": -272.9581298828125, + "logps/rejected": -696.35302734375, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0439316034317017, + "rewards/margins": 4.172551155090332, + "rewards/rejected": -3.128619432449341, + "step": 619 + }, + { + "epoch": 0.4, + "learning_rate": 6.897901855769483e-08, + "logits/chosen": -3.3037002086639404, + "logits/rejected": -3.1746273040771484, + "logps/chosen": -268.41595458984375, + "logps/rejected": -674.3356323242188, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0439667701721191, + "rewards/margins": 4.37504768371582, + "rewards/rejected": -3.331080913543701, + "step": 620 + }, + { + "epoch": 0.4, + "learning_rate": 6.888343336891087e-08, + "logits/chosen": -3.270580768585205, + "logits/rejected": -3.0838232040405273, + "logps/chosen": -272.63861083984375, + "logps/rejected": -678.610595703125, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.902906060218811, + "rewards/margins": 3.590975284576416, + "rewards/rejected": -2.6880691051483154, + "step": 621 + }, + { + "epoch": 0.4, + "learning_rate": 6.878776761963247e-08, + "logits/chosen": -3.260668992996216, + "logits/rejected": -3.122955322265625, + "logps/chosen": -270.99114990234375, + "logps/rejected": -661.0062255859375, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.142866611480713, + "rewards/margins": 4.073760986328125, + "rewards/rejected": -2.930894613265991, + "step": 622 + }, + { + "epoch": 0.4, + "learning_rate": 6.869202171798882e-08, + "logits/chosen": -3.2100720405578613, + "logits/rejected": -3.0809593200683594, + "logps/chosen": -265.34161376953125, + "logps/rejected": -420.6043701171875, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.253662109375, + "rewards/margins": 3.1358566284179688, + "rewards/rejected": -1.8821945190429688, + "step": 623 + }, + { + "epoch": 0.4, + "learning_rate": 6.859619607245101e-08, + "logits/chosen": -3.268411159515381, + "logits/rejected": -3.112490177154541, + "logps/chosen": -310.6707458496094, + "logps/rejected": -280.17071533203125, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.203582763671875, + "rewards/margins": 2.4286351203918457, + "rewards/rejected": -1.2250522375106812, + "step": 624 + }, + { + "epoch": 0.4, + "learning_rate": 6.850029109183037e-08, + "logits/chosen": -3.2197155952453613, + "logits/rejected": -3.0792455673217773, + "logps/chosen": -269.3706970214844, + "logps/rejected": -652.0711669921875, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9704040288925171, + "rewards/margins": 4.077631950378418, + "rewards/rejected": -3.1072282791137695, + "step": 625 + }, + { + "epoch": 0.4, + "learning_rate": 6.840430718527668e-08, + "logits/chosen": -3.25508451461792, + "logits/rejected": -3.149697780609131, + "logps/chosen": -237.0126190185547, + "logps/rejected": -483.77313232421875, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0304641723632812, + "rewards/margins": 3.3948349952697754, + "rewards/rejected": -2.364370822906494, + "step": 626 + }, + { + "epoch": 0.4, + "learning_rate": 6.830824476227645e-08, + "logits/chosen": -3.269791603088379, + "logits/rejected": -3.113802433013916, + "logps/chosen": -243.49679565429688, + "logps/rejected": -1092.099365234375, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1545600891113281, + "rewards/margins": 4.91743278503418, + "rewards/rejected": -3.7628722190856934, + "step": 627 + }, + { + "epoch": 0.4, + "learning_rate": 6.821210423265115e-08, + "logits/chosen": -3.25040864944458, + "logits/rejected": -2.959059476852417, + "logps/chosen": -270.0518798828125, + "logps/rejected": -1760.2657470703125, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.01678466796875, + "rewards/margins": 7.298010349273682, + "rewards/rejected": -6.281225681304932, + "step": 628 + }, + { + "epoch": 0.4, + "learning_rate": 6.811588600655542e-08, + "logits/chosen": -3.255092144012451, + "logits/rejected": -3.148210287094116, + "logps/chosen": -248.14405822753906, + "logps/rejected": -592.4345703125, + "loss": 0.3059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0733306407928467, + "rewards/margins": 3.6645233631134033, + "rewards/rejected": -2.5911927223205566, + "step": 629 + }, + { + "epoch": 0.4, + "learning_rate": 6.801959049447546e-08, + "logits/chosen": -3.260125160217285, + "logits/rejected": -3.16839599609375, + "logps/chosen": -256.21978759765625, + "logps/rejected": -279.69146728515625, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1469521522521973, + "rewards/margins": 2.1688027381896973, + "rewards/rejected": -1.0218505859375, + "step": 630 + }, + { + "epoch": 0.4, + "learning_rate": 6.79232181072271e-08, + "logits/chosen": -3.238243579864502, + "logits/rejected": -3.103541851043701, + "logps/chosen": -241.3365478515625, + "logps/rejected": -339.1345520019531, + "loss": 0.3182, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0259239673614502, + "rewards/margins": 2.410114288330078, + "rewards/rejected": -1.3841904401779175, + "step": 631 + }, + { + "epoch": 0.4, + "learning_rate": 6.782676925595419e-08, + "logits/chosen": -3.2400052547454834, + "logits/rejected": -3.105231285095215, + "logps/chosen": -274.76434326171875, + "logps/rejected": -662.2783203125, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.203331708908081, + "rewards/margins": 4.276148319244385, + "rewards/rejected": -3.0728163719177246, + "step": 632 + }, + { + "epoch": 0.4, + "learning_rate": 6.773024435212677e-08, + "logits/chosen": -3.214679002761841, + "logits/rejected": -3.028506278991699, + "logps/chosen": -264.1768798828125, + "logps/rejected": -1171.8670654296875, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1406646966934204, + "rewards/margins": 5.523687362670898, + "rewards/rejected": -4.383023262023926, + "step": 633 + }, + { + "epoch": 0.4, + "learning_rate": 6.763364380753936e-08, + "logits/chosen": -3.2153820991516113, + "logits/rejected": -3.13515043258667, + "logps/chosen": -258.3233337402344, + "logps/rejected": -991.47412109375, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1315094232559204, + "rewards/margins": 5.919842720031738, + "rewards/rejected": -4.788332939147949, + "step": 634 + }, + { + "epoch": 0.4, + "learning_rate": 6.753696803430913e-08, + "logits/chosen": -3.2829384803771973, + "logits/rejected": -3.089212656021118, + "logps/chosen": -222.6449432373047, + "logps/rejected": -589.8428344726562, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2391738891601562, + "rewards/margins": 3.5290863513946533, + "rewards/rejected": -2.289912462234497, + "step": 635 + }, + { + "epoch": 0.41, + "learning_rate": 6.744021744487422e-08, + "logits/chosen": -3.2637739181518555, + "logits/rejected": -3.1319003105163574, + "logps/chosen": -280.4748229980469, + "logps/rejected": -868.957275390625, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9980118274688721, + "rewards/margins": 5.257804870605469, + "rewards/rejected": -4.259793281555176, + "step": 636 + }, + { + "epoch": 0.41, + "learning_rate": 6.734339245199194e-08, + "logits/chosen": -3.1894121170043945, + "logits/rejected": -3.052877902984619, + "logps/chosen": -258.01531982421875, + "logps/rejected": -1014.0223999023438, + "loss": 0.3247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1863007545471191, + "rewards/margins": 5.7838134765625, + "rewards/rejected": -4.597513198852539, + "step": 637 + }, + { + "epoch": 0.41, + "learning_rate": 6.724649346873706e-08, + "logits/chosen": -3.236996650695801, + "logits/rejected": -3.071587562561035, + "logps/chosen": -260.23956298828125, + "logps/rejected": -727.2010498046875, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1189560890197754, + "rewards/margins": 3.9886856079101562, + "rewards/rejected": -2.869729518890381, + "step": 638 + }, + { + "epoch": 0.41, + "learning_rate": 6.714952090849994e-08, + "logits/chosen": -3.2102246284484863, + "logits/rejected": -3.094937324523926, + "logps/chosen": -304.8114929199219, + "logps/rejected": -457.3473205566406, + "loss": 0.3549, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1947784423828125, + "rewards/margins": 3.206956624984741, + "rewards/rejected": -2.0121779441833496, + "step": 639 + }, + { + "epoch": 0.41, + "learning_rate": 6.70524751849849e-08, + "logits/chosen": -3.238091230392456, + "logits/rejected": -3.1247401237487793, + "logps/chosen": -230.4821014404297, + "logps/rejected": -780.70849609375, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9990936517715454, + "rewards/margins": 4.205301284790039, + "rewards/rejected": -3.206207275390625, + "step": 640 + }, + { + "epoch": 0.41, + "learning_rate": 6.695535671220836e-08, + "logits/chosen": -3.234633207321167, + "logits/rejected": -3.12563419342041, + "logps/chosen": -245.2729949951172, + "logps/rejected": -431.490478515625, + "loss": 0.2803, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1749756336212158, + "rewards/margins": 3.206918239593506, + "rewards/rejected": -2.031942844390869, + "step": 641 + }, + { + "epoch": 0.41, + "learning_rate": 6.685816590449707e-08, + "logits/chosen": -3.3009095191955566, + "logits/rejected": -3.0219969749450684, + "logps/chosen": -246.86834716796875, + "logps/rejected": -1041.348876953125, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1460602283477783, + "rewards/margins": 5.100729465484619, + "rewards/rejected": -3.95466947555542, + "step": 642 + }, + { + "epoch": 0.41, + "learning_rate": 6.676090317648645e-08, + "logits/chosen": -3.2560133934020996, + "logits/rejected": -3.1365060806274414, + "logps/chosen": -272.2787170410156, + "logps/rejected": -658.2109985351562, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0118461847305298, + "rewards/margins": 3.7947471141815186, + "rewards/rejected": -2.7829010486602783, + "step": 643 + }, + { + "epoch": 0.41, + "learning_rate": 6.666356894311866e-08, + "logits/chosen": -3.251596450805664, + "logits/rejected": -3.1273863315582275, + "logps/chosen": -251.47718811035156, + "logps/rejected": -741.0098876953125, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1845612525939941, + "rewards/margins": 4.803576469421387, + "rewards/rejected": -3.619015693664551, + "step": 644 + }, + { + "epoch": 0.41, + "learning_rate": 6.6566163619641e-08, + "logits/chosen": -3.235955238342285, + "logits/rejected": -3.0298666954040527, + "logps/chosen": -220.70611572265625, + "logps/rejected": -411.18072509765625, + "loss": 0.2738, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1374496221542358, + "rewards/margins": 2.884568691253662, + "rewards/rejected": -1.7471191883087158, + "step": 645 + }, + { + "epoch": 0.41, + "learning_rate": 6.646868762160398e-08, + "logits/chosen": -3.304365634918213, + "logits/rejected": -3.138376474380493, + "logps/chosen": -259.3811950683594, + "logps/rejected": -312.239990234375, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1396400928497314, + "rewards/margins": 2.6701698303222656, + "rewards/rejected": -1.5305297374725342, + "step": 646 + }, + { + "epoch": 0.41, + "learning_rate": 6.637114136485968e-08, + "logits/chosen": -3.229557991027832, + "logits/rejected": -3.1645126342773438, + "logps/chosen": -243.72225952148438, + "logps/rejected": -531.3258056640625, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9972084164619446, + "rewards/margins": 3.3659005165100098, + "rewards/rejected": -2.368691921234131, + "step": 647 + }, + { + "epoch": 0.41, + "learning_rate": 6.627352526555989e-08, + "logits/chosen": -3.192286968231201, + "logits/rejected": -3.0357375144958496, + "logps/chosen": -259.8262634277344, + "logps/rejected": -472.6700744628906, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0902000665664673, + "rewards/margins": 2.735173225402832, + "rewards/rejected": -1.6449730396270752, + "step": 648 + }, + { + "epoch": 0.41, + "learning_rate": 6.617583974015436e-08, + "logits/chosen": -3.263240337371826, + "logits/rejected": -3.113016128540039, + "logps/chosen": -262.76226806640625, + "logps/rejected": -847.297607421875, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8147506713867188, + "rewards/margins": 5.275343418121338, + "rewards/rejected": -4.460592746734619, + "step": 649 + }, + { + "epoch": 0.41, + "learning_rate": 6.607808520538905e-08, + "logits/chosen": -3.256317138671875, + "logits/rejected": -3.0857627391815186, + "logps/chosen": -274.5611572265625, + "logps/rejected": -542.493408203125, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3194992542266846, + "rewards/margins": 3.9636049270629883, + "rewards/rejected": -2.6441054344177246, + "step": 650 + }, + { + "epoch": 0.41, + "learning_rate": 6.598026207830428e-08, + "logits/chosen": -3.253070592880249, + "logits/rejected": -3.0019378662109375, + "logps/chosen": -249.25619506835938, + "logps/rejected": -1082.962890625, + "loss": 0.2878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0963311195373535, + "rewards/margins": 5.120992660522461, + "rewards/rejected": -4.024661540985107, + "step": 651 + }, + { + "epoch": 0.42, + "learning_rate": 6.588237077623305e-08, + "logits/chosen": -3.272627830505371, + "logits/rejected": -3.1432008743286133, + "logps/chosen": -257.2317199707031, + "logps/rejected": -516.533203125, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2232544422149658, + "rewards/margins": 3.995802402496338, + "rewards/rejected": -2.772547960281372, + "step": 652 + }, + { + "epoch": 0.42, + "learning_rate": 6.578441171679915e-08, + "logits/chosen": -3.271942615509033, + "logits/rejected": -3.1220057010650635, + "logps/chosen": -259.72113037109375, + "logps/rejected": -292.62677001953125, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2092537879943848, + "rewards/margins": 2.599499225616455, + "rewards/rejected": -1.3902454376220703, + "step": 653 + }, + { + "epoch": 0.42, + "learning_rate": 6.568638531791554e-08, + "logits/chosen": -3.27270770072937, + "logits/rejected": -2.986670970916748, + "logps/chosen": -251.82737731933594, + "logps/rejected": -886.43994140625, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0169388055801392, + "rewards/margins": 4.553776741027832, + "rewards/rejected": -3.5368380546569824, + "step": 654 + }, + { + "epoch": 0.42, + "learning_rate": 6.558829199778233e-08, + "logits/chosen": -3.265223979949951, + "logits/rejected": -3.0631868839263916, + "logps/chosen": -272.54931640625, + "logps/rejected": -376.01556396484375, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0543715953826904, + "rewards/margins": 2.5924744606018066, + "rewards/rejected": -1.5381027460098267, + "step": 655 + }, + { + "epoch": 0.42, + "learning_rate": 6.549013217488525e-08, + "logits/chosen": -3.2702994346618652, + "logits/rejected": -3.259650707244873, + "logps/chosen": -242.02606201171875, + "logps/rejected": -659.6177978515625, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9199539422988892, + "rewards/margins": 4.355525016784668, + "rewards/rejected": -3.4355714321136475, + "step": 656 + }, + { + "epoch": 0.42, + "learning_rate": 6.539190626799365e-08, + "logits/chosen": -3.2699697017669678, + "logits/rejected": -3.1219677925109863, + "logps/chosen": -227.8836212158203, + "logps/rejected": -626.3045654296875, + "loss": 0.2661, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1900482177734375, + "rewards/margins": 4.664950370788574, + "rewards/rejected": -3.474902391433716, + "step": 657 + }, + { + "epoch": 0.42, + "learning_rate": 6.529361469615887e-08, + "logits/chosen": -3.1857192516326904, + "logits/rejected": -3.1942789554595947, + "logps/chosen": -247.5574493408203, + "logps/rejected": -1045.357177734375, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2572486400604248, + "rewards/margins": 6.570174217224121, + "rewards/rejected": -5.312925815582275, + "step": 658 + }, + { + "epoch": 0.42, + "learning_rate": 6.519525787871234e-08, + "logits/chosen": -3.243391990661621, + "logits/rejected": -3.1460914611816406, + "logps/chosen": -252.83412170410156, + "logps/rejected": -632.6868896484375, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7758201360702515, + "rewards/margins": 3.9934563636779785, + "rewards/rejected": -3.2176361083984375, + "step": 659 + }, + { + "epoch": 0.42, + "learning_rate": 6.50968362352639e-08, + "logits/chosen": -3.1958229541778564, + "logits/rejected": -3.10756254196167, + "logps/chosen": -286.71484375, + "logps/rejected": -645.5418701171875, + "loss": 0.3099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1584136486053467, + "rewards/margins": 3.9970507621765137, + "rewards/rejected": -2.838636875152588, + "step": 660 + }, + { + "epoch": 0.42, + "learning_rate": 6.49983501856999e-08, + "logits/chosen": -3.2914514541625977, + "logits/rejected": -3.1319286823272705, + "logps/chosen": -237.7256622314453, + "logps/rejected": -472.40966796875, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9757988452911377, + "rewards/margins": 3.246976375579834, + "rewards/rejected": -2.2711777687072754, + "step": 661 + }, + { + "epoch": 0.42, + "learning_rate": 6.489980015018148e-08, + "logits/chosen": -3.2308993339538574, + "logits/rejected": -3.0712103843688965, + "logps/chosen": -252.45993041992188, + "logps/rejected": -378.4193115234375, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.230298638343811, + "rewards/margins": 2.6498138904571533, + "rewards/rejected": -1.4195152521133423, + "step": 662 + }, + { + "epoch": 0.42, + "learning_rate": 6.480118654914275e-08, + "logits/chosen": -3.2434794902801514, + "logits/rejected": -3.0722172260284424, + "logps/chosen": -281.66436767578125, + "logps/rejected": -375.106201171875, + "loss": 0.3046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2582390308380127, + "rewards/margins": 3.0584511756896973, + "rewards/rejected": -1.8002121448516846, + "step": 663 + }, + { + "epoch": 0.42, + "learning_rate": 6.470250980328903e-08, + "logits/chosen": -3.239778995513916, + "logits/rejected": -3.030277729034424, + "logps/chosen": -253.76528930664062, + "logps/rejected": -1029.136962890625, + "loss": 0.3197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0991272926330566, + "rewards/margins": 5.072690010070801, + "rewards/rejected": -3.973562717437744, + "step": 664 + }, + { + "epoch": 0.42, + "learning_rate": 6.460377033359499e-08, + "logits/chosen": -3.3149526119232178, + "logits/rejected": -3.0698423385620117, + "logps/chosen": -279.904296875, + "logps/rejected": -509.29644775390625, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3064972162246704, + "rewards/margins": 3.650630235671997, + "rewards/rejected": -2.344132900238037, + "step": 665 + }, + { + "epoch": 0.42, + "learning_rate": 6.45049685613029e-08, + "logits/chosen": -3.230177402496338, + "logits/rejected": -3.0496292114257812, + "logps/chosen": -262.74334716796875, + "logps/rejected": -570.52978515625, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1097862720489502, + "rewards/margins": 3.6786856651306152, + "rewards/rejected": -2.568899631500244, + "step": 666 + }, + { + "epoch": 0.43, + "learning_rate": 6.440610490792085e-08, + "logits/chosen": -3.2471704483032227, + "logits/rejected": -3.0745701789855957, + "logps/chosen": -237.25225830078125, + "logps/rejected": -453.5918884277344, + "loss": 0.2793, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0124794244766235, + "rewards/margins": 3.1296730041503906, + "rewards/rejected": -2.1171936988830566, + "step": 667 + }, + { + "epoch": 0.43, + "learning_rate": 6.430717979522088e-08, + "logits/chosen": -3.257082939147949, + "logits/rejected": -3.1266257762908936, + "logps/chosen": -258.51641845703125, + "logps/rejected": -984.7080688476562, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.118476152420044, + "rewards/margins": 6.192054271697998, + "rewards/rejected": -5.073577880859375, + "step": 668 + }, + { + "epoch": 0.43, + "learning_rate": 6.420819364523731e-08, + "logits/chosen": -3.232201099395752, + "logits/rejected": -3.1102771759033203, + "logps/chosen": -294.30047607421875, + "logps/rejected": -558.680419921875, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2014663219451904, + "rewards/margins": 3.2460007667541504, + "rewards/rejected": -2.044534206390381, + "step": 669 + }, + { + "epoch": 0.43, + "learning_rate": 6.410914688026476e-08, + "logits/chosen": -3.240095615386963, + "logits/rejected": -3.1648383140563965, + "logps/chosen": -240.13250732421875, + "logps/rejected": -811.7657470703125, + "loss": 0.2815, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1264206171035767, + "rewards/margins": 4.6736040115356445, + "rewards/rejected": -3.5471832752227783, + "step": 670 + }, + { + "epoch": 0.43, + "learning_rate": 6.401003992285652e-08, + "logits/chosen": -3.270123243331909, + "logits/rejected": -3.1256184577941895, + "logps/chosen": -264.0732116699219, + "logps/rejected": -721.8040771484375, + "loss": 0.3197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3254151344299316, + "rewards/margins": 4.9112548828125, + "rewards/rejected": -3.5858399868011475, + "step": 671 + }, + { + "epoch": 0.43, + "learning_rate": 6.391087319582263e-08, + "logits/chosen": -3.2856192588806152, + "logits/rejected": -3.2076213359832764, + "logps/chosen": -240.462158203125, + "logps/rejected": -626.5499877929688, + "loss": 0.2718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9949349164962769, + "rewards/margins": 4.12637996673584, + "rewards/rejected": -3.1314454078674316, + "step": 672 + }, + { + "epoch": 0.43, + "learning_rate": 6.381164712222814e-08, + "logits/chosen": -3.193040370941162, + "logits/rejected": -3.139495611190796, + "logps/chosen": -283.8707580566406, + "logps/rejected": -1053.9482421875, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2100090980529785, + "rewards/margins": 5.758312225341797, + "rewards/rejected": -4.548303127288818, + "step": 673 + }, + { + "epoch": 0.43, + "learning_rate": 6.371236212539129e-08, + "logits/chosen": -3.2375316619873047, + "logits/rejected": -3.039773464202881, + "logps/chosen": -304.7027282714844, + "logps/rejected": -1208.306884765625, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.262373447418213, + "rewards/margins": 6.307197570800781, + "rewards/rejected": -5.044824600219727, + "step": 674 + }, + { + "epoch": 0.43, + "learning_rate": 6.361301862888164e-08, + "logits/chosen": -3.206918716430664, + "logits/rejected": -3.101038932800293, + "logps/chosen": -278.6039123535156, + "logps/rejected": -619.19775390625, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9490097165107727, + "rewards/margins": 3.6560592651367188, + "rewards/rejected": -2.707049608230591, + "step": 675 + }, + { + "epoch": 0.43, + "learning_rate": 6.351361705651841e-08, + "logits/chosen": -3.1856863498687744, + "logits/rejected": -3.197894334793091, + "logps/chosen": -274.5509033203125, + "logps/rejected": -868.6858520507812, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1153724193572998, + "rewards/margins": 4.950056076049805, + "rewards/rejected": -3.8346831798553467, + "step": 676 + }, + { + "epoch": 0.43, + "learning_rate": 6.341415783236855e-08, + "logits/chosen": -3.257843255996704, + "logits/rejected": -3.185007095336914, + "logps/chosen": -241.24078369140625, + "logps/rejected": -705.5994873046875, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0373696088790894, + "rewards/margins": 3.850287914276123, + "rewards/rejected": -2.812918186187744, + "step": 677 + }, + { + "epoch": 0.43, + "learning_rate": 6.331464138074491e-08, + "logits/chosen": -3.2571654319763184, + "logits/rejected": -3.2297823429107666, + "logps/chosen": -236.95535278320312, + "logps/rejected": -458.53369140625, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.135585069656372, + "rewards/margins": 3.141122341156006, + "rewards/rejected": -2.005537509918213, + "step": 678 + }, + { + "epoch": 0.43, + "learning_rate": 6.321506812620457e-08, + "logits/chosen": -3.2016844749450684, + "logits/rejected": -3.1492698192596436, + "logps/chosen": -270.8961181640625, + "logps/rejected": -781.1340942382812, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1345794200897217, + "rewards/margins": 4.765545845031738, + "rewards/rejected": -3.6309661865234375, + "step": 679 + }, + { + "epoch": 0.43, + "learning_rate": 6.311543849354689e-08, + "logits/chosen": -3.2408854961395264, + "logits/rejected": -3.0776405334472656, + "logps/chosen": -263.8012390136719, + "logps/rejected": -1037.8388671875, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.237532138824463, + "rewards/margins": 6.016120910644531, + "rewards/rejected": -4.778588771820068, + "step": 680 + }, + { + "epoch": 0.43, + "learning_rate": 6.301575290781173e-08, + "logits/chosen": -3.289414405822754, + "logits/rejected": -3.192552089691162, + "logps/chosen": -242.95919799804688, + "logps/rejected": -703.9187622070312, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1235138177871704, + "rewards/margins": 4.930493354797363, + "rewards/rejected": -3.8069796562194824, + "step": 681 + }, + { + "epoch": 0.43, + "learning_rate": 6.291601179427774e-08, + "logits/chosen": -3.2682480812072754, + "logits/rejected": -3.0645008087158203, + "logps/chosen": -249.56198120117188, + "logps/rejected": -292.2048034667969, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3220009803771973, + "rewards/margins": 2.5578131675720215, + "rewards/rejected": -1.2358124256134033, + "step": 682 + }, + { + "epoch": 0.44, + "learning_rate": 6.281621557846038e-08, + "logits/chosen": -3.247715473175049, + "logits/rejected": -3.13863205909729, + "logps/chosen": -284.39752197265625, + "logps/rejected": -476.85137939453125, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.228703260421753, + "rewards/margins": 3.501115322113037, + "rewards/rejected": -2.272412061691284, + "step": 683 + }, + { + "epoch": 0.44, + "learning_rate": 6.271636468611021e-08, + "logits/chosen": -3.3048033714294434, + "logits/rejected": -3.149029493331909, + "logps/chosen": -289.59075927734375, + "logps/rejected": -391.9459228515625, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3418700695037842, + "rewards/margins": 3.1024093627929688, + "rewards/rejected": -1.7605392932891846, + "step": 684 + }, + { + "epoch": 0.44, + "learning_rate": 6.261645954321109e-08, + "logits/chosen": -3.2297840118408203, + "logits/rejected": -3.0560760498046875, + "logps/chosen": -284.4635009765625, + "logps/rejected": -360.1093444824219, + "loss": 0.3594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.951128363609314, + "rewards/margins": 2.3362069129943848, + "rewards/rejected": -1.3850784301757812, + "step": 685 + }, + { + "epoch": 0.44, + "learning_rate": 6.251650057597826e-08, + "logits/chosen": -3.243971347808838, + "logits/rejected": -3.16678524017334, + "logps/chosen": -266.87213134765625, + "logps/rejected": -401.19366455078125, + "loss": 0.3035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1278367042541504, + "rewards/margins": 3.228621006011963, + "rewards/rejected": -2.1007843017578125, + "step": 686 + }, + { + "epoch": 0.44, + "learning_rate": 6.241648821085665e-08, + "logits/chosen": -3.1942224502563477, + "logits/rejected": -3.065464496612549, + "logps/chosen": -254.15609741210938, + "logps/rejected": -1133.6505126953125, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0328254699707031, + "rewards/margins": 5.02721643447876, + "rewards/rejected": -3.9943909645080566, + "step": 687 + }, + { + "epoch": 0.44, + "learning_rate": 6.231642287451894e-08, + "logits/chosen": -3.2696375846862793, + "logits/rejected": -3.060393810272217, + "logps/chosen": -253.6329803466797, + "logps/rejected": -1536.707275390625, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0058104991912842, + "rewards/margins": 6.650842666625977, + "rewards/rejected": -5.645031929016113, + "step": 688 + }, + { + "epoch": 0.44, + "learning_rate": 6.221630499386383e-08, + "logits/chosen": -3.1557459831237793, + "logits/rejected": -3.1744794845581055, + "logps/chosen": -275.9868469238281, + "logps/rejected": -742.018310546875, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0600991249084473, + "rewards/margins": 4.578522682189941, + "rewards/rejected": -3.518423557281494, + "step": 689 + }, + { + "epoch": 0.44, + "learning_rate": 6.211613499601418e-08, + "logits/chosen": -3.231558322906494, + "logits/rejected": -3.0343546867370605, + "logps/chosen": -285.39532470703125, + "logps/rejected": -990.5440673828125, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9316909909248352, + "rewards/margins": 4.43044900894165, + "rewards/rejected": -3.498757839202881, + "step": 690 + }, + { + "epoch": 0.44, + "learning_rate": 6.201591330831517e-08, + "logits/chosen": -3.228025436401367, + "logits/rejected": -3.1709225177764893, + "logps/chosen": -275.63946533203125, + "logps/rejected": -872.8828125, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0911834239959717, + "rewards/margins": 5.202359199523926, + "rewards/rejected": -4.111175537109375, + "step": 691 + }, + { + "epoch": 0.44, + "learning_rate": 6.191564035833254e-08, + "logits/chosen": -3.306725025177002, + "logits/rejected": -3.1256237030029297, + "logps/chosen": -252.68475341796875, + "logps/rejected": -473.69573974609375, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9251022338867188, + "rewards/margins": 3.237597703933716, + "rewards/rejected": -2.312495470046997, + "step": 692 + }, + { + "epoch": 0.44, + "learning_rate": 6.181531657385067e-08, + "logits/chosen": -3.219155788421631, + "logits/rejected": -3.1338319778442383, + "logps/chosen": -272.1253356933594, + "logps/rejected": -500.90142822265625, + "loss": 0.2991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.234808325767517, + "rewards/margins": 3.880841016769409, + "rewards/rejected": -2.6460328102111816, + "step": 693 + }, + { + "epoch": 0.44, + "learning_rate": 6.171494238287088e-08, + "logits/chosen": -3.191708564758301, + "logits/rejected": -3.1768367290496826, + "logps/chosen": -307.30194091796875, + "logps/rejected": -516.26513671875, + "loss": 0.3128, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1601760387420654, + "rewards/margins": 3.676265239715576, + "rewards/rejected": -2.5160889625549316, + "step": 694 + }, + { + "epoch": 0.44, + "learning_rate": 6.161451821360947e-08, + "logits/chosen": -3.2734713554382324, + "logits/rejected": -3.168649196624756, + "logps/chosen": -263.6370544433594, + "logps/rejected": -546.1144409179688, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2208259105682373, + "rewards/margins": 3.715256452560425, + "rewards/rejected": -2.4944305419921875, + "step": 695 + }, + { + "epoch": 0.44, + "learning_rate": 6.151404449449599e-08, + "logits/chosen": -3.239344596862793, + "logits/rejected": -2.988429307937622, + "logps/chosen": -256.3049011230469, + "logps/rejected": -2626.0048828125, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2504456043243408, + "rewards/margins": 9.572198867797852, + "rewards/rejected": -8.321752548217773, + "step": 696 + }, + { + "epoch": 0.44, + "learning_rate": 6.141352165417137e-08, + "logits/chosen": -3.298875331878662, + "logits/rejected": -3.110008716583252, + "logps/chosen": -261.4716491699219, + "logps/rejected": -494.24310302734375, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1748757362365723, + "rewards/margins": 3.193253517150879, + "rewards/rejected": -2.0183777809143066, + "step": 697 + }, + { + "epoch": 0.44, + "learning_rate": 6.131295012148612e-08, + "logits/chosen": -3.2487642765045166, + "logits/rejected": -3.018545627593994, + "logps/chosen": -244.36459350585938, + "logps/rejected": -660.6981201171875, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2239166498184204, + "rewards/margins": 4.024125576019287, + "rewards/rejected": -2.8002090454101562, + "step": 698 + }, + { + "epoch": 0.45, + "learning_rate": 6.121233032549841e-08, + "logits/chosen": -3.221096992492676, + "logits/rejected": -3.0551419258117676, + "logps/chosen": -261.89581298828125, + "logps/rejected": -570.96630859375, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.210200548171997, + "rewards/margins": 4.129920959472656, + "rewards/rejected": -2.919720411300659, + "step": 699 + }, + { + "epoch": 0.45, + "learning_rate": 6.111166269547243e-08, + "logits/chosen": -3.29272198677063, + "logits/rejected": -3.1049938201904297, + "logps/chosen": -280.56536865234375, + "logps/rejected": -615.7660522460938, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9493774771690369, + "rewards/margins": 3.795398235321045, + "rewards/rejected": -2.8460206985473633, + "step": 700 + }, + { + "epoch": 0.45, + "learning_rate": 6.101094766087637e-08, + "logits/chosen": -3.253357410430908, + "logits/rejected": -3.14511775970459, + "logps/chosen": -293.42401123046875, + "logps/rejected": -512.1869506835938, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1476044654846191, + "rewards/margins": 3.7056398391723633, + "rewards/rejected": -2.558035373687744, + "step": 701 + }, + { + "epoch": 0.45, + "learning_rate": 6.091018565138062e-08, + "logits/chosen": -3.2995119094848633, + "logits/rejected": -3.159092903137207, + "logps/chosen": -260.6828918457031, + "logps/rejected": -552.5848388671875, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.116113305091858, + "rewards/margins": 3.767178535461426, + "rewards/rejected": -2.6510651111602783, + "step": 702 + }, + { + "epoch": 0.45, + "learning_rate": 6.080937709685604e-08, + "logits/chosen": -3.2402706146240234, + "logits/rejected": -3.106663942337036, + "logps/chosen": -238.6129150390625, + "logps/rejected": -662.66259765625, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2821670770645142, + "rewards/margins": 4.286504745483398, + "rewards/rejected": -3.004338026046753, + "step": 703 + }, + { + "epoch": 0.45, + "learning_rate": 6.070852242737205e-08, + "logits/chosen": -3.2597100734710693, + "logits/rejected": -3.1632180213928223, + "logps/chosen": -256.8880615234375, + "logps/rejected": -1051.880126953125, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2051606178283691, + "rewards/margins": 6.8575439453125, + "rewards/rejected": -5.652383327484131, + "step": 704 + }, + { + "epoch": 0.45, + "learning_rate": 6.060762207319479e-08, + "logits/chosen": -3.289513111114502, + "logits/rejected": -3.1171579360961914, + "logps/chosen": -231.77169799804688, + "logps/rejected": -354.68328857421875, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.356367588043213, + "rewards/margins": 2.922776699066162, + "rewards/rejected": -1.5664093494415283, + "step": 705 + }, + { + "epoch": 0.45, + "learning_rate": 6.050667646478527e-08, + "logits/chosen": -3.299774169921875, + "logits/rejected": -3.1370320320129395, + "logps/chosen": -258.1971130371094, + "logps/rejected": -1000.6177368164062, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1467331647872925, + "rewards/margins": 5.655989646911621, + "rewards/rejected": -4.509256362915039, + "step": 706 + }, + { + "epoch": 0.45, + "learning_rate": 6.040568603279764e-08, + "logits/chosen": -3.2418084144592285, + "logits/rejected": -3.10087251663208, + "logps/chosen": -231.324951171875, + "logps/rejected": -553.3638916015625, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2150757312774658, + "rewards/margins": 3.9611024856567383, + "rewards/rejected": -2.7460267543792725, + "step": 707 + }, + { + "epoch": 0.45, + "learning_rate": 6.030465120807719e-08, + "logits/chosen": -3.2148704528808594, + "logits/rejected": -3.118741512298584, + "logps/chosen": -301.95013427734375, + "logps/rejected": -421.0423583984375, + "loss": 0.318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2320754528045654, + "rewards/margins": 3.162301540374756, + "rewards/rejected": -1.9302260875701904, + "step": 708 + }, + { + "epoch": 0.45, + "learning_rate": 6.020357242165868e-08, + "logits/chosen": -3.24859619140625, + "logits/rejected": -3.144928455352783, + "logps/chosen": -242.94692993164062, + "logps/rejected": -544.6812744140625, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9727821350097656, + "rewards/margins": 3.860732316970825, + "rewards/rejected": -2.8879501819610596, + "step": 709 + }, + { + "epoch": 0.45, + "learning_rate": 6.010245010476435e-08, + "logits/chosen": -3.2444610595703125, + "logits/rejected": -3.0471296310424805, + "logps/chosen": -231.33590698242188, + "logps/rejected": -613.6610107421875, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9987335205078125, + "rewards/margins": 4.329833984375, + "rewards/rejected": -3.3311007022857666, + "step": 710 + }, + { + "epoch": 0.45, + "learning_rate": 6.000128468880222e-08, + "logits/chosen": -3.265688896179199, + "logits/rejected": -3.2133126258850098, + "logps/chosen": -227.54270935058594, + "logps/rejected": -765.0145263671875, + "loss": 0.2703, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0722694396972656, + "rewards/margins": 4.848905563354492, + "rewards/rejected": -3.7766358852386475, + "step": 711 + }, + { + "epoch": 0.45, + "learning_rate": 5.990007660536411e-08, + "logits/chosen": -3.2578125, + "logits/rejected": -3.0742578506469727, + "logps/chosen": -286.1873779296875, + "logps/rejected": -739.43798828125, + "loss": 0.3054, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3169617652893066, + "rewards/margins": 4.531611919403076, + "rewards/rejected": -3.2146501541137695, + "step": 712 + }, + { + "epoch": 0.45, + "learning_rate": 5.979882628622389e-08, + "logits/chosen": -3.257758855819702, + "logits/rejected": -3.110711097717285, + "logps/chosen": -246.28079223632812, + "logps/rejected": -903.649169921875, + "loss": 0.2828, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.216546654701233, + "rewards/margins": 5.75300931930542, + "rewards/rejected": -4.536462783813477, + "step": 713 + }, + { + "epoch": 0.46, + "learning_rate": 5.969753416333564e-08, + "logits/chosen": -3.2207555770874023, + "logits/rejected": -3.0861942768096924, + "logps/chosen": -282.84661865234375, + "logps/rejected": -779.4188842773438, + "loss": 0.3052, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0736618041992188, + "rewards/margins": 4.187861442565918, + "rewards/rejected": -3.1141998767852783, + "step": 714 + }, + { + "epoch": 0.46, + "learning_rate": 5.959620066883175e-08, + "logits/chosen": -3.22202205657959, + "logits/rejected": -3.1317365169525146, + "logps/chosen": -283.7796630859375, + "logps/rejected": -948.5114135742188, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2664108276367188, + "rewards/margins": 6.373747825622559, + "rewards/rejected": -5.107336521148682, + "step": 715 + }, + { + "epoch": 0.46, + "learning_rate": 5.949482623502116e-08, + "logits/chosen": -3.268834114074707, + "logits/rejected": -3.0511891841888428, + "logps/chosen": -238.0427703857422, + "logps/rejected": -698.5928955078125, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4536781311035156, + "rewards/margins": 4.086337089538574, + "rewards/rejected": -2.6326591968536377, + "step": 716 + }, + { + "epoch": 0.46, + "learning_rate": 5.939341129438738e-08, + "logits/chosen": -3.267418622970581, + "logits/rejected": -3.168358325958252, + "logps/chosen": -255.043701171875, + "logps/rejected": -652.1651611328125, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1867172718048096, + "rewards/margins": 4.485784530639648, + "rewards/rejected": -3.299067497253418, + "step": 717 + }, + { + "epoch": 0.46, + "learning_rate": 5.929195627958683e-08, + "logits/chosen": -3.255711555480957, + "logits/rejected": -3.117825508117676, + "logps/chosen": -216.626220703125, + "logps/rejected": -996.1156005859375, + "loss": 0.26, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3780922889709473, + "rewards/margins": 6.4425177574157715, + "rewards/rejected": -5.064425468444824, + "step": 718 + }, + { + "epoch": 0.46, + "learning_rate": 5.919046162344683e-08, + "logits/chosen": -3.288215160369873, + "logits/rejected": -3.113583564758301, + "logps/chosen": -256.187255859375, + "logps/rejected": -268.170654296875, + "loss": 0.3212, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.264062523841858, + "rewards/margins": 2.5383026599884033, + "rewards/rejected": -1.2742401361465454, + "step": 719 + }, + { + "epoch": 0.46, + "learning_rate": 5.908892775896382e-08, + "logits/chosen": -3.2839784622192383, + "logits/rejected": -3.155642509460449, + "logps/chosen": -267.02264404296875, + "logps/rejected": -1448.7205810546875, + "loss": 0.2883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6362273693084717, + "rewards/margins": 6.872464179992676, + "rewards/rejected": -5.236236572265625, + "step": 720 + }, + { + "epoch": 0.46, + "learning_rate": 5.898735511930154e-08, + "logits/chosen": -3.2839999198913574, + "logits/rejected": -3.202322006225586, + "logps/chosen": -262.8075256347656, + "logps/rejected": -560.08740234375, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.115923285484314, + "rewards/margins": 3.557600975036621, + "rewards/rejected": -2.4416778087615967, + "step": 721 + }, + { + "epoch": 0.46, + "learning_rate": 5.8885744137789126e-08, + "logits/chosen": -3.2515759468078613, + "logits/rejected": -3.115744113922119, + "logps/chosen": -267.3365783691406, + "logps/rejected": -351.71722412109375, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.130669355392456, + "rewards/margins": 2.3596243858337402, + "rewards/rejected": -1.2289550304412842, + "step": 722 + }, + { + "epoch": 0.46, + "learning_rate": 5.87840952479193e-08, + "logits/chosen": -3.2497057914733887, + "logits/rejected": -3.091675281524658, + "logps/chosen": -267.09942626953125, + "logps/rejected": -1080.642822265625, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.382887363433838, + "rewards/margins": 6.076472282409668, + "rewards/rejected": -4.693585395812988, + "step": 723 + }, + { + "epoch": 0.46, + "learning_rate": 5.868240888334653e-08, + "logits/chosen": -3.2575950622558594, + "logits/rejected": -3.163058280944824, + "logps/chosen": -249.7154998779297, + "logps/rejected": -1114.96533203125, + "loss": 0.2863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.132856011390686, + "rewards/margins": 6.82158899307251, + "rewards/rejected": -5.688733100891113, + "step": 724 + }, + { + "epoch": 0.46, + "learning_rate": 5.8580685477885086e-08, + "logits/chosen": -3.1925063133239746, + "logits/rejected": -3.1713852882385254, + "logps/chosen": -259.99371337890625, + "logps/rejected": -707.19970703125, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3365752696990967, + "rewards/margins": 4.931494235992432, + "rewards/rejected": -3.594918727874756, + "step": 725 + }, + { + "epoch": 0.46, + "learning_rate": 5.847892546550737e-08, + "logits/chosen": -3.239408493041992, + "logits/rejected": -3.060739755630493, + "logps/chosen": -300.53131103515625, + "logps/rejected": -877.5219116210938, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7939415574073792, + "rewards/margins": 4.479028701782227, + "rewards/rejected": -3.6850876808166504, + "step": 726 + }, + { + "epoch": 0.46, + "learning_rate": 5.8377129280341865e-08, + "logits/chosen": -3.2183289527893066, + "logits/rejected": -3.1810567378997803, + "logps/chosen": -288.28302001953125, + "logps/rejected": -621.5866088867188, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3633590936660767, + "rewards/margins": 4.432664394378662, + "rewards/rejected": -3.069305419921875, + "step": 727 + }, + { + "epoch": 0.46, + "learning_rate": 5.82752973566714e-08, + "logits/chosen": -3.2423903942108154, + "logits/rejected": -3.138906478881836, + "logps/chosen": -267.29266357421875, + "logps/rejected": -829.9496459960938, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0629791021347046, + "rewards/margins": 5.3854522705078125, + "rewards/rejected": -4.322473526000977, + "step": 728 + }, + { + "epoch": 0.46, + "learning_rate": 5.817343012893131e-08, + "logits/chosen": -3.2583811283111572, + "logits/rejected": -3.1403121948242188, + "logps/chosen": -243.02969360351562, + "logps/rejected": -628.0733642578125, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.156684160232544, + "rewards/margins": 4.211139678955078, + "rewards/rejected": -3.0544557571411133, + "step": 729 + }, + { + "epoch": 0.47, + "learning_rate": 5.8071528031707494e-08, + "logits/chosen": -3.2500808238983154, + "logits/rejected": -2.9522829055786133, + "logps/chosen": -206.52476501464844, + "logps/rejected": -1133.843017578125, + "loss": 0.2574, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1675498485565186, + "rewards/margins": 5.727756500244141, + "rewards/rejected": -4.560206413269043, + "step": 730 + }, + { + "epoch": 0.47, + "learning_rate": 5.796959149973463e-08, + "logits/chosen": -3.1893234252929688, + "logits/rejected": -3.136024236679077, + "logps/chosen": -262.1903076171875, + "logps/rejected": -937.2138671875, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2909018993377686, + "rewards/margins": 5.230716705322266, + "rewards/rejected": -3.939814805984497, + "step": 731 + }, + { + "epoch": 0.47, + "learning_rate": 5.78676209678943e-08, + "logits/chosen": -3.3024678230285645, + "logits/rejected": -2.924048900604248, + "logps/chosen": -244.73095703125, + "logps/rejected": -1480.7763671875, + "loss": 0.2816, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2856674194335938, + "rewards/margins": 6.929753303527832, + "rewards/rejected": -5.644085884094238, + "step": 732 + }, + { + "epoch": 0.47, + "learning_rate": 5.776561687121315e-08, + "logits/chosen": -3.217041492462158, + "logits/rejected": -3.097848653793335, + "logps/chosen": -241.10797119140625, + "logps/rejected": -565.6630859375, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2754684686660767, + "rewards/margins": 3.3885836601257324, + "rewards/rejected": -2.113115072250366, + "step": 733 + }, + { + "epoch": 0.47, + "learning_rate": 5.766357964486101e-08, + "logits/chosen": -3.2430977821350098, + "logits/rejected": -3.1885194778442383, + "logps/chosen": -240.96237182617188, + "logps/rejected": -943.4083862304688, + "loss": 0.3016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2231712341308594, + "rewards/margins": 6.205269813537598, + "rewards/rejected": -4.982098579406738, + "step": 734 + }, + { + "epoch": 0.47, + "learning_rate": 5.7561509724149035e-08, + "logits/chosen": -3.247375965118408, + "logits/rejected": -3.1862454414367676, + "logps/chosen": -282.81396484375, + "logps/rejected": -855.7865600585938, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3796653747558594, + "rewards/margins": 5.668013572692871, + "rewards/rejected": -4.288348197937012, + "step": 735 + }, + { + "epoch": 0.47, + "learning_rate": 5.7459407544527875e-08, + "logits/chosen": -3.2185184955596924, + "logits/rejected": -3.114236831665039, + "logps/chosen": -270.2156066894531, + "logps/rejected": -599.6084594726562, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0749832391738892, + "rewards/margins": 3.959812879562378, + "rewards/rejected": -2.8848297595977783, + "step": 736 + }, + { + "epoch": 0.47, + "learning_rate": 5.7357273541585805e-08, + "logits/chosen": -3.2451627254486084, + "logits/rejected": -3.1300392150878906, + "logps/chosen": -259.2723388671875, + "logps/rejected": -440.16033935546875, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4485688209533691, + "rewards/margins": 3.4944400787353516, + "rewards/rejected": -2.0458710193634033, + "step": 737 + }, + { + "epoch": 0.47, + "learning_rate": 5.7255108151046846e-08, + "logits/chosen": -3.256478786468506, + "logits/rejected": -3.021470546722412, + "logps/chosen": -223.9180145263672, + "logps/rejected": -1120.7176513671875, + "loss": 0.2583, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1916687488555908, + "rewards/margins": 5.81278133392334, + "rewards/rejected": -4.62111234664917, + "step": 738 + }, + { + "epoch": 0.47, + "learning_rate": 5.7152911808768965e-08, + "logits/chosen": -3.251474380493164, + "logits/rejected": -3.090053081512451, + "logps/chosen": -262.49859619140625, + "logps/rejected": -383.35076904296875, + "loss": 0.3072, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3171287775039673, + "rewards/margins": 3.1139748096466064, + "rewards/rejected": -1.7968460321426392, + "step": 739 + }, + { + "epoch": 0.47, + "learning_rate": 5.705068495074211e-08, + "logits/chosen": -3.2750611305236816, + "logits/rejected": -3.168043851852417, + "logps/chosen": -253.5315704345703, + "logps/rejected": -447.78826904296875, + "loss": 0.3048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1730027198791504, + "rewards/margins": 3.579193353652954, + "rewards/rejected": -2.4061906337738037, + "step": 740 + }, + { + "epoch": 0.47, + "learning_rate": 5.6948428013086506e-08, + "logits/chosen": -3.254154682159424, + "logits/rejected": -3.0937342643737793, + "logps/chosen": -220.32708740234375, + "logps/rejected": -354.77496337890625, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.338350772857666, + "rewards/margins": 2.7257256507873535, + "rewards/rejected": -1.3873748779296875, + "step": 741 + }, + { + "epoch": 0.47, + "learning_rate": 5.6846141432050595e-08, + "logits/chosen": -3.218142032623291, + "logits/rejected": -3.1052629947662354, + "logps/chosen": -285.5289001464844, + "logps/rejected": -395.0816650390625, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.314479112625122, + "rewards/margins": 3.271989345550537, + "rewards/rejected": -1.9575104713439941, + "step": 742 + }, + { + "epoch": 0.47, + "learning_rate": 5.674382564400938e-08, + "logits/chosen": -3.274945020675659, + "logits/rejected": -3.1385021209716797, + "logps/chosen": -277.593017578125, + "logps/rejected": -505.638671875, + "loss": 0.3127, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2347290515899658, + "rewards/margins": 3.5482726097106934, + "rewards/rejected": -2.3135437965393066, + "step": 743 + }, + { + "epoch": 0.47, + "learning_rate": 5.664148108546242e-08, + "logits/chosen": -3.2228174209594727, + "logits/rejected": -3.1606454849243164, + "logps/chosen": -272.0680847167969, + "logps/rejected": -883.376220703125, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.079921007156372, + "rewards/margins": 6.21291971206665, + "rewards/rejected": -5.132998466491699, + "step": 744 + }, + { + "epoch": 0.47, + "learning_rate": 5.6539108193032015e-08, + "logits/chosen": -3.3239738941192627, + "logits/rejected": -3.176309108734131, + "logps/chosen": -263.5393981933594, + "logps/rejected": -474.248291015625, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2332442998886108, + "rewards/margins": 3.7036728858947754, + "rewards/rejected": -2.470428466796875, + "step": 745 + }, + { + "epoch": 0.48, + "learning_rate": 5.6436707403461334e-08, + "logits/chosen": -3.2444868087768555, + "logits/rejected": -3.0631308555603027, + "logps/chosen": -260.5082702636719, + "logps/rejected": -861.2071533203125, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2310020923614502, + "rewards/margins": 4.608579635620117, + "rewards/rejected": -3.377577304840088, + "step": 746 + }, + { + "epoch": 0.48, + "learning_rate": 5.633427915361261e-08, + "logits/chosen": -3.2303433418273926, + "logits/rejected": -3.086585521697998, + "logps/chosen": -271.35693359375, + "logps/rejected": -737.554443359375, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1608482599258423, + "rewards/margins": 4.433879852294922, + "rewards/rejected": -3.273031711578369, + "step": 747 + }, + { + "epoch": 0.48, + "learning_rate": 5.623182388046517e-08, + "logits/chosen": -3.2484264373779297, + "logits/rejected": -3.1233601570129395, + "logps/chosen": -224.201171875, + "logps/rejected": -529.2217407226562, + "loss": 0.2587, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.064043402671814, + "rewards/margins": 3.6536705493927, + "rewards/rejected": -2.589627265930176, + "step": 748 + }, + { + "epoch": 0.48, + "learning_rate": 5.612934202111367e-08, + "logits/chosen": -3.2768845558166504, + "logits/rejected": -3.0766103267669678, + "logps/chosen": -256.9068603515625, + "logps/rejected": -246.67355346679688, + "loss": 0.3281, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0044350624084473, + "rewards/margins": 2.0377793312072754, + "rewards/rejected": -1.0333442687988281, + "step": 749 + }, + { + "epoch": 0.48, + "learning_rate": 5.6026834012766146e-08, + "logits/chosen": -3.202866315841675, + "logits/rejected": -3.140598773956299, + "logps/chosen": -324.2156982421875, + "logps/rejected": -782.813232421875, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2858352661132812, + "rewards/margins": 4.897688388824463, + "rewards/rejected": -3.6118531227111816, + "step": 750 + }, + { + "epoch": 0.48, + "learning_rate": 5.5924300292742244e-08, + "logits/chosen": -3.2776436805725098, + "logits/rejected": -3.102142095565796, + "logps/chosen": -258.5435791015625, + "logps/rejected": -651.7376098632812, + "loss": 0.299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5113716125488281, + "rewards/margins": 4.523059844970703, + "rewards/rejected": -3.011688232421875, + "step": 751 + }, + { + "epoch": 0.48, + "learning_rate": 5.5821741298471245e-08, + "logits/chosen": -3.207672595977783, + "logits/rejected": -3.0843312740325928, + "logps/chosen": -260.82257080078125, + "logps/rejected": -953.2174072265625, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1509673595428467, + "rewards/margins": 5.587286949157715, + "rewards/rejected": -4.436319351196289, + "step": 752 + }, + { + "epoch": 0.48, + "learning_rate": 5.57191574674903e-08, + "logits/chosen": -3.340733528137207, + "logits/rejected": -3.0723323822021484, + "logps/chosen": -245.663330078125, + "logps/rejected": -733.21826171875, + "loss": 0.2653, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.347754716873169, + "rewards/margins": 4.689871788024902, + "rewards/rejected": -3.3421173095703125, + "step": 753 + }, + { + "epoch": 0.48, + "learning_rate": 5.561654923744248e-08, + "logits/chosen": -3.19484281539917, + "logits/rejected": -3.0686511993408203, + "logps/chosen": -274.9570007324219, + "logps/rejected": -1131.00927734375, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1807587146759033, + "rewards/margins": 6.269064426422119, + "rewards/rejected": -5.088305473327637, + "step": 754 + }, + { + "epoch": 0.48, + "learning_rate": 5.5513917046074966e-08, + "logits/chosen": -3.225614070892334, + "logits/rejected": -3.136812210083008, + "logps/chosen": -275.3087463378906, + "logps/rejected": -665.6967163085938, + "loss": 0.2964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6815261840820312, + "rewards/margins": 3.955488681793213, + "rewards/rejected": -3.2739624977111816, + "step": 755 + }, + { + "epoch": 0.48, + "learning_rate": 5.54112613312372e-08, + "logits/chosen": -3.239199638366699, + "logits/rejected": -3.132276773452759, + "logps/chosen": -279.6530456542969, + "logps/rejected": -1262.7979736328125, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.332331895828247, + "rewards/margins": 6.2333173751831055, + "rewards/rejected": -4.9009857177734375, + "step": 756 + }, + { + "epoch": 0.48, + "learning_rate": 5.5308582530878914e-08, + "logits/chosen": -3.2862191200256348, + "logits/rejected": -3.069009304046631, + "logps/chosen": -301.0624694824219, + "logps/rejected": -440.5684509277344, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.30731201171875, + "rewards/margins": 3.3786163330078125, + "rewards/rejected": -2.0713043212890625, + "step": 757 + }, + { + "epoch": 0.48, + "learning_rate": 5.5205881083048354e-08, + "logits/chosen": -3.306549549102783, + "logits/rejected": -3.151865005493164, + "logps/chosen": -261.56781005859375, + "logps/rejected": -616.4158325195312, + "loss": 0.2866, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1344337463378906, + "rewards/margins": 4.00324821472168, + "rewards/rejected": -2.868813991546631, + "step": 758 + }, + { + "epoch": 0.48, + "learning_rate": 5.510315742589041e-08, + "logits/chosen": -3.267911672592163, + "logits/rejected": -3.1317386627197266, + "logps/chosen": -223.14816284179688, + "logps/rejected": -482.32598876953125, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.324866533279419, + "rewards/margins": 3.4828989505767822, + "rewards/rejected": -2.1580324172973633, + "step": 759 + }, + { + "epoch": 0.48, + "learning_rate": 5.5000411997644694e-08, + "logits/chosen": -3.1534290313720703, + "logits/rejected": -3.112243890762329, + "logps/chosen": -244.7600555419922, + "logps/rejected": -1222.482666015625, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3666244745254517, + "rewards/margins": 7.259382247924805, + "rewards/rejected": -5.892758369445801, + "step": 760 + }, + { + "epoch": 0.49, + "learning_rate": 5.4897645236643663e-08, + "logits/chosen": -3.159553050994873, + "logits/rejected": -3.1503853797912598, + "logps/chosen": -249.36473083496094, + "logps/rejected": -554.248779296875, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2541732788085938, + "rewards/margins": 4.105623245239258, + "rewards/rejected": -2.851449489593506, + "step": 761 + }, + { + "epoch": 0.49, + "learning_rate": 5.479485758131088e-08, + "logits/chosen": -3.17903208732605, + "logits/rejected": -3.138093948364258, + "logps/chosen": -259.7050476074219, + "logps/rejected": -977.8814086914062, + "loss": 0.2983, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9445892572402954, + "rewards/margins": 5.663439750671387, + "rewards/rejected": -4.718851089477539, + "step": 762 + }, + { + "epoch": 0.49, + "learning_rate": 5.469204947015896e-08, + "logits/chosen": -3.288407802581787, + "logits/rejected": -3.2334861755371094, + "logps/chosen": -252.3090362548828, + "logps/rejected": -780.7552490234375, + "loss": 0.2774, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.151909589767456, + "rewards/margins": 5.60494327545166, + "rewards/rejected": -4.453033447265625, + "step": 763 + }, + { + "epoch": 0.49, + "learning_rate": 5.458922134178784e-08, + "logits/chosen": -3.26237154006958, + "logits/rejected": -3.0503945350646973, + "logps/chosen": -246.1390380859375, + "logps/rejected": -3942.619384765625, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.259020209312439, + "rewards/margins": 7.1895012855529785, + "rewards/rejected": -5.93048095703125, + "step": 764 + }, + { + "epoch": 0.49, + "learning_rate": 5.44863736348828e-08, + "logits/chosen": -3.217618942260742, + "logits/rejected": -3.062906265258789, + "logps/chosen": -282.45220947265625, + "logps/rejected": -623.56005859375, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3463027477264404, + "rewards/margins": 4.906161308288574, + "rewards/rejected": -3.559858798980713, + "step": 765 + }, + { + "epoch": 0.49, + "learning_rate": 5.43835067882127e-08, + "logits/chosen": -3.310088634490967, + "logits/rejected": -3.1422815322875977, + "logps/chosen": -275.69696044921875, + "logps/rejected": -846.8067016601562, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.259851098060608, + "rewards/margins": 5.145901679992676, + "rewards/rejected": -3.8860504627227783, + "step": 766 + }, + { + "epoch": 0.49, + "learning_rate": 5.4280621240628034e-08, + "logits/chosen": -3.26016902923584, + "logits/rejected": -3.1481316089630127, + "logps/chosen": -267.6517333984375, + "logps/rejected": -455.85186767578125, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1074280738830566, + "rewards/margins": 3.381688117980957, + "rewards/rejected": -2.2742600440979004, + "step": 767 + }, + { + "epoch": 0.49, + "learning_rate": 5.417771743105907e-08, + "logits/chosen": -3.227877140045166, + "logits/rejected": -3.082292079925537, + "logps/chosen": -250.81109619140625, + "logps/rejected": -629.5337524414062, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2466721534729004, + "rewards/margins": 4.360350608825684, + "rewards/rejected": -3.113677978515625, + "step": 768 + }, + { + "epoch": 0.49, + "learning_rate": 5.4074795798513984e-08, + "logits/chosen": -3.2568039894104004, + "logits/rejected": -3.1453661918640137, + "logps/chosen": -249.64019775390625, + "logps/rejected": -584.8233032226562, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1606537103652954, + "rewards/margins": 4.532862663269043, + "rewards/rejected": -3.372209310531616, + "step": 769 + }, + { + "epoch": 0.49, + "learning_rate": 5.3971856782077e-08, + "logits/chosen": -3.239262342453003, + "logits/rejected": -3.150084972381592, + "logps/chosen": -213.57415771484375, + "logps/rejected": -686.5186157226562, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9217315912246704, + "rewards/margins": 4.645510673522949, + "rewards/rejected": -3.7237792015075684, + "step": 770 + }, + { + "epoch": 0.49, + "learning_rate": 5.386890082090651e-08, + "logits/chosen": -3.1698436737060547, + "logits/rejected": -3.05365252494812, + "logps/chosen": -236.8397216796875, + "logps/rejected": -357.8713073730469, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.128157138824463, + "rewards/margins": 2.8561692237854004, + "rewards/rejected": -1.7280120849609375, + "step": 771 + }, + { + "epoch": 0.49, + "learning_rate": 5.376592835423318e-08, + "logits/chosen": -3.2760307788848877, + "logits/rejected": -3.204519748687744, + "logps/chosen": -249.23696899414062, + "logps/rejected": -778.638671875, + "loss": 0.2714, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.338566541671753, + "rewards/margins": 5.185643196105957, + "rewards/rejected": -3.847076416015625, + "step": 772 + }, + { + "epoch": 0.49, + "learning_rate": 5.36629398213581e-08, + "logits/chosen": -3.293525218963623, + "logits/rejected": -3.236456871032715, + "logps/chosen": -259.64825439453125, + "logps/rejected": -765.787353515625, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.175550937652588, + "rewards/margins": 5.319068908691406, + "rewards/rejected": -4.143518447875977, + "step": 773 + }, + { + "epoch": 0.49, + "learning_rate": 5.35599356616509e-08, + "logits/chosen": -3.183631181716919, + "logits/rejected": -3.1286771297454834, + "logps/chosen": -264.71868896484375, + "logps/rejected": -800.5596923828125, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2836289405822754, + "rewards/margins": 5.220715522766113, + "rewards/rejected": -3.937086582183838, + "step": 774 + }, + { + "epoch": 0.49, + "learning_rate": 5.345691631454788e-08, + "logits/chosen": -3.1670780181884766, + "logits/rejected": -3.114151954650879, + "logps/chosen": -260.997314453125, + "logps/rejected": -633.682373046875, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.223700761795044, + "rewards/margins": 4.5474066734313965, + "rewards/rejected": -3.3237061500549316, + "step": 775 + }, + { + "epoch": 0.49, + "learning_rate": 5.335388221955012e-08, + "logits/chosen": -3.1674017906188965, + "logits/rejected": -3.059783458709717, + "logps/chosen": -280.29803466796875, + "logps/rejected": -1051.3253173828125, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2504181861877441, + "rewards/margins": 6.982229709625244, + "rewards/rejected": -5.7318115234375, + "step": 776 + }, + { + "epoch": 0.5, + "learning_rate": 5.325083381622164e-08, + "logits/chosen": -3.2552149295806885, + "logits/rejected": -3.0877175331115723, + "logps/chosen": -260.3902587890625, + "logps/rejected": -341.22930908203125, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1899826526641846, + "rewards/margins": 2.560882568359375, + "rewards/rejected": -1.37090003490448, + "step": 777 + }, + { + "epoch": 0.5, + "learning_rate": 5.314777154418746e-08, + "logits/chosen": -3.2586655616760254, + "logits/rejected": -3.0823938846588135, + "logps/chosen": -251.89845275878906, + "logps/rejected": -561.0534057617188, + "loss": 0.2852, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0838744640350342, + "rewards/margins": 3.3945205211639404, + "rewards/rejected": -2.3106460571289062, + "step": 778 + }, + { + "epoch": 0.5, + "learning_rate": 5.304469584313184e-08, + "logits/chosen": -3.2533302307128906, + "logits/rejected": -3.165090560913086, + "logps/chosen": -254.0764617919922, + "logps/rejected": -667.9938354492188, + "loss": 0.3012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.156091332435608, + "rewards/margins": 4.686388969421387, + "rewards/rejected": -3.5302977561950684, + "step": 779 + }, + { + "epoch": 0.5, + "learning_rate": 5.2941607152796256e-08, + "logits/chosen": -3.217761993408203, + "logits/rejected": -3.1251721382141113, + "logps/chosen": -250.074951171875, + "logps/rejected": -1032.4520263671875, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2812583446502686, + "rewards/margins": 6.836582183837891, + "rewards/rejected": -5.555324077606201, + "step": 780 + }, + { + "epoch": 0.5, + "learning_rate": 5.283850591297764e-08, + "logits/chosen": -3.3099703788757324, + "logits/rejected": -3.1572303771972656, + "logps/chosen": -245.91127014160156, + "logps/rejected": -876.3511962890625, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4926345348358154, + "rewards/margins": 5.205310344696045, + "rewards/rejected": -3.7126755714416504, + "step": 781 + }, + { + "epoch": 0.5, + "learning_rate": 5.273539256352645e-08, + "logits/chosen": -3.2948596477508545, + "logits/rejected": -3.193735122680664, + "logps/chosen": -253.1470184326172, + "logps/rejected": -685.9242553710938, + "loss": 0.299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.216770887374878, + "rewards/margins": 4.5339813232421875, + "rewards/rejected": -3.3172106742858887, + "step": 782 + }, + { + "epoch": 0.5, + "learning_rate": 5.26322675443448e-08, + "logits/chosen": -3.2211782932281494, + "logits/rejected": -3.1952247619628906, + "logps/chosen": -314.13787841796875, + "logps/rejected": -551.8158569335938, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.082330346107483, + "rewards/margins": 4.062200546264648, + "rewards/rejected": -2.979870557785034, + "step": 783 + }, + { + "epoch": 0.5, + "learning_rate": 5.252913129538462e-08, + "logits/chosen": -3.1451311111450195, + "logits/rejected": -3.1117162704467773, + "logps/chosen": -263.7748718261719, + "logps/rejected": -765.4031982421875, + "loss": 0.3059, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.352268934249878, + "rewards/margins": 5.229182243347168, + "rewards/rejected": -3.876913547515869, + "step": 784 + }, + { + "epoch": 0.5, + "learning_rate": 5.242598425664569e-08, + "logits/chosen": -3.281672954559326, + "logits/rejected": -3.146439552307129, + "logps/chosen": -232.06741333007812, + "logps/rejected": -510.921875, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.278206706047058, + "rewards/margins": 3.4463555812835693, + "rewards/rejected": -2.168148994445801, + "step": 785 + }, + { + "epoch": 0.5, + "learning_rate": 5.2322826868173906e-08, + "logits/chosen": -3.2793030738830566, + "logits/rejected": -3.1883065700531006, + "logps/chosen": -256.99346923828125, + "logps/rejected": -785.7507934570312, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3688347339630127, + "rewards/margins": 5.666863441467285, + "rewards/rejected": -4.298028945922852, + "step": 786 + }, + { + "epoch": 0.5, + "learning_rate": 5.221965957005923e-08, + "logits/chosen": -3.2586684226989746, + "logits/rejected": -3.175499677658081, + "logps/chosen": -239.56317138671875, + "logps/rejected": -459.8846740722656, + "loss": 0.2913, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.260887861251831, + "rewards/margins": 3.2769722938537598, + "rewards/rejected": -2.0160841941833496, + "step": 787 + }, + { + "epoch": 0.5, + "learning_rate": 5.2116482802433946e-08, + "logits/chosen": -3.2671473026275635, + "logits/rejected": -3.0847063064575195, + "logps/chosen": -248.25155639648438, + "logps/rejected": -414.625, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2179558277130127, + "rewards/margins": 3.1639633178710938, + "rewards/rejected": -1.9460076093673706, + "step": 788 + }, + { + "epoch": 0.5, + "learning_rate": 5.201329700547076e-08, + "logits/chosen": -3.1792960166931152, + "logits/rejected": -3.045623540878296, + "logps/chosen": -257.2506103515625, + "logps/rejected": -1205.407958984375, + "loss": 0.2739, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3550300598144531, + "rewards/margins": 6.195035934448242, + "rewards/rejected": -4.840005874633789, + "step": 789 + }, + { + "epoch": 0.5, + "learning_rate": 5.191010261938084e-08, + "logits/chosen": -3.2051336765289307, + "logits/rejected": -3.066464900970459, + "logps/chosen": -286.255126953125, + "logps/rejected": -440.00970458984375, + "loss": 0.313, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.196476697921753, + "rewards/margins": 3.2581849098205566, + "rewards/rejected": -2.0617079734802246, + "step": 790 + }, + { + "epoch": 0.5, + "learning_rate": 5.180690008441202e-08, + "logits/chosen": -3.198859691619873, + "logits/rejected": -3.082568645477295, + "logps/chosen": -274.90521240234375, + "logps/rejected": -562.7221069335938, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0687789916992188, + "rewards/margins": 3.9790711402893066, + "rewards/rejected": -2.910292148590088, + "step": 791 + }, + { + "epoch": 0.5, + "learning_rate": 5.170368984084694e-08, + "logits/chosen": -3.28316068649292, + "logits/rejected": -3.051743268966675, + "logps/chosen": -211.52621459960938, + "logps/rejected": -667.8677368164062, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2497735023498535, + "rewards/margins": 3.9434595108032227, + "rewards/rejected": -2.693686008453369, + "step": 792 + }, + { + "epoch": 0.51, + "learning_rate": 5.1600472329001054e-08, + "logits/chosen": -3.231142520904541, + "logits/rejected": -3.121051549911499, + "logps/chosen": -260.951171875, + "logps/rejected": -471.626220703125, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.939012885093689, + "rewards/margins": 2.7430214881896973, + "rewards/rejected": -1.8040084838867188, + "step": 793 + }, + { + "epoch": 0.51, + "learning_rate": 5.149724798922089e-08, + "logits/chosen": -3.2083563804626465, + "logits/rejected": -3.04780912399292, + "logps/chosen": -260.2342529296875, + "logps/rejected": -698.513916015625, + "loss": 0.2757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3569793701171875, + "rewards/margins": 4.550421237945557, + "rewards/rejected": -3.193441867828369, + "step": 794 + }, + { + "epoch": 0.51, + "learning_rate": 5.139401726188207e-08, + "logits/chosen": -3.1735169887542725, + "logits/rejected": -3.1710638999938965, + "logps/chosen": -240.410888671875, + "logps/rejected": -415.97021484375, + "loss": 0.2928, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1196808815002441, + "rewards/margins": 3.290095567703247, + "rewards/rejected": -2.170414686203003, + "step": 795 + }, + { + "epoch": 0.51, + "learning_rate": 5.129078058738747e-08, + "logits/chosen": -3.218931198120117, + "logits/rejected": -3.1685705184936523, + "logps/chosen": -235.01873779296875, + "logps/rejected": -1088.0062255859375, + "loss": 0.283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1463744640350342, + "rewards/margins": 7.069561958312988, + "rewards/rejected": -5.923187255859375, + "step": 796 + }, + { + "epoch": 0.51, + "learning_rate": 5.1187538406165354e-08, + "logits/chosen": -3.2594029903411865, + "logits/rejected": -3.1306517124176025, + "logps/chosen": -258.90447998046875, + "logps/rejected": -1123.546630859375, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2262626886367798, + "rewards/margins": 5.912817001342773, + "rewards/rejected": -4.686553955078125, + "step": 797 + }, + { + "epoch": 0.51, + "learning_rate": 5.1084291158667435e-08, + "logits/chosen": -3.192535877227783, + "logits/rejected": -3.1589159965515137, + "logps/chosen": -272.2476501464844, + "logps/rejected": -998.7219848632812, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.16510009765625, + "rewards/margins": 5.8424072265625, + "rewards/rejected": -4.67730712890625, + "step": 798 + }, + { + "epoch": 0.51, + "learning_rate": 5.09810392853671e-08, + "logits/chosen": -3.2492215633392334, + "logits/rejected": -3.1399660110473633, + "logps/chosen": -212.95529174804688, + "logps/rejected": -801.6835327148438, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.185193657875061, + "rewards/margins": 5.780241012573242, + "rewards/rejected": -4.5950469970703125, + "step": 799 + }, + { + "epoch": 0.51, + "learning_rate": 5.0877783226757444e-08, + "logits/chosen": -3.239391326904297, + "logits/rejected": -3.0579357147216797, + "logps/chosen": -262.12646484375, + "logps/rejected": -505.6028747558594, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.185810923576355, + "rewards/margins": 3.285832166671753, + "rewards/rejected": -2.1000213623046875, + "step": 800 + }, + { + "epoch": 0.51, + "learning_rate": 5.077452342334938e-08, + "logits/chosen": -3.2034072875976562, + "logits/rejected": -3.1162986755371094, + "logps/chosen": -233.5153350830078, + "logps/rejected": -3960.724365234375, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.043330430984497, + "rewards/margins": 7.194941997528076, + "rewards/rejected": -6.151611328125, + "step": 801 + }, + { + "epoch": 0.51, + "learning_rate": 5.067126031566987e-08, + "logits/chosen": -3.2747957706451416, + "logits/rejected": -3.20676326751709, + "logps/chosen": -283.0986328125, + "logps/rejected": -505.9333801269531, + "loss": 0.3254, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2771728038787842, + "rewards/margins": 3.837628126144409, + "rewards/rejected": -2.560455322265625, + "step": 802 + }, + { + "epoch": 0.51, + "learning_rate": 5.056799434425992e-08, + "logits/chosen": -3.270975112915039, + "logits/rejected": -3.07293701171875, + "logps/chosen": -255.01614379882812, + "logps/rejected": -669.941162109375, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4092261791229248, + "rewards/margins": 4.027832984924316, + "rewards/rejected": -2.6186065673828125, + "step": 803 + }, + { + "epoch": 0.51, + "learning_rate": 5.0464725949672784e-08, + "logits/chosen": -3.300999164581299, + "logits/rejected": -3.169215679168701, + "logps/chosen": -289.02301025390625, + "logps/rejected": -583.187255859375, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2712433338165283, + "rewards/margins": 4.7860918045043945, + "rewards/rejected": -3.514848232269287, + "step": 804 + }, + { + "epoch": 0.51, + "learning_rate": 5.036145557247199e-08, + "logits/chosen": -3.2808103561401367, + "logits/rejected": -3.259978771209717, + "logps/chosen": -264.6490478515625, + "logps/rejected": -710.324951171875, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1878143548965454, + "rewards/margins": 5.5112152099609375, + "rewards/rejected": -4.323400974273682, + "step": 805 + }, + { + "epoch": 0.51, + "learning_rate": 5.025818365322963e-08, + "logits/chosen": -3.2392969131469727, + "logits/rejected": -3.0371806621551514, + "logps/chosen": -251.6566162109375, + "logps/rejected": -1001.6617431640625, + "loss": 0.3016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5367317199707031, + "rewards/margins": 5.6471686363220215, + "rewards/rejected": -4.110436916351318, + "step": 806 + }, + { + "epoch": 0.51, + "learning_rate": 5.0154910632524294e-08, + "logits/chosen": -3.2390923500061035, + "logits/rejected": -3.205148220062256, + "logps/chosen": -250.29029846191406, + "logps/rejected": -1025.278076171875, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2600730657577515, + "rewards/margins": 6.205532073974609, + "rewards/rejected": -4.945458889007568, + "step": 807 + }, + { + "epoch": 0.51, + "learning_rate": 5.0051636950939267e-08, + "logits/chosen": -3.246303081512451, + "logits/rejected": -3.16975998878479, + "logps/chosen": -243.78497314453125, + "logps/rejected": -458.42938232421875, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9027794003486633, + "rewards/margins": 3.1650307178497314, + "rewards/rejected": -2.262251377105713, + "step": 808 + }, + { + "epoch": 0.52, + "learning_rate": 4.994836304906073e-08, + "logits/chosen": -3.1790771484375, + "logits/rejected": -3.0400071144104004, + "logps/chosen": -264.4449462890625, + "logps/rejected": -910.6681518554688, + "loss": 0.2964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5527825355529785, + "rewards/margins": 5.079875946044922, + "rewards/rejected": -3.5270934104919434, + "step": 809 + }, + { + "epoch": 0.52, + "learning_rate": 4.9845089367475714e-08, + "logits/chosen": -3.200263500213623, + "logits/rejected": -3.0307414531707764, + "logps/chosen": -304.0167236328125, + "logps/rejected": -415.6505126953125, + "loss": 0.3445, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.291243076324463, + "rewards/margins": 2.920931339263916, + "rewards/rejected": -1.6296882629394531, + "step": 810 + }, + { + "epoch": 0.52, + "learning_rate": 4.974181634677036e-08, + "logits/chosen": -3.203570604324341, + "logits/rejected": -2.995429754257202, + "logps/chosen": -246.8053436279297, + "logps/rejected": -1360.123046875, + "loss": 0.2774, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1997802257537842, + "rewards/margins": 6.519525527954102, + "rewards/rejected": -5.319745063781738, + "step": 811 + }, + { + "epoch": 0.52, + "learning_rate": 4.963854442752801e-08, + "logits/chosen": -3.3154382705688477, + "logits/rejected": -3.1455299854278564, + "logps/chosen": -225.182373046875, + "logps/rejected": -685.1643676757812, + "loss": 0.2928, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4132027626037598, + "rewards/margins": 4.986774444580078, + "rewards/rejected": -3.5735719203948975, + "step": 812 + }, + { + "epoch": 0.52, + "learning_rate": 4.9535274050327225e-08, + "logits/chosen": -3.2573671340942383, + "logits/rejected": -3.184609889984131, + "logps/chosen": -263.50701904296875, + "logps/rejected": -450.92547607421875, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9016640186309814, + "rewards/margins": 3.4609274864196777, + "rewards/rejected": -2.5592637062072754, + "step": 813 + }, + { + "epoch": 0.52, + "learning_rate": 4.9432005655740066e-08, + "logits/chosen": -3.202157974243164, + "logits/rejected": -3.038264751434326, + "logps/chosen": -261.002197265625, + "logps/rejected": -912.7891845703125, + "loss": 0.3021, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1235946416854858, + "rewards/margins": 4.29228401184082, + "rewards/rejected": -3.168689250946045, + "step": 814 + }, + { + "epoch": 0.52, + "learning_rate": 4.932873968433013e-08, + "logits/chosen": -3.287548303604126, + "logits/rejected": -3.228242874145508, + "logps/chosen": -259.6133728027344, + "logps/rejected": -648.8674926757812, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2268822193145752, + "rewards/margins": 4.93757438659668, + "rewards/rejected": -3.7106919288635254, + "step": 815 + }, + { + "epoch": 0.52, + "learning_rate": 4.922547657665061e-08, + "logits/chosen": -3.1871442794799805, + "logits/rejected": -3.121387004852295, + "logps/chosen": -272.25, + "logps/rejected": -662.4222412109375, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2192573547363281, + "rewards/margins": 4.854325294494629, + "rewards/rejected": -3.635067939758301, + "step": 816 + }, + { + "epoch": 0.52, + "learning_rate": 4.912221677324257e-08, + "logits/chosen": -3.2662665843963623, + "logits/rejected": -3.139801025390625, + "logps/chosen": -244.52203369140625, + "logps/rejected": -634.49169921875, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3333160877227783, + "rewards/margins": 5.077539443969727, + "rewards/rejected": -3.744223117828369, + "step": 817 + }, + { + "epoch": 0.52, + "learning_rate": 4.9018960714632894e-08, + "logits/chosen": -3.2577505111694336, + "logits/rejected": -3.1320042610168457, + "logps/chosen": -252.473876953125, + "logps/rejected": -570.4916381835938, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3564879894256592, + "rewards/margins": 3.9922776222229004, + "rewards/rejected": -2.635789394378662, + "step": 818 + }, + { + "epoch": 0.52, + "learning_rate": 4.8915708841332553e-08, + "logits/chosen": -3.263702154159546, + "logits/rejected": -3.0309033393859863, + "logps/chosen": -295.2715759277344, + "logps/rejected": -1984.279296875, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3997894525527954, + "rewards/margins": 9.4775972366333, + "rewards/rejected": -8.077807426452637, + "step": 819 + }, + { + "epoch": 0.52, + "learning_rate": 4.881246159383466e-08, + "logits/chosen": -3.2567195892333984, + "logits/rejected": -3.0917093753814697, + "logps/chosen": -290.201171875, + "logps/rejected": -612.7618408203125, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4179763793945312, + "rewards/margins": 4.5215559005737305, + "rewards/rejected": -3.1035797595977783, + "step": 820 + }, + { + "epoch": 0.52, + "learning_rate": 4.870921941261252e-08, + "logits/chosen": -3.2142910957336426, + "logits/rejected": -3.1209523677825928, + "logps/chosen": -283.753173828125, + "logps/rejected": -404.7484130859375, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2876709699630737, + "rewards/margins": 2.9434738159179688, + "rewards/rejected": -1.6558029651641846, + "step": 821 + }, + { + "epoch": 0.52, + "learning_rate": 4.8605982738117917e-08, + "logits/chosen": -3.2573471069335938, + "logits/rejected": -3.1131625175476074, + "logps/chosen": -232.778076171875, + "logps/rejected": -641.11376953125, + "loss": 0.2854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.187208652496338, + "rewards/margins": 4.383119583129883, + "rewards/rejected": -3.195910692214966, + "step": 822 + }, + { + "epoch": 0.52, + "learning_rate": 4.850275201077911e-08, + "logits/chosen": -3.282968282699585, + "logits/rejected": -3.1894068717956543, + "logps/chosen": -230.90762329101562, + "logps/rejected": -339.5931091308594, + "loss": 0.2727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1816726922988892, + "rewards/margins": 3.268653154373169, + "rewards/rejected": -2.0869803428649902, + "step": 823 + }, + { + "epoch": 0.53, + "learning_rate": 4.839952767099894e-08, + "logits/chosen": -3.27779483795166, + "logits/rejected": -3.1535048484802246, + "logps/chosen": -221.10006713867188, + "logps/rejected": -352.9542541503906, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3939118385314941, + "rewards/margins": 2.877401828765869, + "rewards/rejected": -1.483489990234375, + "step": 824 + }, + { + "epoch": 0.53, + "learning_rate": 4.829631015915306e-08, + "logits/chosen": -3.2411227226257324, + "logits/rejected": -3.195073366165161, + "logps/chosen": -237.69107055664062, + "logps/rejected": -688.3804931640625, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1400253772735596, + "rewards/margins": 4.947340488433838, + "rewards/rejected": -3.8073153495788574, + "step": 825 + }, + { + "epoch": 0.53, + "learning_rate": 4.819309991558798e-08, + "logits/chosen": -3.2450783252716064, + "logits/rejected": -3.1764976978302, + "logps/chosen": -245.75103759765625, + "logps/rejected": -404.04345703125, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2743781805038452, + "rewards/margins": 3.5963265895843506, + "rewards/rejected": -2.321948289871216, + "step": 826 + }, + { + "epoch": 0.53, + "learning_rate": 4.808989738061916e-08, + "logits/chosen": -3.1948657035827637, + "logits/rejected": -3.1382689476013184, + "logps/chosen": -284.936279296875, + "logps/rejected": -338.2620849609375, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4329605102539062, + "rewards/margins": 3.319338321685791, + "rewards/rejected": -1.8863776922225952, + "step": 827 + }, + { + "epoch": 0.53, + "learning_rate": 4.798670299452925e-08, + "logits/chosen": -3.1967854499816895, + "logits/rejected": -3.095245838165283, + "logps/chosen": -288.4198913574219, + "logps/rejected": -507.6197509765625, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2872428894042969, + "rewards/margins": 3.5025887489318848, + "rewards/rejected": -2.215345859527588, + "step": 828 + }, + { + "epoch": 0.53, + "learning_rate": 4.788351719756605e-08, + "logits/chosen": -3.267775058746338, + "logits/rejected": -3.01686429977417, + "logps/chosen": -245.81744384765625, + "logps/rejected": -1424.912109375, + "loss": 0.2923, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3708114624023438, + "rewards/margins": 6.785459995269775, + "rewards/rejected": -5.414648532867432, + "step": 829 + }, + { + "epoch": 0.53, + "learning_rate": 4.7780340429940766e-08, + "logits/chosen": -3.2186036109924316, + "logits/rejected": -2.999908685684204, + "logps/chosen": -240.61355590820312, + "logps/rejected": -380.8897705078125, + "loss": 0.2883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4082214832305908, + "rewards/margins": 3.197873115539551, + "rewards/rejected": -1.7896515130996704, + "step": 830 + }, + { + "epoch": 0.53, + "learning_rate": 4.7677173131826096e-08, + "logits/chosen": -3.2884531021118164, + "logits/rejected": -3.0131375789642334, + "logps/chosen": -248.48373413085938, + "logps/rejected": -1441.71044921875, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4562957286834717, + "rewards/margins": 7.775143623352051, + "rewards/rejected": -6.31884765625, + "step": 831 + }, + { + "epoch": 0.53, + "learning_rate": 4.757401574335431e-08, + "logits/chosen": -3.2686398029327393, + "logits/rejected": -3.18149995803833, + "logps/chosen": -233.93572998046875, + "logps/rejected": -400.8757019042969, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2194747924804688, + "rewards/margins": 3.2771682739257812, + "rewards/rejected": -2.0576934814453125, + "step": 832 + }, + { + "epoch": 0.53, + "learning_rate": 4.747086870461539e-08, + "logits/chosen": -3.2080485820770264, + "logits/rejected": -3.1643178462982178, + "logps/chosen": -257.528076171875, + "logps/rejected": -563.3797607421875, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6034218072891235, + "rewards/margins": 4.508219242095947, + "rewards/rejected": -2.904797315597534, + "step": 833 + }, + { + "epoch": 0.53, + "learning_rate": 4.7367732455655205e-08, + "logits/chosen": -3.238051652908325, + "logits/rejected": -3.127446174621582, + "logps/chosen": -295.4306640625, + "logps/rejected": -469.76556396484375, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4526488780975342, + "rewards/margins": 3.954087734222412, + "rewards/rejected": -2.501438856124878, + "step": 834 + }, + { + "epoch": 0.53, + "learning_rate": 4.7264607436473554e-08, + "logits/chosen": -3.217329740524292, + "logits/rejected": -3.1086068153381348, + "logps/chosen": -237.8825225830078, + "logps/rejected": -1521.953369140625, + "loss": 0.2615, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.363562822341919, + "rewards/margins": 8.099576950073242, + "rewards/rejected": -6.736013412475586, + "step": 835 + }, + { + "epoch": 0.53, + "learning_rate": 4.716149408702235e-08, + "logits/chosen": -3.210029363632202, + "logits/rejected": -3.134248733520508, + "logps/chosen": -306.6370849609375, + "logps/rejected": -395.7296142578125, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2903343439102173, + "rewards/margins": 3.530254364013672, + "rewards/rejected": -2.239920139312744, + "step": 836 + }, + { + "epoch": 0.53, + "learning_rate": 4.705839284720375e-08, + "logits/chosen": -3.2597451210021973, + "logits/rejected": -3.1944315433502197, + "logps/chosen": -259.3458251953125, + "logps/rejected": -1343.5897216796875, + "loss": 0.288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3668197393417358, + "rewards/margins": 7.982505798339844, + "rewards/rejected": -6.615686416625977, + "step": 837 + }, + { + "epoch": 0.53, + "learning_rate": 4.695530415686816e-08, + "logits/chosen": -3.24227237701416, + "logits/rejected": -3.1721906661987305, + "logps/chosen": -272.8883361816406, + "logps/rejected": -841.92236328125, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2420135736465454, + "rewards/margins": 5.949243545532227, + "rewards/rejected": -4.7072296142578125, + "step": 838 + }, + { + "epoch": 0.53, + "learning_rate": 4.685222845581254e-08, + "logits/chosen": -3.2869458198547363, + "logits/rejected": -3.1406023502349854, + "logps/chosen": -273.2125244140625, + "logps/rejected": -480.1291809082031, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.569453477859497, + "rewards/margins": 3.877333164215088, + "rewards/rejected": -2.307879686355591, + "step": 839 + }, + { + "epoch": 0.54, + "learning_rate": 4.674916618377837e-08, + "logits/chosen": -3.2490243911743164, + "logits/rejected": -3.1376850605010986, + "logps/chosen": -253.1889190673828, + "logps/rejected": -1693.19384765625, + "loss": 0.2769, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.359065294265747, + "rewards/margins": 8.261977195739746, + "rewards/rejected": -6.90291166305542, + "step": 840 + }, + { + "epoch": 0.54, + "learning_rate": 4.664611778044987e-08, + "logits/chosen": -3.196967124938965, + "logits/rejected": -3.046222686767578, + "logps/chosen": -305.357666015625, + "logps/rejected": -491.4900817871094, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.078588843345642, + "rewards/margins": 4.116742134094238, + "rewards/rejected": -3.0381531715393066, + "step": 841 + }, + { + "epoch": 0.54, + "learning_rate": 4.654308368545213e-08, + "logits/chosen": -3.2067956924438477, + "logits/rejected": -3.1426138877868652, + "logps/chosen": -229.02627563476562, + "logps/rejected": -876.2607421875, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.175537109375, + "rewards/margins": 5.881317138671875, + "rewards/rejected": -4.705780029296875, + "step": 842 + }, + { + "epoch": 0.54, + "learning_rate": 4.6440064338349096e-08, + "logits/chosen": -3.2879374027252197, + "logits/rejected": -3.1504859924316406, + "logps/chosen": -267.1827392578125, + "logps/rejected": -652.8941040039062, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3282501697540283, + "rewards/margins": 4.8092193603515625, + "rewards/rejected": -3.4809694290161133, + "step": 843 + }, + { + "epoch": 0.54, + "learning_rate": 4.633706017864189e-08, + "logits/chosen": -3.2042133808135986, + "logits/rejected": -3.0948948860168457, + "logps/chosen": -227.5879669189453, + "logps/rejected": -584.9437866210938, + "loss": 0.2792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2219696044921875, + "rewards/margins": 5.116336822509766, + "rewards/rejected": -3.8943674564361572, + "step": 844 + }, + { + "epoch": 0.54, + "learning_rate": 4.623407164576682e-08, + "logits/chosen": -3.231184959411621, + "logits/rejected": -3.0499792098999023, + "logps/chosen": -265.0291748046875, + "logps/rejected": -335.7197265625, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3906784057617188, + "rewards/margins": 2.9581117630004883, + "rewards/rejected": -1.5674331188201904, + "step": 845 + }, + { + "epoch": 0.54, + "learning_rate": 4.613109917909349e-08, + "logits/chosen": -3.296175479888916, + "logits/rejected": -3.08642840385437, + "logps/chosen": -235.01376342773438, + "logps/rejected": -458.86566162109375, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.240930199623108, + "rewards/margins": 3.6996307373046875, + "rewards/rejected": -2.458700656890869, + "step": 846 + }, + { + "epoch": 0.54, + "learning_rate": 4.602814321792299e-08, + "logits/chosen": -3.290743827819824, + "logits/rejected": -3.1005444526672363, + "logps/chosen": -257.08892822265625, + "logps/rejected": -722.0699462890625, + "loss": 0.2721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1920212507247925, + "rewards/margins": 4.9208269119262695, + "rewards/rejected": -3.7288055419921875, + "step": 847 + }, + { + "epoch": 0.54, + "learning_rate": 4.5925204201486025e-08, + "logits/chosen": -3.279010057449341, + "logits/rejected": -3.1454238891601562, + "logps/chosen": -248.76194763183594, + "logps/rejected": -996.741943359375, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6814301013946533, + "rewards/margins": 6.710812568664551, + "rewards/rejected": -5.029382228851318, + "step": 848 + }, + { + "epoch": 0.54, + "learning_rate": 4.582228256894093e-08, + "logits/chosen": -3.217235565185547, + "logits/rejected": -3.1037752628326416, + "logps/chosen": -267.3941650390625, + "logps/rejected": -715.4697265625, + "loss": 0.2616, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2198090553283691, + "rewards/margins": 5.05987548828125, + "rewards/rejected": -3.84006667137146, + "step": 849 + }, + { + "epoch": 0.54, + "learning_rate": 4.571937875937198e-08, + "logits/chosen": -3.208808422088623, + "logits/rejected": -3.170489549636841, + "logps/chosen": -260.45770263671875, + "logps/rejected": -346.26495361328125, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.179559350013733, + "rewards/margins": 3.142259120941162, + "rewards/rejected": -1.9626998901367188, + "step": 850 + }, + { + "epoch": 0.54, + "learning_rate": 4.56164932117873e-08, + "logits/chosen": -3.272657871246338, + "logits/rejected": -3.1863584518432617, + "logps/chosen": -248.49295043945312, + "logps/rejected": -775.4620361328125, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3241372108459473, + "rewards/margins": 5.044000625610352, + "rewards/rejected": -3.7198638916015625, + "step": 851 + }, + { + "epoch": 0.54, + "learning_rate": 4.55136263651172e-08, + "logits/chosen": -3.2746686935424805, + "logits/rejected": -3.1292059421539307, + "logps/chosen": -287.2186279296875, + "logps/rejected": -417.4698486328125, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3674569129943848, + "rewards/margins": 3.5494284629821777, + "rewards/rejected": -2.181971788406372, + "step": 852 + }, + { + "epoch": 0.54, + "learning_rate": 4.541077865821218e-08, + "logits/chosen": -3.2939276695251465, + "logits/rejected": -3.260232925415039, + "logps/chosen": -231.46417236328125, + "logps/rejected": -578.69970703125, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1829407215118408, + "rewards/margins": 4.700286865234375, + "rewards/rejected": -3.5173463821411133, + "step": 853 + }, + { + "epoch": 0.54, + "learning_rate": 4.530795052984104e-08, + "logits/chosen": -3.297544002532959, + "logits/rejected": -3.1297430992126465, + "logps/chosen": -255.80384826660156, + "logps/rejected": -1197.95849609375, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.306978702545166, + "rewards/margins": 6.9374260902404785, + "rewards/rejected": -5.6304473876953125, + "step": 854 + }, + { + "epoch": 0.54, + "learning_rate": 4.520514241868912e-08, + "logits/chosen": -3.2780659198760986, + "logits/rejected": -3.202423572540283, + "logps/chosen": -243.92520141601562, + "logps/rejected": -731.151123046875, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.370903730392456, + "rewards/margins": 5.38265323638916, + "rewards/rejected": -4.011749267578125, + "step": 855 + }, + { + "epoch": 0.55, + "learning_rate": 4.510235476335633e-08, + "logits/chosen": -3.2667646408081055, + "logits/rejected": -3.20778489112854, + "logps/chosen": -288.4941711425781, + "logps/rejected": -620.5662231445312, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3632781505584717, + "rewards/margins": 4.803262710571289, + "rewards/rejected": -3.4399843215942383, + "step": 856 + }, + { + "epoch": 0.55, + "learning_rate": 4.4999588002355314e-08, + "logits/chosen": -3.2520556449890137, + "logits/rejected": -3.224618911743164, + "logps/chosen": -276.549560546875, + "logps/rejected": -636.802734375, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0727195739746094, + "rewards/margins": 5.010699272155762, + "rewards/rejected": -3.937979221343994, + "step": 857 + }, + { + "epoch": 0.55, + "learning_rate": 4.489684257410958e-08, + "logits/chosen": -3.203165054321289, + "logits/rejected": -3.2023873329162598, + "logps/chosen": -270.4219665527344, + "logps/rejected": -777.0526733398438, + "loss": 0.2854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5760666131973267, + "rewards/margins": 5.573982238769531, + "rewards/rejected": -3.997915744781494, + "step": 858 + }, + { + "epoch": 0.55, + "learning_rate": 4.479411891695165e-08, + "logits/chosen": -3.317882537841797, + "logits/rejected": -3.219918966293335, + "logps/chosen": -227.08433532714844, + "logps/rejected": -509.49224853515625, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3117188215255737, + "rewards/margins": 4.1408185958862305, + "rewards/rejected": -2.8291001319885254, + "step": 859 + }, + { + "epoch": 0.55, + "learning_rate": 4.469141746912108e-08, + "logits/chosen": -3.1558189392089844, + "logits/rejected": -3.082869529724121, + "logps/chosen": -257.0174560546875, + "logps/rejected": -845.84716796875, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.102410078048706, + "rewards/margins": 5.491677284240723, + "rewards/rejected": -4.3892669677734375, + "step": 860 + }, + { + "epoch": 0.55, + "learning_rate": 4.458873866876281e-08, + "logits/chosen": -3.2388358116149902, + "logits/rejected": -3.142777919769287, + "logps/chosen": -263.8978271484375, + "logps/rejected": -451.0018615722656, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3428771495819092, + "rewards/margins": 3.479605197906494, + "rewards/rejected": -2.136727809906006, + "step": 861 + }, + { + "epoch": 0.55, + "learning_rate": 4.448608295392503e-08, + "logits/chosen": -3.193924903869629, + "logits/rejected": -3.077503204345703, + "logps/chosen": -239.30848693847656, + "logps/rejected": -998.73388671875, + "loss": 0.2722, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3351974487304688, + "rewards/margins": 5.655082702636719, + "rewards/rejected": -4.31988525390625, + "step": 862 + }, + { + "epoch": 0.55, + "learning_rate": 4.438345076255753e-08, + "logits/chosen": -3.2081851959228516, + "logits/rejected": -3.1279139518737793, + "logps/chosen": -254.03225708007812, + "logps/rejected": -812.857421875, + "loss": 0.2753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.129387617111206, + "rewards/margins": 5.276125431060791, + "rewards/rejected": -4.146737575531006, + "step": 863 + }, + { + "epoch": 0.55, + "learning_rate": 4.4280842532509716e-08, + "logits/chosen": -3.216433048248291, + "logits/rejected": -3.186192512512207, + "logps/chosen": -256.02532958984375, + "logps/rejected": -854.1292114257812, + "loss": 0.2874, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2643638849258423, + "rewards/margins": 5.59835147857666, + "rewards/rejected": -4.333987236022949, + "step": 864 + }, + { + "epoch": 0.55, + "learning_rate": 4.417825870152875e-08, + "logits/chosen": -3.1628599166870117, + "logits/rejected": -3.214664936065674, + "logps/chosen": -231.276123046875, + "logps/rejected": -532.2940063476562, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.276228427886963, + "rewards/margins": 3.916995048522949, + "rewards/rejected": -2.6407668590545654, + "step": 865 + }, + { + "epoch": 0.55, + "learning_rate": 4.407569970725775e-08, + "logits/chosen": -3.2327451705932617, + "logits/rejected": -3.079179286956787, + "logps/chosen": -254.5703582763672, + "logps/rejected": -471.1910400390625, + "loss": 0.2855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1627212762832642, + "rewards/margins": 3.4950852394104004, + "rewards/rejected": -2.3323638439178467, + "step": 866 + }, + { + "epoch": 0.55, + "learning_rate": 4.397316598723385e-08, + "logits/chosen": -3.2268173694610596, + "logits/rejected": -3.0820670127868652, + "logps/chosen": -247.58387756347656, + "logps/rejected": -979.3521118164062, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.025611162185669, + "rewards/margins": 5.344629764556885, + "rewards/rejected": -4.319018363952637, + "step": 867 + }, + { + "epoch": 0.55, + "learning_rate": 4.387065797888633e-08, + "logits/chosen": -3.273664712905884, + "logits/rejected": -3.188760280609131, + "logps/chosen": -260.932861328125, + "logps/rejected": -642.7342529296875, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1230751276016235, + "rewards/margins": 4.268253326416016, + "rewards/rejected": -3.1451783180236816, + "step": 868 + }, + { + "epoch": 0.55, + "learning_rate": 4.376817611953481e-08, + "logits/chosen": -3.28859281539917, + "logits/rejected": -3.0824553966522217, + "logps/chosen": -229.80560302734375, + "logps/rejected": -906.6924438476562, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2645447254180908, + "rewards/margins": 5.2753376960754395, + "rewards/rejected": -4.0107927322387695, + "step": 869 + }, + { + "epoch": 0.55, + "learning_rate": 4.36657208463874e-08, + "logits/chosen": -3.2617878913879395, + "logits/rejected": -2.9813945293426514, + "logps/chosen": -280.6742248535156, + "logps/rejected": -463.95037841796875, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.522840142250061, + "rewards/margins": 3.5850350856781006, + "rewards/rejected": -2.06219482421875, + "step": 870 + }, + { + "epoch": 0.56, + "learning_rate": 4.356329259653866e-08, + "logits/chosen": -3.241844415664673, + "logits/rejected": -3.2164831161499023, + "logps/chosen": -253.18707275390625, + "logps/rejected": -875.3026733398438, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2814216613769531, + "rewards/margins": 6.639423370361328, + "rewards/rejected": -5.358001708984375, + "step": 871 + }, + { + "epoch": 0.56, + "learning_rate": 4.3460891806968e-08, + "logits/chosen": -3.225637912750244, + "logits/rejected": -3.1623995304107666, + "logps/chosen": -275.46270751953125, + "logps/rejected": -713.6419677734375, + "loss": 0.2781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0910110473632812, + "rewards/margins": 4.995048522949219, + "rewards/rejected": -3.9040374755859375, + "step": 872 + }, + { + "epoch": 0.56, + "learning_rate": 4.3358518914537585e-08, + "logits/chosen": -3.235560894012451, + "logits/rejected": -3.214782476425171, + "logps/chosen": -272.440673828125, + "logps/rejected": -1316.48046875, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3972450494766235, + "rewards/margins": 8.304319381713867, + "rewards/rejected": -6.907073974609375, + "step": 873 + }, + { + "epoch": 0.56, + "learning_rate": 4.325617435599061e-08, + "logits/chosen": -3.255769729614258, + "logits/rejected": -2.9270105361938477, + "logps/chosen": -245.94680786132812, + "logps/rejected": -1361.9083251953125, + "loss": 0.284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1300445795059204, + "rewards/margins": 6.414621353149414, + "rewards/rejected": -5.284576416015625, + "step": 874 + }, + { + "epoch": 0.56, + "learning_rate": 4.3153858567949407e-08, + "logits/chosen": -3.2599895000457764, + "logits/rejected": -3.103327751159668, + "logps/chosen": -281.0069580078125, + "logps/rejected": -649.1396484375, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.257318139076233, + "rewards/margins": 4.880300998687744, + "rewards/rejected": -3.622982978820801, + "step": 875 + }, + { + "epoch": 0.56, + "learning_rate": 4.30515719869135e-08, + "logits/chosen": -3.2577550411224365, + "logits/rejected": -3.133408546447754, + "logps/chosen": -266.1474914550781, + "logps/rejected": -563.37158203125, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0668320655822754, + "rewards/margins": 4.1830339431762695, + "rewards/rejected": -3.116201877593994, + "step": 876 + }, + { + "epoch": 0.56, + "learning_rate": 4.294931504925787e-08, + "logits/chosen": -3.265195608139038, + "logits/rejected": -3.121643543243408, + "logps/chosen": -264.1125183105469, + "logps/rejected": -792.638916015625, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1903434991836548, + "rewards/margins": 4.724602699279785, + "rewards/rejected": -3.534259080886841, + "step": 877 + }, + { + "epoch": 0.56, + "learning_rate": 4.284708819123104e-08, + "logits/chosen": -3.217766046524048, + "logits/rejected": -3.1175954341888428, + "logps/chosen": -270.53631591796875, + "logps/rejected": -580.743408203125, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2635154724121094, + "rewards/margins": 4.23304557800293, + "rewards/rejected": -2.969529867172241, + "step": 878 + }, + { + "epoch": 0.56, + "learning_rate": 4.274489184895315e-08, + "logits/chosen": -3.226287841796875, + "logits/rejected": -3.1964638233184814, + "logps/chosen": -255.2384796142578, + "logps/rejected": -621.531005859375, + "loss": 0.2753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4484268426895142, + "rewards/margins": 4.778552532196045, + "rewards/rejected": -3.330125570297241, + "step": 879 + }, + { + "epoch": 0.56, + "learning_rate": 4.264272645841419e-08, + "logits/chosen": -3.2709290981292725, + "logits/rejected": -3.165365695953369, + "logps/chosen": -260.419921875, + "logps/rejected": -939.796630859375, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1838524341583252, + "rewards/margins": 6.4564995765686035, + "rewards/rejected": -5.272646903991699, + "step": 880 + }, + { + "epoch": 0.56, + "learning_rate": 4.254059245547212e-08, + "logits/chosen": -3.272313117980957, + "logits/rejected": -3.084449291229248, + "logps/chosen": -265.2523193359375, + "logps/rejected": -954.2078247070312, + "loss": 0.281, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.03564453125, + "rewards/margins": 5.685190200805664, + "rewards/rejected": -4.649545669555664, + "step": 881 + }, + { + "epoch": 0.56, + "learning_rate": 4.243849027585096e-08, + "logits/chosen": -3.194695472717285, + "logits/rejected": -3.0442442893981934, + "logps/chosen": -256.0579833984375, + "logps/rejected": -846.6865234375, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.295331597328186, + "rewards/margins": 5.508072853088379, + "rewards/rejected": -4.212740898132324, + "step": 882 + }, + { + "epoch": 0.56, + "learning_rate": 4.2336420355139e-08, + "logits/chosen": -3.228877067565918, + "logits/rejected": -3.09515643119812, + "logps/chosen": -209.27877807617188, + "logps/rejected": -514.7904663085938, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4278571605682373, + "rewards/margins": 4.042006492614746, + "rewards/rejected": -2.614149570465088, + "step": 883 + }, + { + "epoch": 0.56, + "learning_rate": 4.223438312878685e-08, + "logits/chosen": -3.170905828475952, + "logits/rejected": -3.141228675842285, + "logps/chosen": -253.1133575439453, + "logps/rejected": -613.6198120117188, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.159101963043213, + "rewards/margins": 4.077606201171875, + "rewards/rejected": -2.918504238128662, + "step": 884 + }, + { + "epoch": 0.56, + "learning_rate": 4.2132379032105693e-08, + "logits/chosen": -3.2628884315490723, + "logits/rejected": -3.0020885467529297, + "logps/chosen": -249.82664489746094, + "logps/rejected": -1987.088134765625, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1948761940002441, + "rewards/margins": 8.218832015991211, + "rewards/rejected": -7.023956298828125, + "step": 885 + }, + { + "epoch": 0.56, + "learning_rate": 4.203040850026537e-08, + "logits/chosen": -3.299010753631592, + "logits/rejected": -3.159254312515259, + "logps/chosen": -234.16836547851562, + "logps/rejected": -1034.166259765625, + "loss": 0.2452, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5132942199707031, + "rewards/margins": 7.895069122314453, + "rewards/rejected": -6.38177490234375, + "step": 886 + }, + { + "epoch": 0.57, + "learning_rate": 4.192847196829251e-08, + "logits/chosen": -3.2852113246917725, + "logits/rejected": -3.1200037002563477, + "logps/chosen": -246.24093627929688, + "logps/rejected": -463.65576171875, + "loss": 0.2918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3675506114959717, + "rewards/margins": 4.0143327713012695, + "rewards/rejected": -2.646782159805298, + "step": 887 + }, + { + "epoch": 0.57, + "learning_rate": 4.1826569871068685e-08, + "logits/chosen": -3.1904635429382324, + "logits/rejected": -3.1452736854553223, + "logps/chosen": -262.7826232910156, + "logps/rejected": -1014.885009765625, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2047592401504517, + "rewards/margins": 6.171018600463867, + "rewards/rejected": -4.966259956359863, + "step": 888 + }, + { + "epoch": 0.57, + "learning_rate": 4.17247026433286e-08, + "logits/chosen": -3.224358081817627, + "logits/rejected": -3.176666498184204, + "logps/chosen": -276.46820068359375, + "logps/rejected": -529.9740600585938, + "loss": 0.2721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6583099365234375, + "rewards/margins": 4.373715400695801, + "rewards/rejected": -2.7154054641723633, + "step": 889 + }, + { + "epoch": 0.57, + "learning_rate": 4.1622870719658144e-08, + "logits/chosen": -3.325345516204834, + "logits/rejected": -3.1590113639831543, + "logps/chosen": -257.8096618652344, + "logps/rejected": -639.43212890625, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2272636890411377, + "rewards/margins": 4.811417579650879, + "rewards/rejected": -3.584153652191162, + "step": 890 + }, + { + "epoch": 0.57, + "learning_rate": 4.152107453449263e-08, + "logits/chosen": -3.2262890338897705, + "logits/rejected": -3.1527819633483887, + "logps/chosen": -289.965576171875, + "logps/rejected": -385.3173828125, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4476333856582642, + "rewards/margins": 3.834864854812622, + "rewards/rejected": -2.3872315883636475, + "step": 891 + }, + { + "epoch": 0.57, + "learning_rate": 4.1419314522114916e-08, + "logits/chosen": -3.289612293243408, + "logits/rejected": -3.098963737487793, + "logps/chosen": -243.5721893310547, + "logps/rejected": -437.5738525390625, + "loss": 0.2903, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3567291498184204, + "rewards/margins": 3.75876784324646, + "rewards/rejected": -2.40203857421875, + "step": 892 + }, + { + "epoch": 0.57, + "learning_rate": 4.131759111665348e-08, + "logits/chosen": -3.213575839996338, + "logits/rejected": -3.1206464767456055, + "logps/chosen": -247.96533203125, + "logps/rejected": -616.1702880859375, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.26390540599823, + "rewards/margins": 5.1081438064575195, + "rewards/rejected": -3.84423828125, + "step": 893 + }, + { + "epoch": 0.57, + "learning_rate": 4.1215904752080705e-08, + "logits/chosen": -3.2797350883483887, + "logits/rejected": -3.150458335876465, + "logps/chosen": -238.24710083007812, + "logps/rejected": -715.559326171875, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.404656171798706, + "rewards/margins": 5.28647518157959, + "rewards/rejected": -3.881819248199463, + "step": 894 + }, + { + "epoch": 0.57, + "learning_rate": 4.111425586221087e-08, + "logits/chosen": -3.1910157203674316, + "logits/rejected": -2.9884448051452637, + "logps/chosen": -226.78265380859375, + "logps/rejected": -1018.447265625, + "loss": 0.281, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3508110046386719, + "rewards/margins": 5.427008628845215, + "rewards/rejected": -4.076197624206543, + "step": 895 + }, + { + "epoch": 0.57, + "learning_rate": 4.1012644880698454e-08, + "logits/chosen": -3.2123897075653076, + "logits/rejected": -3.1962649822235107, + "logps/chosen": -240.9392547607422, + "logps/rejected": -642.5337524414062, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2070465087890625, + "rewards/margins": 4.8479461669921875, + "rewards/rejected": -3.640899658203125, + "step": 896 + }, + { + "epoch": 0.57, + "learning_rate": 4.091107224103619e-08, + "logits/chosen": -3.2032294273376465, + "logits/rejected": -3.108726739883423, + "logps/chosen": -249.60992431640625, + "logps/rejected": -611.4676513671875, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4450149536132812, + "rewards/margins": 4.738293647766113, + "rewards/rejected": -3.293278694152832, + "step": 897 + }, + { + "epoch": 0.57, + "learning_rate": 4.080953837655317e-08, + "logits/chosen": -3.237184762954712, + "logits/rejected": -3.0855934619903564, + "logps/chosen": -240.70468139648438, + "logps/rejected": -417.35748291015625, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5159454345703125, + "rewards/margins": 3.485361099243164, + "rewards/rejected": -1.9694154262542725, + "step": 898 + }, + { + "epoch": 0.57, + "learning_rate": 4.0708043720413155e-08, + "logits/chosen": -3.2501983642578125, + "logits/rejected": -3.173161029815674, + "logps/chosen": -233.0078582763672, + "logps/rejected": -633.2166137695312, + "loss": 0.261, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2897255420684814, + "rewards/margins": 4.201792240142822, + "rewards/rejected": -2.912066698074341, + "step": 899 + }, + { + "epoch": 0.57, + "learning_rate": 4.060658870561262e-08, + "logits/chosen": -3.2916858196258545, + "logits/rejected": -3.134068727493286, + "logps/chosen": -262.4672546386719, + "logps/rejected": -328.2461853027344, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4616241455078125, + "rewards/margins": 2.9635114669799805, + "rewards/rejected": -1.501887559890747, + "step": 900 + }, + { + "epoch": 0.57, + "learning_rate": 4.050517376497885e-08, + "logits/chosen": -3.166189193725586, + "logits/rejected": -3.0521159172058105, + "logps/chosen": -249.28790283203125, + "logps/rejected": -496.10369873046875, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.405207872390747, + "rewards/margins": 4.289495944976807, + "rewards/rejected": -2.8842880725860596, + "step": 901 + }, + { + "epoch": 0.57, + "learning_rate": 4.0403799331168244e-08, + "logits/chosen": -3.2125022411346436, + "logits/rejected": -3.155515193939209, + "logps/chosen": -246.03216552734375, + "logps/rejected": -651.8116455078125, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4138572216033936, + "rewards/margins": 5.034224510192871, + "rewards/rejected": -3.6203675270080566, + "step": 902 + }, + { + "epoch": 0.58, + "learning_rate": 4.030246583666437e-08, + "logits/chosen": -3.284153938293457, + "logits/rejected": -3.098407745361328, + "logps/chosen": -233.6990966796875, + "logps/rejected": -741.1893310546875, + "loss": 0.2786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3882957696914673, + "rewards/margins": 4.849381446838379, + "rewards/rejected": -3.461085557937622, + "step": 903 + }, + { + "epoch": 0.58, + "learning_rate": 4.0201173713776105e-08, + "logits/chosen": -3.232515811920166, + "logits/rejected": -3.1058526039123535, + "logps/chosen": -252.39801025390625, + "logps/rejected": -542.7147216796875, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3235435485839844, + "rewards/margins": 3.883596181869507, + "rewards/rejected": -2.5600526332855225, + "step": 904 + }, + { + "epoch": 0.58, + "learning_rate": 4.0099923394635906e-08, + "logits/chosen": -3.158115863800049, + "logits/rejected": -3.1114001274108887, + "logps/chosen": -287.732421875, + "logps/rejected": -1552.083984375, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4461731910705566, + "rewards/margins": 7.425676345825195, + "rewards/rejected": -5.979503154754639, + "step": 905 + }, + { + "epoch": 0.58, + "learning_rate": 3.9998715311197785e-08, + "logits/chosen": -3.2606425285339355, + "logits/rejected": -3.132460594177246, + "logps/chosen": -265.72283935546875, + "logps/rejected": -633.5768432617188, + "loss": 0.2836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2468445301055908, + "rewards/margins": 4.414083957672119, + "rewards/rejected": -3.1672394275665283, + "step": 906 + }, + { + "epoch": 0.58, + "learning_rate": 3.989754989523563e-08, + "logits/chosen": -3.2933883666992188, + "logits/rejected": -3.17411208152771, + "logps/chosen": -239.8134765625, + "logps/rejected": -731.6232299804688, + "loss": 0.2657, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3911751508712769, + "rewards/margins": 5.00196647644043, + "rewards/rejected": -3.610790967941284, + "step": 907 + }, + { + "epoch": 0.58, + "learning_rate": 3.9796427578341326e-08, + "logits/chosen": -3.2919983863830566, + "logits/rejected": -3.115795135498047, + "logps/chosen": -286.7726135253906, + "logps/rejected": -397.46728515625, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.197163462638855, + "rewards/margins": 3.2812347412109375, + "rewards/rejected": -2.084071397781372, + "step": 908 + }, + { + "epoch": 0.58, + "learning_rate": 3.969534879192281e-08, + "logits/chosen": -3.1925177574157715, + "logits/rejected": -3.105247735977173, + "logps/chosen": -224.52337646484375, + "logps/rejected": -357.61627197265625, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6049858331680298, + "rewards/margins": 3.389218807220459, + "rewards/rejected": -1.7842330932617188, + "step": 909 + }, + { + "epoch": 0.58, + "learning_rate": 3.959431396720237e-08, + "logits/chosen": -3.2323410511016846, + "logits/rejected": -3.0458178520202637, + "logps/chosen": -286.109375, + "logps/rejected": -1214.836669921875, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2929130792617798, + "rewards/margins": 6.51906681060791, + "rewards/rejected": -5.226153373718262, + "step": 910 + }, + { + "epoch": 0.58, + "learning_rate": 3.949332353521474e-08, + "logits/chosen": -3.2781856060028076, + "logits/rejected": -3.139697313308716, + "logps/chosen": -241.753173828125, + "logps/rejected": -438.2841491699219, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4368736743927002, + "rewards/margins": 3.7385354042053223, + "rewards/rejected": -2.301661729812622, + "step": 911 + }, + { + "epoch": 0.58, + "learning_rate": 3.939237792680522e-08, + "logits/chosen": -3.260305881500244, + "logits/rejected": -3.1861190795898438, + "logps/chosen": -250.22877502441406, + "logps/rejected": -838.1343994140625, + "loss": 0.275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.199853539466858, + "rewards/margins": 5.97430419921875, + "rewards/rejected": -4.774450778961182, + "step": 912 + }, + { + "epoch": 0.58, + "learning_rate": 3.929147757262794e-08, + "logits/chosen": -3.2136788368225098, + "logits/rejected": -3.1328678131103516, + "logps/chosen": -281.1153564453125, + "logps/rejected": -616.76025390625, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.391972303390503, + "rewards/margins": 4.6357011795043945, + "rewards/rejected": -3.2437286376953125, + "step": 913 + }, + { + "epoch": 0.58, + "learning_rate": 3.919062290314395e-08, + "logits/chosen": -3.2360024452209473, + "logits/rejected": -3.0708727836608887, + "logps/chosen": -232.62396240234375, + "logps/rejected": -995.1893310546875, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1766357421875, + "rewards/margins": 5.236918926239014, + "rewards/rejected": -4.060283184051514, + "step": 914 + }, + { + "epoch": 0.58, + "learning_rate": 3.9089814348619374e-08, + "logits/chosen": -3.170652389526367, + "logits/rejected": -3.151066780090332, + "logps/chosen": -269.1226806640625, + "logps/rejected": -559.7852783203125, + "loss": 0.2783, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.281214952468872, + "rewards/margins": 4.704754829406738, + "rewards/rejected": -3.423539876937866, + "step": 915 + }, + { + "epoch": 0.58, + "learning_rate": 3.898905233912365e-08, + "logits/chosen": -3.205157995223999, + "logits/rejected": -3.0467023849487305, + "logps/chosen": -268.7356872558594, + "logps/rejected": -455.22869873046875, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5258057117462158, + "rewards/margins": 3.746164083480835, + "rewards/rejected": -2.220358371734619, + "step": 916 + }, + { + "epoch": 0.58, + "learning_rate": 3.8888337304527564e-08, + "logits/chosen": -3.259908676147461, + "logits/rejected": -3.091580867767334, + "logps/chosen": -266.5911865234375, + "logps/rejected": -496.78460693359375, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4207786321640015, + "rewards/margins": 3.8520045280456543, + "rewards/rejected": -2.4312257766723633, + "step": 917 + }, + { + "epoch": 0.59, + "learning_rate": 3.878766967450158e-08, + "logits/chosen": -3.2639522552490234, + "logits/rejected": -3.144225597381592, + "logps/chosen": -242.2237548828125, + "logps/rejected": -450.0152587890625, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.435235619544983, + "rewards/margins": 3.918499708175659, + "rewards/rejected": -2.483264207839966, + "step": 918 + }, + { + "epoch": 0.59, + "learning_rate": 3.86870498785139e-08, + "logits/chosen": -3.226830005645752, + "logits/rejected": -3.1760761737823486, + "logps/chosen": -240.45440673828125, + "logps/rejected": -603.61669921875, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2311172485351562, + "rewards/margins": 4.2382493019104, + "rewards/rejected": -3.007132053375244, + "step": 919 + }, + { + "epoch": 0.59, + "learning_rate": 3.8586478345828636e-08, + "logits/chosen": -3.312051773071289, + "logits/rejected": -2.998701572418213, + "logps/chosen": -255.1887664794922, + "logps/rejected": -359.8844299316406, + "loss": 0.3016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3937149047851562, + "rewards/margins": 3.116647243499756, + "rewards/rejected": -1.7229324579238892, + "step": 920 + }, + { + "epoch": 0.59, + "learning_rate": 3.8485955505504e-08, + "logits/chosen": -3.2127292156219482, + "logits/rejected": -3.0515542030334473, + "logps/chosen": -237.78460693359375, + "logps/rejected": -981.7273559570312, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.320630669593811, + "rewards/margins": 6.0734782218933105, + "rewards/rejected": -4.752847671508789, + "step": 921 + }, + { + "epoch": 0.59, + "learning_rate": 3.838548178639054e-08, + "logits/chosen": -3.2594223022460938, + "logits/rejected": -3.1560511589050293, + "logps/chosen": -250.4254150390625, + "logps/rejected": -420.3065185546875, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4712257385253906, + "rewards/margins": 4.050362586975098, + "rewards/rejected": -2.579136848449707, + "step": 922 + }, + { + "epoch": 0.59, + "learning_rate": 3.828505761712912e-08, + "logits/chosen": -3.2309963703155518, + "logits/rejected": -3.0699305534362793, + "logps/chosen": -258.3431701660156, + "logps/rejected": -1381.628662109375, + "loss": 0.2766, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4673888683319092, + "rewards/margins": 6.54205322265625, + "rewards/rejected": -5.07466459274292, + "step": 923 + }, + { + "epoch": 0.59, + "learning_rate": 3.818468342614932e-08, + "logits/chosen": -3.2381677627563477, + "logits/rejected": -3.1665823459625244, + "logps/chosen": -249.3985137939453, + "logps/rejected": -472.8653259277344, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3089401721954346, + "rewards/margins": 3.6244492530822754, + "rewards/rejected": -2.315509080886841, + "step": 924 + }, + { + "epoch": 0.59, + "learning_rate": 3.808435964166748e-08, + "logits/chosen": -3.2775216102600098, + "logits/rejected": -3.09277606010437, + "logps/chosen": -285.02740478515625, + "logps/rejected": -464.8793029785156, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5224266052246094, + "rewards/margins": 3.3305397033691406, + "rewards/rejected": -1.8081130981445312, + "step": 925 + }, + { + "epoch": 0.59, + "learning_rate": 3.798408669168483e-08, + "logits/chosen": -3.2823867797851562, + "logits/rejected": -3.088627815246582, + "logps/chosen": -248.14425659179688, + "logps/rejected": -1369.1392822265625, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5562599897384644, + "rewards/margins": 8.484919548034668, + "rewards/rejected": -6.928659439086914, + "step": 926 + }, + { + "epoch": 0.59, + "learning_rate": 3.788386500398583e-08, + "logits/chosen": -3.266371488571167, + "logits/rejected": -3.0986647605895996, + "logps/chosen": -284.92376708984375, + "logps/rejected": -1280.4088134765625, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3163132667541504, + "rewards/margins": 7.73831033706665, + "rewards/rejected": -6.4219970703125, + "step": 927 + }, + { + "epoch": 0.59, + "learning_rate": 3.7783695006136166e-08, + "logits/chosen": -3.220144271850586, + "logits/rejected": -3.0362751483917236, + "logps/chosen": -269.55328369140625, + "logps/rejected": -850.62646484375, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3129745721817017, + "rewards/margins": 4.35244607925415, + "rewards/rejected": -3.0394716262817383, + "step": 928 + }, + { + "epoch": 0.59, + "learning_rate": 3.768357712548105e-08, + "logits/chosen": -3.2530431747436523, + "logits/rejected": -3.1154422760009766, + "logps/chosen": -275.5526428222656, + "logps/rejected": -613.0908203125, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3629295825958252, + "rewards/margins": 4.539862632751465, + "rewards/rejected": -3.1769332885742188, + "step": 929 + }, + { + "epoch": 0.59, + "learning_rate": 3.7583511789143355e-08, + "logits/chosen": -3.184992790222168, + "logits/rejected": -3.135517120361328, + "logps/chosen": -302.04986572265625, + "logps/rejected": -1048.4676513671875, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2511589527130127, + "rewards/margins": 6.266332149505615, + "rewards/rejected": -5.015173435211182, + "step": 930 + }, + { + "epoch": 0.59, + "learning_rate": 3.748349942402174e-08, + "logits/chosen": -3.2540037631988525, + "logits/rejected": -3.1166985034942627, + "logps/chosen": -232.94969177246094, + "logps/rejected": -401.25494384765625, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.257897973060608, + "rewards/margins": 3.458630323410034, + "rewards/rejected": -2.200732469558716, + "step": 931 + }, + { + "epoch": 0.59, + "learning_rate": 3.738354045678891e-08, + "logits/chosen": -3.2413077354431152, + "logits/rejected": -3.142016887664795, + "logps/chosen": -225.6209259033203, + "logps/rejected": -525.21533203125, + "loss": 0.2748, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.148322343826294, + "rewards/margins": 4.271533966064453, + "rewards/rejected": -3.1232118606567383, + "step": 932 + }, + { + "epoch": 0.59, + "learning_rate": 3.728363531388978e-08, + "logits/chosen": -3.2397165298461914, + "logits/rejected": -3.131099224090576, + "logps/chosen": -247.28282165527344, + "logps/rejected": -1195.12890625, + "loss": 0.2741, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2026984691619873, + "rewards/margins": 6.229798316955566, + "rewards/rejected": -5.027099609375, + "step": 933 + }, + { + "epoch": 0.6, + "learning_rate": 3.718378442153961e-08, + "logits/chosen": -3.2541775703430176, + "logits/rejected": -3.149825096130371, + "logps/chosen": -284.3398132324219, + "logps/rejected": -755.5133666992188, + "loss": 0.28, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1195740699768066, + "rewards/margins": 5.028700351715088, + "rewards/rejected": -3.9091262817382812, + "step": 934 + }, + { + "epoch": 0.6, + "learning_rate": 3.708398820572225e-08, + "logits/chosen": -3.192582845687866, + "logits/rejected": -3.0501527786254883, + "logps/chosen": -295.12322998046875, + "logps/rejected": -813.62548828125, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2117218971252441, + "rewards/margins": 5.0825958251953125, + "rewards/rejected": -3.8708739280700684, + "step": 935 + }, + { + "epoch": 0.6, + "learning_rate": 3.698424709218826e-08, + "logits/chosen": -3.2643227577209473, + "logits/rejected": -3.1433517932891846, + "logps/chosen": -253.6551055908203, + "logps/rejected": -323.50091552734375, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4621398448944092, + "rewards/margins": 2.843249559402466, + "rewards/rejected": -1.3811097145080566, + "step": 936 + }, + { + "epoch": 0.6, + "learning_rate": 3.688456150645311e-08, + "logits/chosen": -3.2433857917785645, + "logits/rejected": -3.157040596008301, + "logps/chosen": -246.78353881835938, + "logps/rejected": -366.12115478515625, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2972686290740967, + "rewards/margins": 3.2995147705078125, + "rewards/rejected": -2.002246141433716, + "step": 937 + }, + { + "epoch": 0.6, + "learning_rate": 3.678493187379543e-08, + "logits/chosen": -3.2265729904174805, + "logits/rejected": -3.05633282661438, + "logps/chosen": -231.12193298339844, + "logps/rejected": -687.6162109375, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5290436744689941, + "rewards/margins": 4.617055892944336, + "rewards/rejected": -3.0880126953125, + "step": 938 + }, + { + "epoch": 0.6, + "learning_rate": 3.668535861925508e-08, + "logits/chosen": -3.320521831512451, + "logits/rejected": -3.1195859909057617, + "logps/chosen": -237.93417358398438, + "logps/rejected": -942.7433471679688, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2386451959609985, + "rewards/margins": 5.6172966957092285, + "rewards/rejected": -4.3786516189575195, + "step": 939 + }, + { + "epoch": 0.6, + "learning_rate": 3.658584216763145e-08, + "logits/chosen": -3.2608461380004883, + "logits/rejected": -3.167431116104126, + "logps/chosen": -248.35118103027344, + "logps/rejected": -527.3385620117188, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3875489234924316, + "rewards/margins": 3.653395175933838, + "rewards/rejected": -2.2658462524414062, + "step": 940 + }, + { + "epoch": 0.6, + "learning_rate": 3.648638294348158e-08, + "logits/chosen": -3.195746421813965, + "logits/rejected": -3.060030460357666, + "logps/chosen": -245.0909423828125, + "logps/rejected": -1440.821533203125, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6255576610565186, + "rewards/margins": 8.731689453125, + "rewards/rejected": -7.106131076812744, + "step": 941 + }, + { + "epoch": 0.6, + "learning_rate": 3.638698137111835e-08, + "logits/chosen": -3.201563835144043, + "logits/rejected": -3.1764492988586426, + "logps/chosen": -274.5476379394531, + "logps/rejected": -1005.3630981445312, + "loss": 0.2704, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3715476989746094, + "rewards/margins": 7.646889686584473, + "rewards/rejected": -6.275341987609863, + "step": 942 + }, + { + "epoch": 0.6, + "learning_rate": 3.6287637874608714e-08, + "logits/chosen": -3.2727277278900146, + "logits/rejected": -3.1833224296569824, + "logps/chosen": -231.03762817382812, + "logps/rejected": -901.6517333984375, + "loss": 0.263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1382964849472046, + "rewards/margins": 6.513546943664551, + "rewards/rejected": -5.375250339508057, + "step": 943 + }, + { + "epoch": 0.6, + "learning_rate": 3.6188352877771865e-08, + "logits/chosen": -3.2819643020629883, + "logits/rejected": -3.1674094200134277, + "logps/chosen": -258.3857116699219, + "logps/rejected": -663.59130859375, + "loss": 0.2848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1676926612854004, + "rewards/margins": 4.611135482788086, + "rewards/rejected": -3.4434432983398438, + "step": 944 + }, + { + "epoch": 0.6, + "learning_rate": 3.608912680417737e-08, + "logits/chosen": -3.2253313064575195, + "logits/rejected": -3.076523780822754, + "logps/chosen": -328.5078430175781, + "logps/rejected": -531.330322265625, + "loss": 0.3099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2476043701171875, + "rewards/margins": 4.287965774536133, + "rewards/rejected": -3.040361166000366, + "step": 945 + }, + { + "epoch": 0.6, + "learning_rate": 3.598996007714347e-08, + "logits/chosen": -3.255401611328125, + "logits/rejected": -3.065359592437744, + "logps/chosen": -262.0342712402344, + "logps/rejected": -627.7860717773438, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4759507179260254, + "rewards/margins": 4.429792881011963, + "rewards/rejected": -2.9538421630859375, + "step": 946 + }, + { + "epoch": 0.6, + "learning_rate": 3.5890853119735244e-08, + "logits/chosen": -3.194418430328369, + "logits/rejected": -3.1498801708221436, + "logps/chosen": -271.0760803222656, + "logps/rejected": -893.5, + "loss": 0.2878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2410073280334473, + "rewards/margins": 5.646613121032715, + "rewards/rejected": -4.405606269836426, + "step": 947 + }, + { + "epoch": 0.6, + "learning_rate": 3.57918063547627e-08, + "logits/chosen": -3.2935633659362793, + "logits/rejected": -3.1465249061584473, + "logps/chosen": -251.5843505859375, + "logps/rejected": -843.894287109375, + "loss": 0.2753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3389053344726562, + "rewards/margins": 5.845579624176025, + "rewards/rejected": -4.506674289703369, + "step": 948 + }, + { + "epoch": 0.6, + "learning_rate": 3.569282020477912e-08, + "logits/chosen": -3.2915470600128174, + "logits/rejected": -3.0871872901916504, + "logps/chosen": -303.5621643066406, + "logps/rejected": -618.6182861328125, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2863372564315796, + "rewards/margins": 4.177087306976318, + "rewards/rejected": -2.8907501697540283, + "step": 949 + }, + { + "epoch": 0.61, + "learning_rate": 3.559389509207916e-08, + "logits/chosen": -3.2186214923858643, + "logits/rejected": -3.1673765182495117, + "logps/chosen": -237.37411499023438, + "logps/rejected": -687.6409912109375, + "loss": 0.2652, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3492553234100342, + "rewards/margins": 5.275579929351807, + "rewards/rejected": -3.9263246059417725, + "step": 950 + }, + { + "epoch": 0.61, + "learning_rate": 3.54950314386971e-08, + "logits/chosen": -3.273444652557373, + "logits/rejected": -3.0856149196624756, + "logps/chosen": -290.1059875488281, + "logps/rejected": -473.55438232421875, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.163953423500061, + "rewards/margins": 3.9170312881469727, + "rewards/rejected": -2.753077745437622, + "step": 951 + }, + { + "epoch": 0.61, + "learning_rate": 3.539622966640502e-08, + "logits/chosen": -3.2300186157226562, + "logits/rejected": -3.1029012203216553, + "logps/chosen": -274.4117736816406, + "logps/rejected": -520.0410766601562, + "loss": 0.2888, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5517334938049316, + "rewards/margins": 4.888978958129883, + "rewards/rejected": -3.337245464324951, + "step": 952 + }, + { + "epoch": 0.61, + "learning_rate": 3.5297490196710973e-08, + "logits/chosen": -3.188220977783203, + "logits/rejected": -3.0442657470703125, + "logps/chosen": -251.01300048828125, + "logps/rejected": -1233.241943359375, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2552978992462158, + "rewards/margins": 6.722290515899658, + "rewards/rejected": -5.466992378234863, + "step": 953 + }, + { + "epoch": 0.61, + "learning_rate": 3.5198813450857234e-08, + "logits/chosen": -3.2600834369659424, + "logits/rejected": -3.1389992237091064, + "logps/chosen": -260.0856018066406, + "logps/rejected": -378.5822448730469, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4807021617889404, + "rewards/margins": 3.3870017528533936, + "rewards/rejected": -1.9062995910644531, + "step": 954 + }, + { + "epoch": 0.61, + "learning_rate": 3.510019984981853e-08, + "logits/chosen": -3.250176191329956, + "logits/rejected": -3.141314744949341, + "logps/chosen": -263.5655517578125, + "logps/rejected": -740.30029296875, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.246870517730713, + "rewards/margins": 5.751091003417969, + "rewards/rejected": -4.504220962524414, + "step": 955 + }, + { + "epoch": 0.61, + "learning_rate": 3.50016498143001e-08, + "logits/chosen": -3.23067569732666, + "logits/rejected": -3.085888385772705, + "logps/chosen": -240.66763305664062, + "logps/rejected": -878.5548095703125, + "loss": 0.3035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3060951232910156, + "rewards/margins": 6.12202262878418, + "rewards/rejected": -4.815927505493164, + "step": 956 + }, + { + "epoch": 0.61, + "learning_rate": 3.49031637647361e-08, + "logits/chosen": -3.2745234966278076, + "logits/rejected": -3.051891326904297, + "logps/chosen": -261.751220703125, + "logps/rejected": -970.9080200195312, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2734076976776123, + "rewards/margins": 5.525166988372803, + "rewards/rejected": -4.2517595291137695, + "step": 957 + }, + { + "epoch": 0.61, + "learning_rate": 3.480474212128766e-08, + "logits/chosen": -3.3112878799438477, + "logits/rejected": -3.1367428302764893, + "logps/chosen": -247.1437225341797, + "logps/rejected": -305.36114501953125, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3427002429962158, + "rewards/margins": 3.3408021926879883, + "rewards/rejected": -1.998101830482483, + "step": 958 + }, + { + "epoch": 0.61, + "learning_rate": 3.4706385303841134e-08, + "logits/chosen": -3.286686420440674, + "logits/rejected": -3.125129222869873, + "logps/chosen": -234.08035278320312, + "logps/rejected": -1267.2706298828125, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4498977661132812, + "rewards/margins": 7.824934482574463, + "rewards/rejected": -6.375036716461182, + "step": 959 + }, + { + "epoch": 0.61, + "learning_rate": 3.460809373200636e-08, + "logits/chosen": -3.2368669509887695, + "logits/rejected": -3.146461009979248, + "logps/chosen": -255.337890625, + "logps/rejected": -583.4181518554688, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5226256847381592, + "rewards/margins": 4.842516899108887, + "rewards/rejected": -3.3198914527893066, + "step": 960 + }, + { + "epoch": 0.61, + "learning_rate": 3.4509867825114755e-08, + "logits/chosen": -3.2636187076568604, + "logits/rejected": -3.217644214630127, + "logps/chosen": -274.5438537597656, + "logps/rejected": -588.377685546875, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.259088158607483, + "rewards/margins": 4.132849216461182, + "rewards/rejected": -2.8737611770629883, + "step": 961 + }, + { + "epoch": 0.61, + "learning_rate": 3.4411708002217655e-08, + "logits/chosen": -3.2689948081970215, + "logits/rejected": -3.1892600059509277, + "logps/chosen": -260.8834533691406, + "logps/rejected": -964.9557495117188, + "loss": 0.2783, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4424560070037842, + "rewards/margins": 6.399878025054932, + "rewards/rejected": -4.957422256469727, + "step": 962 + }, + { + "epoch": 0.61, + "learning_rate": 3.431361468208448e-08, + "logits/chosen": -3.2630395889282227, + "logits/rejected": -3.1622190475463867, + "logps/chosen": -255.14462280273438, + "logps/rejected": -961.6602783203125, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1423592567443848, + "rewards/margins": 6.21880578994751, + "rewards/rejected": -5.076446533203125, + "step": 963 + }, + { + "epoch": 0.61, + "learning_rate": 3.421558828320085e-08, + "logits/chosen": -3.216426372528076, + "logits/rejected": -3.1024346351623535, + "logps/chosen": -277.79473876953125, + "logps/rejected": -942.32470703125, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4747345447540283, + "rewards/margins": 6.026202201843262, + "rewards/rejected": -4.5514678955078125, + "step": 964 + }, + { + "epoch": 0.62, + "learning_rate": 3.4117629223766966e-08, + "logits/chosen": -3.2257180213928223, + "logits/rejected": -3.094543933868408, + "logps/chosen": -266.76129150390625, + "logps/rejected": -3461.5078125, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.398176670074463, + "rewards/margins": 7.126906394958496, + "rewards/rejected": -5.728729724884033, + "step": 965 + }, + { + "epoch": 0.62, + "learning_rate": 3.4019737921695734e-08, + "logits/chosen": -3.232466697692871, + "logits/rejected": -3.030064105987549, + "logps/chosen": -263.3004455566406, + "logps/rejected": -830.6954345703125, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.345483422279358, + "rewards/margins": 5.260991096496582, + "rewards/rejected": -3.9155075550079346, + "step": 966 + }, + { + "epoch": 0.62, + "learning_rate": 3.392191479461096e-08, + "logits/chosen": -3.234419822692871, + "logits/rejected": -3.0399112701416016, + "logps/chosen": -243.48678588867188, + "logps/rejected": -604.219970703125, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.494855523109436, + "rewards/margins": 4.160712242126465, + "rewards/rejected": -2.6658568382263184, + "step": 967 + }, + { + "epoch": 0.62, + "learning_rate": 3.382416025984563e-08, + "logits/chosen": -3.178784132003784, + "logits/rejected": -3.1768722534179688, + "logps/chosen": -270.6385498046875, + "logps/rejected": -801.1585693359375, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4551925659179688, + "rewards/margins": 5.507108688354492, + "rewards/rejected": -4.051916599273682, + "step": 968 + }, + { + "epoch": 0.62, + "learning_rate": 3.372647473444011e-08, + "logits/chosen": -3.2129201889038086, + "logits/rejected": -3.0915348529815674, + "logps/chosen": -251.1027374267578, + "logps/rejected": -512.70703125, + "loss": 0.2664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1572418212890625, + "rewards/margins": 3.874926805496216, + "rewards/rejected": -2.7176849842071533, + "step": 969 + }, + { + "epoch": 0.62, + "learning_rate": 3.3628858635140314e-08, + "logits/chosen": -3.286194086074829, + "logits/rejected": -3.107189655303955, + "logps/chosen": -253.46524047851562, + "logps/rejected": -404.77423095703125, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4286422729492188, + "rewards/margins": 3.3484420776367188, + "rewards/rejected": -1.9197998046875, + "step": 970 + }, + { + "epoch": 0.62, + "learning_rate": 3.353131237839602e-08, + "logits/chosen": -3.2619853019714355, + "logits/rejected": -3.125434637069702, + "logps/chosen": -255.7034149169922, + "logps/rejected": -775.01806640625, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.505426049232483, + "rewards/margins": 5.799298286437988, + "rewards/rejected": -4.293872356414795, + "step": 971 + }, + { + "epoch": 0.62, + "learning_rate": 3.3433836380359014e-08, + "logits/chosen": -3.2323966026306152, + "logits/rejected": -3.2029054164886475, + "logps/chosen": -299.46484375, + "logps/rejected": -587.983154296875, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4491745233535767, + "rewards/margins": 4.378931999206543, + "rewards/rejected": -2.929757833480835, + "step": 972 + }, + { + "epoch": 0.62, + "learning_rate": 3.333643105688134e-08, + "logits/chosen": -3.247379779815674, + "logits/rejected": -3.051839828491211, + "logps/chosen": -275.05877685546875, + "logps/rejected": -364.3896484375, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5588699579238892, + "rewards/margins": 3.234462022781372, + "rewards/rejected": -1.675592064857483, + "step": 973 + }, + { + "epoch": 0.62, + "learning_rate": 3.3239096823513564e-08, + "logits/chosen": -3.322549343109131, + "logits/rejected": -3.057614803314209, + "logps/chosen": -257.1264343261719, + "logps/rejected": -350.0376892089844, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3094162940979004, + "rewards/margins": 2.90104079246521, + "rewards/rejected": -1.5916244983673096, + "step": 974 + }, + { + "epoch": 0.62, + "learning_rate": 3.3141834095502925e-08, + "logits/chosen": -3.2024402618408203, + "logits/rejected": -3.1302709579467773, + "logps/chosen": -232.44485473632812, + "logps/rejected": -469.83880615234375, + "loss": 0.2763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4504456520080566, + "rewards/margins": 4.071058750152588, + "rewards/rejected": -2.6206130981445312, + "step": 975 + }, + { + "epoch": 0.62, + "learning_rate": 3.304464328779164e-08, + "logits/chosen": -3.205430746078491, + "logits/rejected": -3.13813853263855, + "logps/chosen": -258.4405822753906, + "logps/rejected": -1541.724853515625, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7032463550567627, + "rewards/margins": 9.795660018920898, + "rewards/rejected": -8.092412948608398, + "step": 976 + }, + { + "epoch": 0.62, + "learning_rate": 3.294752481501511e-08, + "logits/chosen": -3.2173826694488525, + "logits/rejected": -3.0327606201171875, + "logps/chosen": -269.6688232421875, + "logps/rejected": -1139.122802734375, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3584076166152954, + "rewards/margins": 6.575933933258057, + "rewards/rejected": -5.217526435852051, + "step": 977 + }, + { + "epoch": 0.62, + "learning_rate": 3.2850479091500056e-08, + "logits/chosen": -3.3071751594543457, + "logits/rejected": -3.1318225860595703, + "logps/chosen": -261.9635925292969, + "logps/rejected": -635.4556274414062, + "loss": 0.282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4496818780899048, + "rewards/margins": 4.629894256591797, + "rewards/rejected": -3.1802124977111816, + "step": 978 + }, + { + "epoch": 0.62, + "learning_rate": 3.275350653126294e-08, + "logits/chosen": -3.3008127212524414, + "logits/rejected": -3.161782741546631, + "logps/chosen": -268.8299560546875, + "logps/rejected": -679.232666015625, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2612991333007812, + "rewards/margins": 5.399442672729492, + "rewards/rejected": -4.138144016265869, + "step": 979 + }, + { + "epoch": 0.62, + "learning_rate": 3.2656607548008065e-08, + "logits/chosen": -3.3171212673187256, + "logits/rejected": -3.165984630584717, + "logps/chosen": -245.52047729492188, + "logps/rejected": -3425.6259765625, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4017333984375, + "rewards/margins": 9.3792724609375, + "rewards/rejected": -7.9775390625, + "step": 980 + }, + { + "epoch": 0.63, + "learning_rate": 3.2559782555125785e-08, + "logits/chosen": -3.2454018592834473, + "logits/rejected": -3.1008830070495605, + "logps/chosen": -270.8109130859375, + "logps/rejected": -891.6793212890625, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.735039472579956, + "rewards/margins": 6.032100677490234, + "rewards/rejected": -4.297060966491699, + "step": 981 + }, + { + "epoch": 0.63, + "learning_rate": 3.246303196569089e-08, + "logits/chosen": -3.294137716293335, + "logits/rejected": -3.127366542816162, + "logps/chosen": -282.28594970703125, + "logps/rejected": -398.01031494140625, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2648468017578125, + "rewards/margins": 3.399038791656494, + "rewards/rejected": -2.1341919898986816, + "step": 982 + }, + { + "epoch": 0.63, + "learning_rate": 3.2366356192460644e-08, + "logits/chosen": -3.2529149055480957, + "logits/rejected": -3.181417942047119, + "logps/chosen": -283.3663635253906, + "logps/rejected": -600.4593505859375, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5154762268066406, + "rewards/margins": 5.059325218200684, + "rewards/rejected": -3.543849229812622, + "step": 983 + }, + { + "epoch": 0.63, + "learning_rate": 3.2269755647873216e-08, + "logits/chosen": -3.2883658409118652, + "logits/rejected": -3.2401671409606934, + "logps/chosen": -231.61795043945312, + "logps/rejected": -690.7061157226562, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2878844738006592, + "rewards/margins": 5.410693168640137, + "rewards/rejected": -4.122808933258057, + "step": 984 + }, + { + "epoch": 0.63, + "learning_rate": 3.217323074404582e-08, + "logits/chosen": -3.276672840118408, + "logits/rejected": -3.1256844997406006, + "logps/chosen": -260.78900146484375, + "logps/rejected": -581.5625, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2075257301330566, + "rewards/margins": 3.768850803375244, + "rewards/rejected": -2.5613250732421875, + "step": 985 + }, + { + "epoch": 0.63, + "learning_rate": 3.20767818927729e-08, + "logits/chosen": -3.2189083099365234, + "logits/rejected": -3.2404932975769043, + "logps/chosen": -232.41995239257812, + "logps/rejected": -975.22265625, + "loss": 0.2648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2339645624160767, + "rewards/margins": 7.334773063659668, + "rewards/rejected": -6.100808620452881, + "step": 986 + }, + { + "epoch": 0.63, + "learning_rate": 3.198040950552454e-08, + "logits/chosen": -3.29284930229187, + "logits/rejected": -3.128976345062256, + "logps/chosen": -251.05081176757812, + "logps/rejected": -469.4581604003906, + "loss": 0.2641, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6398811340332031, + "rewards/margins": 4.244478225708008, + "rewards/rejected": -2.6045966148376465, + "step": 987 + }, + { + "epoch": 0.63, + "learning_rate": 3.188411399344458e-08, + "logits/chosen": -3.2417588233947754, + "logits/rejected": -3.1625919342041016, + "logps/chosen": -250.9453887939453, + "logps/rejected": -433.61602783203125, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2148224115371704, + "rewards/margins": 3.441969394683838, + "rewards/rejected": -2.227147102355957, + "step": 988 + }, + { + "epoch": 0.63, + "learning_rate": 3.1787895767348855e-08, + "logits/chosen": -3.2330031394958496, + "logits/rejected": -3.120027542114258, + "logps/chosen": -306.1254577636719, + "logps/rejected": -693.0810546875, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4900909662246704, + "rewards/margins": 5.764697074890137, + "rewards/rejected": -4.274606704711914, + "step": 989 + }, + { + "epoch": 0.63, + "learning_rate": 3.169175523772353e-08, + "logits/chosen": -3.2906527519226074, + "logits/rejected": -3.1236844062805176, + "logps/chosen": -272.1529846191406, + "logps/rejected": -904.6876220703125, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3123016357421875, + "rewards/margins": 5.732623100280762, + "rewards/rejected": -4.420321464538574, + "step": 990 + }, + { + "epoch": 0.63, + "learning_rate": 3.159569281472332e-08, + "logits/chosen": -3.2070088386535645, + "logits/rejected": -3.102837085723877, + "logps/chosen": -240.0347900390625, + "logps/rejected": -552.8535766601562, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4738380908966064, + "rewards/margins": 4.461554527282715, + "rewards/rejected": -2.9877166748046875, + "step": 991 + }, + { + "epoch": 0.63, + "learning_rate": 3.149970890816963e-08, + "logits/chosen": -3.2786712646484375, + "logits/rejected": -3.1998372077941895, + "logps/chosen": -242.427978515625, + "logps/rejected": -698.4730834960938, + "loss": 0.2354, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2700088024139404, + "rewards/margins": 5.756247043609619, + "rewards/rejected": -4.4862380027771, + "step": 992 + }, + { + "epoch": 0.63, + "learning_rate": 3.1403803927549e-08, + "logits/chosen": -3.254197120666504, + "logits/rejected": -3.051417827606201, + "logps/chosen": -255.08489990234375, + "logps/rejected": -928.6258544921875, + "loss": 0.2867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5450477600097656, + "rewards/margins": 5.55120325088501, + "rewards/rejected": -4.006155490875244, + "step": 993 + }, + { + "epoch": 0.63, + "learning_rate": 3.130797828201119e-08, + "logits/chosen": -3.277763605117798, + "logits/rejected": -3.0176727771759033, + "logps/chosen": -285.5312805175781, + "logps/rejected": -466.8060302734375, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3679794073104858, + "rewards/margins": 3.3891663551330566, + "rewards/rejected": -2.0211868286132812, + "step": 994 + }, + { + "epoch": 0.63, + "learning_rate": 3.121223238036752e-08, + "logits/chosen": -3.2306058406829834, + "logits/rejected": -3.10544490814209, + "logps/chosen": -306.054443359375, + "logps/rejected": -432.76068115234375, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.125335693359375, + "rewards/margins": 3.5296647548675537, + "rewards/rejected": -2.4043290615081787, + "step": 995 + }, + { + "epoch": 0.63, + "learning_rate": 3.111656663108914e-08, + "logits/chosen": -3.2645931243896484, + "logits/rejected": -3.1018590927124023, + "logps/chosen": -255.68299865722656, + "logps/rejected": -609.1707763671875, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2923355102539062, + "rewards/margins": 4.424062728881836, + "rewards/rejected": -3.131727695465088, + "step": 996 + }, + { + "epoch": 0.64, + "learning_rate": 3.102098144230518e-08, + "logits/chosen": -3.2594780921936035, + "logits/rejected": -3.0645737648010254, + "logps/chosen": -292.510009765625, + "logps/rejected": -859.427734375, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6885788440704346, + "rewards/margins": 6.021932601928711, + "rewards/rejected": -4.3333539962768555, + "step": 997 + }, + { + "epoch": 0.64, + "learning_rate": 3.092547722180115e-08, + "logits/chosen": -3.280787467956543, + "logits/rejected": -3.1605241298675537, + "logps/chosen": -251.880859375, + "logps/rejected": -573.0541381835938, + "loss": 0.274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4224228858947754, + "rewards/margins": 4.606939792633057, + "rewards/rejected": -3.1845169067382812, + "step": 998 + }, + { + "epoch": 0.64, + "learning_rate": 3.083005437701715e-08, + "logits/chosen": -3.272928237915039, + "logits/rejected": -3.2006678581237793, + "logps/chosen": -226.6614990234375, + "logps/rejected": -500.1354064941406, + "loss": 0.2609, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3902069330215454, + "rewards/margins": 4.154364109039307, + "rewards/rejected": -2.7641570568084717, + "step": 999 + }, + { + "epoch": 0.64, + "learning_rate": 3.0734713315046e-08, + "logits/chosen": -3.1961350440979004, + "logits/rejected": -3.1389384269714355, + "logps/chosen": -248.31005859375, + "logps/rejected": -661.7349243164062, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.313875675201416, + "rewards/margins": 5.383477210998535, + "rewards/rejected": -4.069601535797119, + "step": 1000 + }, + { + "epoch": 0.64, + "learning_rate": 3.063945444263174e-08, + "logits/chosen": -3.214963912963867, + "logits/rejected": -3.0818538665771484, + "logps/chosen": -230.96194458007812, + "logps/rejected": -401.7847595214844, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3247833251953125, + "rewards/margins": 3.386648654937744, + "rewards/rejected": -2.0618653297424316, + "step": 1001 + }, + { + "epoch": 0.64, + "learning_rate": 3.054427816616772e-08, + "logits/chosen": -3.2429184913635254, + "logits/rejected": -3.1684160232543945, + "logps/chosen": -279.76629638671875, + "logps/rejected": -472.41357421875, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4604721069335938, + "rewards/margins": 3.6248016357421875, + "rewards/rejected": -2.1643295288085938, + "step": 1002 + }, + { + "epoch": 0.64, + "learning_rate": 3.044918489169491e-08, + "logits/chosen": -3.2606754302978516, + "logits/rejected": -3.1806206703186035, + "logps/chosen": -239.4521484375, + "logps/rejected": -698.1150512695312, + "loss": 0.255, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4149901866912842, + "rewards/margins": 5.400958061218262, + "rewards/rejected": -3.9859681129455566, + "step": 1003 + }, + { + "epoch": 0.64, + "learning_rate": 3.0354175024900216e-08, + "logits/chosen": -3.297257900238037, + "logits/rejected": -3.2026190757751465, + "logps/chosen": -262.0186462402344, + "logps/rejected": -1210.561767578125, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4093384742736816, + "rewards/margins": 7.608514785766602, + "rewards/rejected": -6.199175834655762, + "step": 1004 + }, + { + "epoch": 0.64, + "learning_rate": 3.025924897111466e-08, + "logits/chosen": -3.268949270248413, + "logits/rejected": -3.1637182235717773, + "logps/chosen": -229.33529663085938, + "logps/rejected": -559.1683349609375, + "loss": 0.258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0777428150177002, + "rewards/margins": 4.053193092346191, + "rewards/rejected": -2.975450038909912, + "step": 1005 + }, + { + "epoch": 0.64, + "learning_rate": 3.016440713531174e-08, + "logits/chosen": -3.195382595062256, + "logits/rejected": -3.0573086738586426, + "logps/chosen": -259.32647705078125, + "logps/rejected": -448.1634521484375, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1614899635314941, + "rewards/margins": 3.530259847640991, + "rewards/rejected": -2.368769884109497, + "step": 1006 + }, + { + "epoch": 0.64, + "learning_rate": 3.0069649922105664e-08, + "logits/chosen": -3.210540294647217, + "logits/rejected": -3.1998608112335205, + "logps/chosen": -283.1068420410156, + "logps/rejected": -974.4775390625, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4763259887695312, + "rewards/margins": 7.637723922729492, + "rewards/rejected": -6.161398410797119, + "step": 1007 + }, + { + "epoch": 0.64, + "learning_rate": 2.997497773574959e-08, + "logits/chosen": -3.2207393646240234, + "logits/rejected": -3.1621060371398926, + "logps/chosen": -252.45550537109375, + "logps/rejected": -744.7430419921875, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2874237298965454, + "rewards/margins": 5.161905288696289, + "rewards/rejected": -3.874481439590454, + "step": 1008 + }, + { + "epoch": 0.64, + "learning_rate": 2.988039098013395e-08, + "logits/chosen": -3.2786803245544434, + "logits/rejected": -3.1763927936553955, + "logps/chosen": -321.9887390136719, + "logps/rejected": -620.3213500976562, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.423213243484497, + "rewards/margins": 4.408017158508301, + "rewards/rejected": -2.9848039150238037, + "step": 1009 + }, + { + "epoch": 0.64, + "learning_rate": 2.9785890058784756e-08, + "logits/chosen": -3.278536319732666, + "logits/rejected": -2.9992029666900635, + "logps/chosen": -291.7578430175781, + "logps/rejected": -1093.849853515625, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.686171054840088, + "rewards/margins": 6.347365379333496, + "rewards/rejected": -4.66119384765625, + "step": 1010 + }, + { + "epoch": 0.64, + "learning_rate": 2.9691475374861747e-08, + "logits/chosen": -3.199047327041626, + "logits/rejected": -3.120877742767334, + "logps/chosen": -255.9266357421875, + "logps/rejected": -696.90185546875, + "loss": 0.284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2333862781524658, + "rewards/margins": 5.682519912719727, + "rewards/rejected": -4.449133396148682, + "step": 1011 + }, + { + "epoch": 0.64, + "learning_rate": 2.9597147331156812e-08, + "logits/chosen": -3.2554831504821777, + "logits/rejected": -3.0129990577697754, + "logps/chosen": -255.7074737548828, + "logps/rejected": -1363.75830078125, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.359754204750061, + "rewards/margins": 6.832042694091797, + "rewards/rejected": -5.472289085388184, + "step": 1012 + }, + { + "epoch": 0.65, + "learning_rate": 2.950290633009223e-08, + "logits/chosen": -3.1693201065063477, + "logits/rejected": -3.1447229385375977, + "logps/chosen": -275.7769775390625, + "logps/rejected": -864.147216796875, + "loss": 0.2807, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.300288438796997, + "rewards/margins": 6.04765510559082, + "rewards/rejected": -4.747366428375244, + "step": 1013 + }, + { + "epoch": 0.65, + "learning_rate": 2.9408752773718893e-08, + "logits/chosen": -3.1709790229797363, + "logits/rejected": -3.1616153717041016, + "logps/chosen": -236.92039489746094, + "logps/rejected": -957.3133544921875, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3681023120880127, + "rewards/margins": 7.190047264099121, + "rewards/rejected": -5.8219451904296875, + "step": 1014 + }, + { + "epoch": 0.65, + "learning_rate": 2.9314687063714676e-08, + "logits/chosen": -3.2366180419921875, + "logits/rejected": -3.0760183334350586, + "logps/chosen": -269.539306640625, + "logps/rejected": -669.7374267578125, + "loss": 0.2748, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2239768505096436, + "rewards/margins": 4.114213943481445, + "rewards/rejected": -2.890237331390381, + "step": 1015 + }, + { + "epoch": 0.65, + "learning_rate": 2.9220709601382643e-08, + "logits/chosen": -3.1992268562316895, + "logits/rejected": -3.1049978733062744, + "logps/chosen": -291.4393310546875, + "logps/rejected": -567.1357421875, + "loss": 0.2966, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.355902910232544, + "rewards/margins": 4.501191139221191, + "rewards/rejected": -3.1452882289886475, + "step": 1016 + }, + { + "epoch": 0.65, + "learning_rate": 2.91268207876494e-08, + "logits/chosen": -3.185873031616211, + "logits/rejected": -3.1662850379943848, + "logps/chosen": -246.12269592285156, + "logps/rejected": -1029.489013671875, + "loss": 0.2675, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6621429920196533, + "rewards/margins": 7.326062202453613, + "rewards/rejected": -5.663919448852539, + "step": 1017 + }, + { + "epoch": 0.65, + "learning_rate": 2.9033021023063403e-08, + "logits/chosen": -3.2365031242370605, + "logits/rejected": -3.117178440093994, + "logps/chosen": -249.15719604492188, + "logps/rejected": -591.3321533203125, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3428748846054077, + "rewards/margins": 4.764115333557129, + "rewards/rejected": -3.4212403297424316, + "step": 1018 + }, + { + "epoch": 0.65, + "learning_rate": 2.8939310707793097e-08, + "logits/chosen": -3.237967014312744, + "logits/rejected": -3.0605220794677734, + "logps/chosen": -283.17474365234375, + "logps/rejected": -683.2083740234375, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4168884754180908, + "rewards/margins": 4.657202243804932, + "rewards/rejected": -3.240313768386841, + "step": 1019 + }, + { + "epoch": 0.65, + "learning_rate": 2.8845690241625432e-08, + "logits/chosen": -3.194699764251709, + "logits/rejected": -3.1309807300567627, + "logps/chosen": -256.90179443359375, + "logps/rejected": -668.1158447265625, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2623611688613892, + "rewards/margins": 5.14975118637085, + "rewards/rejected": -3.88739013671875, + "step": 1020 + }, + { + "epoch": 0.65, + "learning_rate": 2.8752160023963988e-08, + "logits/chosen": -3.2641806602478027, + "logits/rejected": -3.1770830154418945, + "logps/chosen": -288.1644287109375, + "logps/rejected": -518.513916015625, + "loss": 0.3174, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.705141544342041, + "rewards/margins": 4.568422794342041, + "rewards/rejected": -2.86328125, + "step": 1021 + }, + { + "epoch": 0.65, + "learning_rate": 2.8658720453827328e-08, + "logits/chosen": -3.2707583904266357, + "logits/rejected": -3.1223177909851074, + "logps/chosen": -276.40032958984375, + "logps/rejected": -1079.0184326171875, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4017562866210938, + "rewards/margins": 6.604423522949219, + "rewards/rejected": -5.202667236328125, + "step": 1022 + }, + { + "epoch": 0.65, + "learning_rate": 2.8565371929847283e-08, + "logits/chosen": -3.232790946960449, + "logits/rejected": -3.232576370239258, + "logps/chosen": -252.0768280029297, + "logps/rejected": -1041.625732421875, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5924286842346191, + "rewards/margins": 7.170166015625, + "rewards/rejected": -5.577737808227539, + "step": 1023 + }, + { + "epoch": 0.65, + "learning_rate": 2.847211485026732e-08, + "logits/chosen": -3.2517623901367188, + "logits/rejected": -3.1032838821411133, + "logps/chosen": -245.16326904296875, + "logps/rejected": -621.90283203125, + "loss": 0.2632, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4024193286895752, + "rewards/margins": 4.442089080810547, + "rewards/rejected": -3.0396697521209717, + "step": 1024 + }, + { + "epoch": 0.65, + "learning_rate": 2.8378949612940727e-08, + "logits/chosen": -3.221869707107544, + "logits/rejected": -3.1321287155151367, + "logps/chosen": -259.69573974609375, + "logps/rejected": -728.81396484375, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4308364391326904, + "rewards/margins": 5.735762119293213, + "rewards/rejected": -4.304925918579102, + "step": 1025 + }, + { + "epoch": 0.65, + "learning_rate": 2.8285876615329006e-08, + "logits/chosen": -3.162947654724121, + "logits/rejected": -3.043811798095703, + "logps/chosen": -276.5559997558594, + "logps/rejected": -537.2838134765625, + "loss": 0.2765, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0671539306640625, + "rewards/margins": 3.6321182250976562, + "rewards/rejected": -2.5649642944335938, + "step": 1026 + }, + { + "epoch": 0.65, + "learning_rate": 2.819289625450012e-08, + "logits/chosen": -3.238320827484131, + "logits/rejected": -3.135589122772217, + "logps/chosen": -304.91827392578125, + "logps/rejected": -3890.730224609375, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2362412214279175, + "rewards/margins": 11.706657409667969, + "rewards/rejected": -10.470416069030762, + "step": 1027 + }, + { + "epoch": 0.66, + "learning_rate": 2.8100008927126824e-08, + "logits/chosen": -3.2557759284973145, + "logits/rejected": -3.2035107612609863, + "logps/chosen": -279.9940185546875, + "logps/rejected": -643.97412109375, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.486328125, + "rewards/margins": 4.758053779602051, + "rewards/rejected": -3.271725654602051, + "step": 1028 + }, + { + "epoch": 0.66, + "learning_rate": 2.8007215029485054e-08, + "logits/chosen": -3.269442081451416, + "logits/rejected": -3.243701934814453, + "logps/chosen": -251.98666381835938, + "logps/rejected": -537.6500854492188, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2767997980117798, + "rewards/margins": 4.362101078033447, + "rewards/rejected": -3.085301399230957, + "step": 1029 + }, + { + "epoch": 0.66, + "learning_rate": 2.7914514957452006e-08, + "logits/chosen": -3.258749008178711, + "logits/rejected": -3.088205575942993, + "logps/chosen": -269.4033203125, + "logps/rejected": -883.7349853515625, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.477447509765625, + "rewards/margins": 6.047515869140625, + "rewards/rejected": -4.570068359375, + "step": 1030 + }, + { + "epoch": 0.66, + "learning_rate": 2.7821909106504748e-08, + "logits/chosen": -3.2716445922851562, + "logits/rejected": -3.16414213180542, + "logps/chosen": -268.5147399902344, + "logps/rejected": -662.06689453125, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.386599063873291, + "rewards/margins": 4.5447258949279785, + "rewards/rejected": -3.1581268310546875, + "step": 1031 + }, + { + "epoch": 0.66, + "learning_rate": 2.7729397871718304e-08, + "logits/chosen": -3.2572519779205322, + "logits/rejected": -3.1185355186462402, + "logps/chosen": -270.6654357910156, + "logps/rejected": -1171.04736328125, + "loss": 0.307, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1997787952423096, + "rewards/margins": 8.093220710754395, + "rewards/rejected": -6.893442153930664, + "step": 1032 + }, + { + "epoch": 0.66, + "learning_rate": 2.7636981647764024e-08, + "logits/chosen": -3.216712474822998, + "logits/rejected": -3.0922605991363525, + "logps/chosen": -284.3121643066406, + "logps/rejected": -379.3660888671875, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3577194213867188, + "rewards/margins": 3.578521728515625, + "rewards/rejected": -2.2208023071289062, + "step": 1033 + }, + { + "epoch": 0.66, + "learning_rate": 2.7544660828908008e-08, + "logits/chosen": -3.19097900390625, + "logits/rejected": -3.0441179275512695, + "logps/chosen": -260.8632507324219, + "logps/rejected": -462.4663391113281, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.357393741607666, + "rewards/margins": 3.503826141357422, + "rewards/rejected": -2.146432399749756, + "step": 1034 + }, + { + "epoch": 0.66, + "learning_rate": 2.7452435809009267e-08, + "logits/chosen": -3.2374019622802734, + "logits/rejected": -3.0835177898406982, + "logps/chosen": -253.19332885742188, + "logps/rejected": -874.832275390625, + "loss": 0.2636, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3636994361877441, + "rewards/margins": 5.542327880859375, + "rewards/rejected": -4.178628444671631, + "step": 1035 + }, + { + "epoch": 0.66, + "learning_rate": 2.7360306981518145e-08, + "logits/chosen": -3.178560256958008, + "logits/rejected": -3.050813674926758, + "logps/chosen": -229.3177490234375, + "logps/rejected": -1463.719970703125, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.209075927734375, + "rewards/margins": 8.083930015563965, + "rewards/rejected": -6.874853610992432, + "step": 1036 + }, + { + "epoch": 0.66, + "learning_rate": 2.7268274739474584e-08, + "logits/chosen": -3.2680864334106445, + "logits/rejected": -3.1734588146209717, + "logps/chosen": -266.0797119140625, + "logps/rejected": -624.8013916015625, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.398625135421753, + "rewards/margins": 5.367588996887207, + "rewards/rejected": -3.968963623046875, + "step": 1037 + }, + { + "epoch": 0.66, + "learning_rate": 2.717633947550651e-08, + "logits/chosen": -3.2199172973632812, + "logits/rejected": -3.1886911392211914, + "logps/chosen": -253.71359252929688, + "logps/rejected": -593.5845947265625, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4242310523986816, + "rewards/margins": 4.424694061279297, + "rewards/rejected": -3.0004630088806152, + "step": 1038 + }, + { + "epoch": 0.66, + "learning_rate": 2.7084501581828073e-08, + "logits/chosen": -3.2931690216064453, + "logits/rejected": -3.0784173011779785, + "logps/chosen": -274.27093505859375, + "logps/rejected": -401.1853332519531, + "loss": 0.2998, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485377550125122, + "rewards/margins": 3.5982208251953125, + "rewards/rejected": -2.1128435134887695, + "step": 1039 + }, + { + "epoch": 0.66, + "learning_rate": 2.699276145023809e-08, + "logits/chosen": -3.231416940689087, + "logits/rejected": -3.020266056060791, + "logps/chosen": -238.87635803222656, + "logps/rejected": -1193.5428466796875, + "loss": 0.2794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2120590209960938, + "rewards/margins": 6.409158706665039, + "rewards/rejected": -5.197099685668945, + "step": 1040 + }, + { + "epoch": 0.66, + "learning_rate": 2.690111947211825e-08, + "logits/chosen": -3.2442173957824707, + "logits/rejected": -3.1874101161956787, + "logps/chosen": -224.51504516601562, + "logps/rejected": -690.404296875, + "loss": 0.2732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3871192932128906, + "rewards/margins": 5.245113372802734, + "rewards/rejected": -3.8579940795898438, + "step": 1041 + }, + { + "epoch": 0.66, + "learning_rate": 2.6809576038431503e-08, + "logits/chosen": -3.1852798461914062, + "logits/rejected": -3.1415464878082275, + "logps/chosen": -275.2054138183594, + "logps/rejected": -620.5103149414062, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.199140191078186, + "rewards/margins": 4.79890251159668, + "rewards/rejected": -3.599761962890625, + "step": 1042 + }, + { + "epoch": 0.66, + "learning_rate": 2.6718131539720444e-08, + "logits/chosen": -3.203083038330078, + "logits/rejected": -3.0340962409973145, + "logps/chosen": -274.8644714355469, + "logps/rejected": -378.94439697265625, + "loss": 0.2901, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4742447137832642, + "rewards/margins": 3.45758056640625, + "rewards/rejected": -1.9833358526229858, + "step": 1043 + }, + { + "epoch": 0.67, + "learning_rate": 2.6626786366105492e-08, + "logits/chosen": -3.20426082611084, + "logits/rejected": -3.137971878051758, + "logps/chosen": -239.92459106445312, + "logps/rejected": -788.2947998046875, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7106773853302002, + "rewards/margins": 6.653579235076904, + "rewards/rejected": -4.942901611328125, + "step": 1044 + }, + { + "epoch": 0.67, + "learning_rate": 2.6535540907283422e-08, + "logits/chosen": -3.23368239402771, + "logits/rejected": -3.0802745819091797, + "logps/chosen": -260.8359680175781, + "logps/rejected": -338.8902587890625, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5263153314590454, + "rewards/margins": 3.539128065109253, + "rewards/rejected": -2.012812852859497, + "step": 1045 + }, + { + "epoch": 0.67, + "learning_rate": 2.6444395552525556e-08, + "logits/chosen": -3.1918368339538574, + "logits/rejected": -3.13551926612854, + "logps/chosen": -256.77490234375, + "logps/rejected": -700.6769409179688, + "loss": 0.2669, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2070717811584473, + "rewards/margins": 5.88428258895874, + "rewards/rejected": -4.677210807800293, + "step": 1046 + }, + { + "epoch": 0.67, + "learning_rate": 2.6353350690676168e-08, + "logits/chosen": -3.254666328430176, + "logits/rejected": -3.1898140907287598, + "logps/chosen": -209.72616577148438, + "logps/rejected": -778.7623901367188, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.227294921875, + "rewards/margins": 5.695869445800781, + "rewards/rejected": -4.468574523925781, + "step": 1047 + }, + { + "epoch": 0.67, + "learning_rate": 2.626240671015081e-08, + "logits/chosen": -3.2856287956237793, + "logits/rejected": -3.1672306060791016, + "logps/chosen": -246.53042602539062, + "logps/rejected": -403.95587158203125, + "loss": 0.2733, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4010918140411377, + "rewards/margins": 3.5880799293518066, + "rewards/rejected": -2.186988115310669, + "step": 1048 + }, + { + "epoch": 0.67, + "learning_rate": 2.6171563998934602e-08, + "logits/chosen": -3.2476909160614014, + "logits/rejected": -3.041114568710327, + "logps/chosen": -260.6804504394531, + "logps/rejected": -1059.4842529296875, + "loss": 0.3012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4732964038848877, + "rewards/margins": 6.443553924560547, + "rewards/rejected": -4.970257759094238, + "step": 1049 + }, + { + "epoch": 0.67, + "learning_rate": 2.608082294458074e-08, + "logits/chosen": -3.2277536392211914, + "logits/rejected": -3.1390795707702637, + "logps/chosen": -231.0445556640625, + "logps/rejected": -843.8873291015625, + "loss": 0.2702, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4389381408691406, + "rewards/margins": 6.208826541900635, + "rewards/rejected": -4.769888401031494, + "step": 1050 + }, + { + "epoch": 0.67, + "learning_rate": 2.5990183934208644e-08, + "logits/chosen": -3.2836642265319824, + "logits/rejected": -3.138838052749634, + "logps/chosen": -227.05401611328125, + "logps/rejected": -456.0504150390625, + "loss": 0.2516, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.302642822265625, + "rewards/margins": 3.8527956008911133, + "rewards/rejected": -2.550152540206909, + "step": 1051 + }, + { + "epoch": 0.67, + "learning_rate": 2.5899647354502403e-08, + "logits/chosen": -3.232466220855713, + "logits/rejected": -3.127822160720825, + "logps/chosen": -267.540771484375, + "logps/rejected": -311.86041259765625, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2977707386016846, + "rewards/margins": 2.6323227882385254, + "rewards/rejected": -1.3345520496368408, + "step": 1052 + }, + { + "epoch": 0.67, + "learning_rate": 2.5809213591709124e-08, + "logits/chosen": -3.239609718322754, + "logits/rejected": -3.147719383239746, + "logps/chosen": -235.77188110351562, + "logps/rejected": -586.3744506835938, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3694100379943848, + "rewards/margins": 4.789917945861816, + "rewards/rejected": -3.4205079078674316, + "step": 1053 + }, + { + "epoch": 0.67, + "learning_rate": 2.5718883031637312e-08, + "logits/chosen": -3.229999542236328, + "logits/rejected": -2.9783339500427246, + "logps/chosen": -257.8480529785156, + "logps/rejected": -1139.065673828125, + "loss": 0.282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3076958656311035, + "rewards/margins": 6.102453231811523, + "rewards/rejected": -4.794756889343262, + "step": 1054 + }, + { + "epoch": 0.67, + "learning_rate": 2.562865605965515e-08, + "logits/chosen": -3.2437148094177246, + "logits/rejected": -3.1351847648620605, + "logps/chosen": -313.1807861328125, + "logps/rejected": -524.3189697265625, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2703460454940796, + "rewards/margins": 3.869969129562378, + "rewards/rejected": -2.599623203277588, + "step": 1055 + }, + { + "epoch": 0.67, + "learning_rate": 2.5538533060688876e-08, + "logits/chosen": -3.2561960220336914, + "logits/rejected": -3.1002702713012695, + "logps/chosen": -273.87188720703125, + "logps/rejected": -546.1318969726562, + "loss": 0.3197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5542290210723877, + "rewards/margins": 4.845958709716797, + "rewards/rejected": -3.291729688644409, + "step": 1056 + }, + { + "epoch": 0.67, + "learning_rate": 2.5448514419221245e-08, + "logits/chosen": -3.195068120956421, + "logits/rejected": -3.0918521881103516, + "logps/chosen": -274.7899169921875, + "logps/rejected": -344.38397216796875, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1463897228240967, + "rewards/margins": 2.945413112640381, + "rewards/rejected": -1.7990233898162842, + "step": 1057 + }, + { + "epoch": 0.67, + "learning_rate": 2.535860051928967e-08, + "logits/chosen": -3.2089109420776367, + "logits/rejected": -3.130746841430664, + "logps/chosen": -242.5246124267578, + "logps/rejected": -780.5794067382812, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3438819646835327, + "rewards/margins": 6.047306060791016, + "rewards/rejected": -4.703423976898193, + "step": 1058 + }, + { + "epoch": 0.67, + "learning_rate": 2.5268791744484864e-08, + "logits/chosen": -3.2929773330688477, + "logits/rejected": -3.1331627368927, + "logps/chosen": -233.8415985107422, + "logps/rejected": -472.8952331542969, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.684790015220642, + "rewards/margins": 3.7217819690704346, + "rewards/rejected": -2.036991834640503, + "step": 1059 + }, + { + "epoch": 0.68, + "learning_rate": 2.517908847794889e-08, + "logits/chosen": -3.2114152908325195, + "logits/rejected": -3.161473274230957, + "logps/chosen": -277.2535400390625, + "logps/rejected": -1230.0938720703125, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6833863258361816, + "rewards/margins": 7.206576824188232, + "rewards/rejected": -5.523190498352051, + "step": 1060 + }, + { + "epoch": 0.68, + "learning_rate": 2.508949110237385e-08, + "logits/chosen": -3.2401199340820312, + "logits/rejected": -3.128061294555664, + "logps/chosen": -272.87640380859375, + "logps/rejected": -405.84295654296875, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4837219715118408, + "rewards/margins": 3.603714942932129, + "rewards/rejected": -2.119992971420288, + "step": 1061 + }, + { + "epoch": 0.68, + "learning_rate": 2.500000000000001e-08, + "logits/chosen": -3.226944923400879, + "logits/rejected": -3.139995574951172, + "logps/chosen": -290.9541015625, + "logps/rejected": -636.8284301757812, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4763389825820923, + "rewards/margins": 4.400188446044922, + "rewards/rejected": -2.923849582672119, + "step": 1062 + }, + { + "epoch": 0.68, + "learning_rate": 2.4910615552614238e-08, + "logits/chosen": -3.279068946838379, + "logits/rejected": -3.150547504425049, + "logps/chosen": -261.13055419921875, + "logps/rejected": -757.0240478515625, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.441218614578247, + "rewards/margins": 6.338683128356934, + "rewards/rejected": -4.897464275360107, + "step": 1063 + }, + { + "epoch": 0.68, + "learning_rate": 2.4821338141548466e-08, + "logits/chosen": -3.2968008518218994, + "logits/rejected": -3.169801712036133, + "logps/chosen": -242.77000427246094, + "logps/rejected": -607.9412231445312, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2289215326309204, + "rewards/margins": 4.625035285949707, + "rewards/rejected": -3.396113634109497, + "step": 1064 + }, + { + "epoch": 0.68, + "learning_rate": 2.4732168147677925e-08, + "logits/chosen": -3.3317975997924805, + "logits/rejected": -3.133476495742798, + "logps/chosen": -265.6158752441406, + "logps/rejected": -321.23248291015625, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4034240245819092, + "rewards/margins": 2.941098213195801, + "rewards/rejected": -1.5376739501953125, + "step": 1065 + }, + { + "epoch": 0.68, + "learning_rate": 2.4643105951419592e-08, + "logits/chosen": -3.2889294624328613, + "logits/rejected": -3.1256628036499023, + "logps/chosen": -250.7743682861328, + "logps/rejected": -531.5292358398438, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1944580078125, + "rewards/margins": 4.083776950836182, + "rewards/rejected": -2.8893189430236816, + "step": 1066 + }, + { + "epoch": 0.68, + "learning_rate": 2.455415193273055e-08, + "logits/chosen": -3.2577667236328125, + "logits/rejected": -3.070207118988037, + "logps/chosen": -237.3124237060547, + "logps/rejected": -366.0386657714844, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3393852710723877, + "rewards/margins": 2.7176260948181152, + "rewards/rejected": -1.378240942955017, + "step": 1067 + }, + { + "epoch": 0.68, + "learning_rate": 2.4465306471106457e-08, + "logits/chosen": -3.2678701877593994, + "logits/rejected": -3.177614450454712, + "logps/chosen": -285.1624450683594, + "logps/rejected": -566.077880859375, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.448114037513733, + "rewards/margins": 4.339879035949707, + "rewards/rejected": -2.8917648792266846, + "step": 1068 + }, + { + "epoch": 0.68, + "learning_rate": 2.437656994557969e-08, + "logits/chosen": -3.2552809715270996, + "logits/rejected": -3.0943667888641357, + "logps/chosen": -251.80426025390625, + "logps/rejected": -1062.94970703125, + "loss": 0.2681, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3475990295410156, + "rewards/margins": 6.14372444152832, + "rewards/rejected": -4.796125888824463, + "step": 1069 + }, + { + "epoch": 0.68, + "learning_rate": 2.4287942734718043e-08, + "logits/chosen": -3.2215778827667236, + "logits/rejected": -3.007298469543457, + "logps/chosen": -234.8343505859375, + "logps/rejected": -364.3463439941406, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3834015130996704, + "rewards/margins": 3.2098419666290283, + "rewards/rejected": -1.826440453529358, + "step": 1070 + }, + { + "epoch": 0.68, + "learning_rate": 2.419942521662285e-08, + "logits/chosen": -3.2346906661987305, + "logits/rejected": -3.145440101623535, + "logps/chosen": -251.9417266845703, + "logps/rejected": -425.38897705078125, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3480942249298096, + "rewards/margins": 3.370295763015747, + "rewards/rejected": -2.0222015380859375, + "step": 1071 + }, + { + "epoch": 0.68, + "learning_rate": 2.411101776892749e-08, + "logits/chosen": -3.183957099914551, + "logits/rejected": -3.0808329582214355, + "logps/chosen": -273.02490234375, + "logps/rejected": -386.663818359375, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.465467929840088, + "rewards/margins": 3.407745361328125, + "rewards/rejected": -1.9422776699066162, + "step": 1072 + }, + { + "epoch": 0.68, + "learning_rate": 2.402272076879583e-08, + "logits/chosen": -3.3104004859924316, + "logits/rejected": -2.930398941040039, + "logps/chosen": -282.2784118652344, + "logps/rejected": -1664.32568359375, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.289149522781372, + "rewards/margins": 7.399439811706543, + "rewards/rejected": -6.11029052734375, + "step": 1073 + }, + { + "epoch": 0.68, + "learning_rate": 2.3934534592920412e-08, + "logits/chosen": -3.2531471252441406, + "logits/rejected": -3.112548351287842, + "logps/chosen": -292.6285095214844, + "logps/rejected": -1104.441650390625, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.39459228515625, + "rewards/margins": 7.009783744812012, + "rewards/rejected": -5.615191459655762, + "step": 1074 + }, + { + "epoch": 0.69, + "learning_rate": 2.3846459617521126e-08, + "logits/chosen": -3.223662853240967, + "logits/rejected": -3.2119123935699463, + "logps/chosen": -241.4521942138672, + "logps/rejected": -923.9464111328125, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.499169945716858, + "rewards/margins": 6.655145645141602, + "rewards/rejected": -5.155975341796875, + "step": 1075 + }, + { + "epoch": 0.69, + "learning_rate": 2.3758496218343355e-08, + "logits/chosen": -3.253541946411133, + "logits/rejected": -3.107827663421631, + "logps/chosen": -273.8805236816406, + "logps/rejected": -518.2312622070312, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3085618019104004, + "rewards/margins": 4.435194492340088, + "rewards/rejected": -3.1266326904296875, + "step": 1076 + }, + { + "epoch": 0.69, + "learning_rate": 2.3670644770656515e-08, + "logits/chosen": -3.215775966644287, + "logits/rejected": -3.0420870780944824, + "logps/chosen": -240.15390014648438, + "logps/rejected": -733.4999389648438, + "loss": 0.3043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3717209100723267, + "rewards/margins": 4.583348274230957, + "rewards/rejected": -3.211627244949341, + "step": 1077 + }, + { + "epoch": 0.69, + "learning_rate": 2.3582905649252405e-08, + "logits/chosen": -3.282747268676758, + "logits/rejected": -3.1807613372802734, + "logps/chosen": -259.18701171875, + "logps/rejected": -764.2049560546875, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5343270301818848, + "rewards/margins": 5.4334282875061035, + "rewards/rejected": -3.8991012573242188, + "step": 1078 + }, + { + "epoch": 0.69, + "learning_rate": 2.3495279228443644e-08, + "logits/chosen": -3.2062034606933594, + "logits/rejected": -3.086442470550537, + "logps/chosen": -247.55494689941406, + "logps/rejected": -563.898681640625, + "loss": 0.3096, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3887526988983154, + "rewards/margins": 4.714224338531494, + "rewards/rejected": -3.3254716396331787, + "step": 1079 + }, + { + "epoch": 0.69, + "learning_rate": 2.340776588206202e-08, + "logits/chosen": -3.27754545211792, + "logits/rejected": -3.1808576583862305, + "logps/chosen": -259.427490234375, + "logps/rejected": -832.4898681640625, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4864708185195923, + "rewards/margins": 6.570171356201172, + "rewards/rejected": -5.083700656890869, + "step": 1080 + }, + { + "epoch": 0.69, + "learning_rate": 2.3320365983456918e-08, + "logits/chosen": -3.182054281234741, + "logits/rejected": -3.1501519680023193, + "logps/chosen": -240.7193145751953, + "logps/rejected": -563.806884765625, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3309662342071533, + "rewards/margins": 4.640625, + "rewards/rejected": -3.309659004211426, + "step": 1081 + }, + { + "epoch": 0.69, + "learning_rate": 2.3233079905493747e-08, + "logits/chosen": -3.2544307708740234, + "logits/rejected": -3.120896339416504, + "logps/chosen": -280.874755859375, + "logps/rejected": -1227.506103515625, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0882400274276733, + "rewards/margins": 6.784975051879883, + "rewards/rejected": -5.69673490524292, + "step": 1082 + }, + { + "epoch": 0.69, + "learning_rate": 2.314590802055232e-08, + "logits/chosen": -3.1905882358551025, + "logits/rejected": -3.1104376316070557, + "logps/chosen": -274.76287841796875, + "logps/rejected": -550.3428344726562, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3426307439804077, + "rewards/margins": 3.8459556102752686, + "rewards/rejected": -2.5033249855041504, + "step": 1083 + }, + { + "epoch": 0.69, + "learning_rate": 2.305885070052534e-08, + "logits/chosen": -3.2224082946777344, + "logits/rejected": -3.036011219024658, + "logps/chosen": -277.1987609863281, + "logps/rejected": -1765.099609375, + "loss": 0.4143, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4704300165176392, + "rewards/margins": 9.152641296386719, + "rewards/rejected": -7.682211399078369, + "step": 1084 + }, + { + "epoch": 0.69, + "learning_rate": 2.2971908316816636e-08, + "logits/chosen": -3.2168126106262207, + "logits/rejected": -3.034311294555664, + "logps/chosen": -273.05670166015625, + "logps/rejected": -1693.152587890625, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5714492797851562, + "rewards/margins": 9.840909957885742, + "rewards/rejected": -8.269460678100586, + "step": 1085 + }, + { + "epoch": 0.69, + "learning_rate": 2.288508124033981e-08, + "logits/chosen": -3.294748067855835, + "logits/rejected": -3.1396384239196777, + "logps/chosen": -245.79156494140625, + "logps/rejected": -421.78851318359375, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6421539783477783, + "rewards/margins": 3.6845169067382812, + "rewards/rejected": -2.042362928390503, + "step": 1086 + }, + { + "epoch": 0.69, + "learning_rate": 2.2798369841516485e-08, + "logits/chosen": -3.3116984367370605, + "logits/rejected": -3.2104175090789795, + "logps/chosen": -269.2357482910156, + "logps/rejected": -432.22943115234375, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3307815790176392, + "rewards/margins": 3.9363250732421875, + "rewards/rejected": -2.605543613433838, + "step": 1087 + }, + { + "epoch": 0.69, + "learning_rate": 2.2711774490274766e-08, + "logits/chosen": -3.2737302780151367, + "logits/rejected": -3.1743154525756836, + "logps/chosen": -296.9239807128906, + "logps/rejected": -505.91461181640625, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.428929090499878, + "rewards/margins": 4.339411735534668, + "rewards/rejected": -2.910482883453369, + "step": 1088 + }, + { + "epoch": 0.69, + "learning_rate": 2.2625295556047736e-08, + "logits/chosen": -3.210817575454712, + "logits/rejected": -3.1417882442474365, + "logps/chosen": -249.18154907226562, + "logps/rejected": -3446.1865234375, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.300305962562561, + "rewards/margins": 8.331541061401367, + "rewards/rejected": -7.0312347412109375, + "step": 1089 + }, + { + "epoch": 0.69, + "learning_rate": 2.2538933407771765e-08, + "logits/chosen": -3.203826427459717, + "logits/rejected": -3.1089627742767334, + "logps/chosen": -263.42694091796875, + "logps/rejected": -440.150634765625, + "loss": 0.2678, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3628737926483154, + "rewards/margins": 3.9720613956451416, + "rewards/rejected": -2.609187364578247, + "step": 1090 + }, + { + "epoch": 0.7, + "learning_rate": 2.2452688413885013e-08, + "logits/chosen": -3.2463603019714355, + "logits/rejected": -3.108032703399658, + "logps/chosen": -231.5072021484375, + "logps/rejected": -437.3611145019531, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.151892900466919, + "rewards/margins": 3.284600257873535, + "rewards/rejected": -2.132707118988037, + "step": 1091 + }, + { + "epoch": 0.7, + "learning_rate": 2.236656094232583e-08, + "logits/chosen": -3.267301321029663, + "logits/rejected": -3.1469149589538574, + "logps/chosen": -245.8528289794922, + "logps/rejected": -498.38824462890625, + "loss": 0.2697, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1413331031799316, + "rewards/margins": 3.8932647705078125, + "rewards/rejected": -2.751931667327881, + "step": 1092 + }, + { + "epoch": 0.7, + "learning_rate": 2.228055136053117e-08, + "logits/chosen": -3.234609603881836, + "logits/rejected": -3.1352081298828125, + "logps/chosen": -231.99734497070312, + "logps/rejected": -491.5348815917969, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3605103492736816, + "rewards/margins": 4.47136116027832, + "rewards/rejected": -3.1108505725860596, + "step": 1093 + }, + { + "epoch": 0.7, + "learning_rate": 2.2194660035435116e-08, + "logits/chosen": -3.2487711906433105, + "logits/rejected": -3.1416501998901367, + "logps/chosen": -280.25689697265625, + "logps/rejected": -491.8830261230469, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4273972511291504, + "rewards/margins": 3.994246006011963, + "rewards/rejected": -2.5668487548828125, + "step": 1094 + }, + { + "epoch": 0.7, + "learning_rate": 2.210888733346717e-08, + "logits/chosen": -3.2390689849853516, + "logits/rejected": -3.10085129737854, + "logps/chosen": -258.24530029296875, + "logps/rejected": -285.8058166503906, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.696781873703003, + "rewards/margins": 2.9462761878967285, + "rewards/rejected": -1.249494194984436, + "step": 1095 + }, + { + "epoch": 0.7, + "learning_rate": 2.2023233620550796e-08, + "logits/chosen": -3.2476630210876465, + "logits/rejected": -3.155107021331787, + "logps/chosen": -254.05328369140625, + "logps/rejected": -665.0140380859375, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2068564891815186, + "rewards/margins": 4.3140740394592285, + "rewards/rejected": -3.107217311859131, + "step": 1096 + }, + { + "epoch": 0.7, + "learning_rate": 2.193769926210181e-08, + "logits/chosen": -3.2749099731445312, + "logits/rejected": -3.1077184677124023, + "logps/chosen": -266.4297180175781, + "logps/rejected": -347.779296875, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.431531548500061, + "rewards/margins": 3.2526144981384277, + "rewards/rejected": -1.8210830688476562, + "step": 1097 + }, + { + "epoch": 0.7, + "learning_rate": 2.1852284623026902e-08, + "logits/chosen": -3.2044081687927246, + "logits/rejected": -3.019416332244873, + "logps/chosen": -257.60205078125, + "logps/rejected": -547.885498046875, + "loss": 0.2985, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.770166039466858, + "rewards/margins": 4.841705322265625, + "rewards/rejected": -3.0715394020080566, + "step": 1098 + }, + { + "epoch": 0.7, + "learning_rate": 2.176699006772189e-08, + "logits/chosen": -3.2328481674194336, + "logits/rejected": -3.1543264389038086, + "logps/chosen": -281.44354248046875, + "logps/rejected": -801.032958984375, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.327056884765625, + "rewards/margins": 5.192126750946045, + "rewards/rejected": -3.86506986618042, + "step": 1099 + }, + { + "epoch": 0.7, + "learning_rate": 2.168181596007042e-08, + "logits/chosen": -3.237865924835205, + "logits/rejected": -3.1724371910095215, + "logps/chosen": -232.35296630859375, + "logps/rejected": -881.7750244140625, + "loss": 0.2856, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1940056085586548, + "rewards/margins": 6.254699230194092, + "rewards/rejected": -5.060693740844727, + "step": 1100 + }, + { + "epoch": 0.7, + "learning_rate": 2.1596762663442214e-08, + "logits/chosen": -3.1952648162841797, + "logits/rejected": -3.0818183422088623, + "logps/chosen": -296.8883056640625, + "logps/rejected": -1297.88916015625, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3195037841796875, + "rewards/margins": 7.052508354187012, + "rewards/rejected": -5.733005046844482, + "step": 1101 + }, + { + "epoch": 0.7, + "learning_rate": 2.151183054069159e-08, + "logits/chosen": -3.214306354522705, + "logits/rejected": -3.163227081298828, + "logps/chosen": -282.12408447265625, + "logps/rejected": -857.678955078125, + "loss": 0.2799, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.436579942703247, + "rewards/margins": 5.50124979019165, + "rewards/rejected": -4.064669609069824, + "step": 1102 + }, + { + "epoch": 0.7, + "learning_rate": 2.1427019954155977e-08, + "logits/chosen": -3.248995780944824, + "logits/rejected": -3.1457571983337402, + "logps/chosen": -249.02503967285156, + "logps/rejected": -687.0505981445312, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4215484857559204, + "rewards/margins": 5.044363021850586, + "rewards/rejected": -3.622814893722534, + "step": 1103 + }, + { + "epoch": 0.7, + "learning_rate": 2.134233126565419e-08, + "logits/chosen": -3.171994686126709, + "logits/rejected": -3.0180840492248535, + "logps/chosen": -243.62957763671875, + "logps/rejected": -834.6697998046875, + "loss": 0.259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3172919750213623, + "rewards/margins": 4.964304447174072, + "rewards/rejected": -3.647012233734131, + "step": 1104 + }, + { + "epoch": 0.7, + "learning_rate": 2.1257764836485126e-08, + "logits/chosen": -3.2058451175689697, + "logits/rejected": -3.1008481979370117, + "logps/chosen": -256.6712341308594, + "logps/rejected": -535.75341796875, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6692932844161987, + "rewards/margins": 4.111065864562988, + "rewards/rejected": -2.4417724609375, + "step": 1105 + }, + { + "epoch": 0.7, + "learning_rate": 2.1173321027426024e-08, + "logits/chosen": -3.2854037284851074, + "logits/rejected": -3.2060959339141846, + "logps/chosen": -231.098388671875, + "logps/rejected": -689.370361328125, + "loss": 0.2794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1328438520431519, + "rewards/margins": 5.310889720916748, + "rewards/rejected": -4.178045749664307, + "step": 1106 + }, + { + "epoch": 0.71, + "learning_rate": 2.1089000198731027e-08, + "logits/chosen": -3.176657199859619, + "logits/rejected": -3.1540985107421875, + "logps/chosen": -273.5524597167969, + "logps/rejected": -1072.25634765625, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4762864112854004, + "rewards/margins": 7.526982307434082, + "rewards/rejected": -6.050695896148682, + "step": 1107 + }, + { + "epoch": 0.71, + "learning_rate": 2.1004802710129594e-08, + "logits/chosen": -3.279965877532959, + "logits/rejected": -3.066880702972412, + "logps/chosen": -279.919677734375, + "logps/rejected": -1318.8563232421875, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6810767650604248, + "rewards/margins": 7.429403305053711, + "rewards/rejected": -5.748326301574707, + "step": 1108 + }, + { + "epoch": 0.71, + "learning_rate": 2.0920728920825043e-08, + "logits/chosen": -3.2338991165161133, + "logits/rejected": -3.1558878421783447, + "logps/chosen": -256.005615234375, + "logps/rejected": -702.5715942382812, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5095771551132202, + "rewards/margins": 5.937969207763672, + "rewards/rejected": -4.428391933441162, + "step": 1109 + }, + { + "epoch": 0.71, + "learning_rate": 2.083677918949292e-08, + "logits/chosen": -3.258871078491211, + "logits/rejected": -3.2118701934814453, + "logps/chosen": -276.7382507324219, + "logps/rejected": -840.1412963867188, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.365814208984375, + "rewards/margins": 4.948458671569824, + "rewards/rejected": -3.5826447010040283, + "step": 1110 + }, + { + "epoch": 0.71, + "learning_rate": 2.075295387427951e-08, + "logits/chosen": -3.2234745025634766, + "logits/rejected": -3.1056087017059326, + "logps/chosen": -290.0656433105469, + "logps/rejected": -717.8157958984375, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4875214099884033, + "rewards/margins": 5.0093560218811035, + "rewards/rejected": -3.5218346118927, + "step": 1111 + }, + { + "epoch": 0.71, + "learning_rate": 2.066925333280039e-08, + "logits/chosen": -3.2346577644348145, + "logits/rejected": -3.093306064605713, + "logps/chosen": -241.0120849609375, + "logps/rejected": -695.227783203125, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3001716136932373, + "rewards/margins": 5.417125701904297, + "rewards/rejected": -4.1169538497924805, + "step": 1112 + }, + { + "epoch": 0.71, + "learning_rate": 2.0585677922138693e-08, + "logits/chosen": -3.165544271469116, + "logits/rejected": -3.071711778640747, + "logps/chosen": -266.2682800292969, + "logps/rejected": -367.34228515625, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.592976450920105, + "rewards/margins": 3.4728331565856934, + "rewards/rejected": -1.879856824874878, + "step": 1113 + }, + { + "epoch": 0.71, + "learning_rate": 2.050222799884387e-08, + "logits/chosen": -3.269646167755127, + "logits/rejected": -3.1465036869049072, + "logps/chosen": -262.8902587890625, + "logps/rejected": -792.046630859375, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4131790399551392, + "rewards/margins": 5.724562168121338, + "rewards/rejected": -4.311383247375488, + "step": 1114 + }, + { + "epoch": 0.71, + "learning_rate": 2.0418903918929876e-08, + "logits/chosen": -3.236546516418457, + "logits/rejected": -3.1135807037353516, + "logps/chosen": -261.09564208984375, + "logps/rejected": -545.0960083007812, + "loss": 0.3307, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8392425775527954, + "rewards/margins": 5.233538627624512, + "rewards/rejected": -3.394296169281006, + "step": 1115 + }, + { + "epoch": 0.71, + "learning_rate": 2.0335706037873907e-08, + "logits/chosen": -3.3146910667419434, + "logits/rejected": -3.1123595237731934, + "logps/chosen": -249.48291015625, + "logps/rejected": -597.8258056640625, + "loss": 0.2773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5601669549942017, + "rewards/margins": 4.57229471206665, + "rewards/rejected": -3.0121278762817383, + "step": 1116 + }, + { + "epoch": 0.71, + "learning_rate": 2.0252634710614706e-08, + "logits/chosen": -3.228695869445801, + "logits/rejected": -3.1201446056365967, + "logps/chosen": -270.64154052734375, + "logps/rejected": -1354.737548828125, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6551895141601562, + "rewards/margins": 7.742384910583496, + "rewards/rejected": -6.087194919586182, + "step": 1117 + }, + { + "epoch": 0.71, + "learning_rate": 2.016969029155112e-08, + "logits/chosen": -3.3028573989868164, + "logits/rejected": -3.1494221687316895, + "logps/chosen": -269.23785400390625, + "logps/rejected": -505.6056823730469, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5144431591033936, + "rewards/margins": 4.271668434143066, + "rewards/rejected": -2.7572250366210938, + "step": 1118 + }, + { + "epoch": 0.71, + "learning_rate": 2.008687313454062e-08, + "logits/chosen": -3.2515268325805664, + "logits/rejected": -3.203207015991211, + "logps/chosen": -271.6136474609375, + "logps/rejected": -512.4083251953125, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4246238470077515, + "rewards/margins": 4.415183067321777, + "rewards/rejected": -2.9905595779418945, + "step": 1119 + }, + { + "epoch": 0.71, + "learning_rate": 2.0004183592897712e-08, + "logits/chosen": -3.1751179695129395, + "logits/rejected": -3.1112067699432373, + "logps/chosen": -290.74029541015625, + "logps/rejected": -495.9465637207031, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3213669061660767, + "rewards/margins": 3.657789468765259, + "rewards/rejected": -2.3364226818084717, + "step": 1120 + }, + { + "epoch": 0.71, + "learning_rate": 1.9921622019392465e-08, + "logits/chosen": -3.282064437866211, + "logits/rejected": -3.185620069503784, + "logps/chosen": -226.572265625, + "logps/rejected": -632.2816162109375, + "loss": 0.2767, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3105636835098267, + "rewards/margins": 5.162736415863037, + "rewards/rejected": -3.8521728515625, + "step": 1121 + }, + { + "epoch": 0.72, + "learning_rate": 1.983918876624902e-08, + "logits/chosen": -3.2590928077697754, + "logits/rejected": -3.049712657928467, + "logps/chosen": -252.2277374267578, + "logps/rejected": -1612.9351806640625, + "loss": 0.274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.594477891921997, + "rewards/margins": 9.479230880737305, + "rewards/rejected": -7.884753227233887, + "step": 1122 + }, + { + "epoch": 0.72, + "learning_rate": 1.9756884185144123e-08, + "logits/chosen": -3.282742500305176, + "logits/rejected": -3.1713716983795166, + "logps/chosen": -228.92190551757812, + "logps/rejected": -343.3427734375, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4932037591934204, + "rewards/margins": 3.444319248199463, + "rewards/rejected": -1.951115369796753, + "step": 1123 + }, + { + "epoch": 0.72, + "learning_rate": 1.9674708627205483e-08, + "logits/chosen": -3.232823371887207, + "logits/rejected": -3.0639772415161133, + "logps/chosen": -244.74832153320312, + "logps/rejected": -738.7805786132812, + "loss": 0.279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3161277770996094, + "rewards/margins": 5.267406463623047, + "rewards/rejected": -3.9512786865234375, + "step": 1124 + }, + { + "epoch": 0.72, + "learning_rate": 1.9592662443010467e-08, + "logits/chosen": -3.2333569526672363, + "logits/rejected": -3.0862317085266113, + "logps/chosen": -289.530517578125, + "logps/rejected": -240.6334686279297, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5326858758926392, + "rewards/margins": 2.592062473297119, + "rewards/rejected": -1.0593764781951904, + "step": 1125 + }, + { + "epoch": 0.72, + "learning_rate": 1.9510745982584454e-08, + "logits/chosen": -3.220444679260254, + "logits/rejected": -3.1428637504577637, + "logps/chosen": -280.28289794921875, + "logps/rejected": -570.5428466796875, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1638717651367188, + "rewards/margins": 4.401350498199463, + "rewards/rejected": -3.237478733062744, + "step": 1126 + }, + { + "epoch": 0.72, + "learning_rate": 1.9428959595399385e-08, + "logits/chosen": -3.241265296936035, + "logits/rejected": -3.165001392364502, + "logps/chosen": -261.2923583984375, + "logps/rejected": -931.5439453125, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.372441053390503, + "rewards/margins": 5.806549072265625, + "rewards/rejected": -4.434107780456543, + "step": 1127 + }, + { + "epoch": 0.72, + "learning_rate": 1.934730363037237e-08, + "logits/chosen": -3.221968650817871, + "logits/rejected": -3.1307425498962402, + "logps/chosen": -233.3523712158203, + "logps/rejected": -674.1755981445312, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.258704423904419, + "rewards/margins": 5.049221038818359, + "rewards/rejected": -3.7905166149139404, + "step": 1128 + }, + { + "epoch": 0.72, + "learning_rate": 1.9265778435863965e-08, + "logits/chosen": -3.2204017639160156, + "logits/rejected": -3.1350903511047363, + "logps/chosen": -250.21826171875, + "logps/rejected": -1721.8948974609375, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1350151300430298, + "rewards/margins": 7.464427947998047, + "rewards/rejected": -6.329412937164307, + "step": 1129 + }, + { + "epoch": 0.72, + "learning_rate": 1.9184384359676985e-08, + "logits/chosen": -3.209545135498047, + "logits/rejected": -3.1485595703125, + "logps/chosen": -299.53973388671875, + "logps/rejected": -386.49957275390625, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.299176812171936, + "rewards/margins": 3.513896942138672, + "rewards/rejected": -2.2147202491760254, + "step": 1130 + }, + { + "epoch": 0.72, + "learning_rate": 1.9103121749054768e-08, + "logits/chosen": -3.2004051208496094, + "logits/rejected": -3.1530990600585938, + "logps/chosen": -236.5009765625, + "logps/rejected": -708.8785400390625, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.290259599685669, + "rewards/margins": 4.93733024597168, + "rewards/rejected": -3.6470704078674316, + "step": 1131 + }, + { + "epoch": 0.72, + "learning_rate": 1.9021990950679823e-08, + "logits/chosen": -3.2559690475463867, + "logits/rejected": -3.1802568435668945, + "logps/chosen": -224.6979522705078, + "logps/rejected": -1363.86962890625, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.269801378250122, + "rewards/margins": 8.26820182800293, + "rewards/rejected": -6.998400688171387, + "step": 1132 + }, + { + "epoch": 0.72, + "learning_rate": 1.8940992310672317e-08, + "logits/chosen": -3.206540584564209, + "logits/rejected": -3.106659412384033, + "logps/chosen": -251.65423583984375, + "logps/rejected": -758.1539306640625, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3382766246795654, + "rewards/margins": 5.308290481567383, + "rewards/rejected": -3.9700136184692383, + "step": 1133 + }, + { + "epoch": 0.72, + "learning_rate": 1.8860126174588636e-08, + "logits/chosen": -3.18690824508667, + "logits/rejected": -3.1000256538391113, + "logps/chosen": -269.82733154296875, + "logps/rejected": -1339.9857177734375, + "loss": 0.265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7301833629608154, + "rewards/margins": 9.09779167175293, + "rewards/rejected": -7.367609024047852, + "step": 1134 + }, + { + "epoch": 0.72, + "learning_rate": 1.8779392887419844e-08, + "logits/chosen": -3.2617907524108887, + "logits/rejected": -3.1361265182495117, + "logps/chosen": -236.45849609375, + "logps/rejected": -640.5264892578125, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.320375919342041, + "rewards/margins": 4.765161991119385, + "rewards/rejected": -3.4447860717773438, + "step": 1135 + }, + { + "epoch": 0.72, + "learning_rate": 1.8698792793590235e-08, + "logits/chosen": -3.2203097343444824, + "logits/rejected": -3.1953439712524414, + "logps/chosen": -233.26805114746094, + "logps/rejected": -624.8253173828125, + "loss": 0.2834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3601486682891846, + "rewards/margins": 5.102067947387695, + "rewards/rejected": -3.7419190406799316, + "step": 1136 + }, + { + "epoch": 0.72, + "learning_rate": 1.8618326236955905e-08, + "logits/chosen": -3.218930244445801, + "logits/rejected": -3.1166484355926514, + "logps/chosen": -294.72283935546875, + "logps/rejected": -691.902587890625, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3673386573791504, + "rewards/margins": 4.2641801834106445, + "rewards/rejected": -2.896841526031494, + "step": 1137 + }, + { + "epoch": 0.73, + "learning_rate": 1.853799356080322e-08, + "logits/chosen": -3.2488789558410645, + "logits/rejected": -3.0931785106658936, + "logps/chosen": -258.47552490234375, + "logps/rejected": -712.4779052734375, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2421562671661377, + "rewards/margins": 4.871510982513428, + "rewards/rejected": -3.629354953765869, + "step": 1138 + }, + { + "epoch": 0.73, + "learning_rate": 1.8457795107847434e-08, + "logits/chosen": -3.2228810787200928, + "logits/rejected": -3.1336169242858887, + "logps/chosen": -285.17034912109375, + "logps/rejected": -746.9525146484375, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5900589227676392, + "rewards/margins": 6.162535190582275, + "rewards/rejected": -4.572476387023926, + "step": 1139 + }, + { + "epoch": 0.73, + "learning_rate": 1.837773122023114e-08, + "logits/chosen": -3.3175251483917236, + "logits/rejected": -3.1989614963531494, + "logps/chosen": -301.65081787109375, + "logps/rejected": -587.4364013671875, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3673293590545654, + "rewards/margins": 5.370251655578613, + "rewards/rejected": -4.002922058105469, + "step": 1140 + }, + { + "epoch": 0.73, + "learning_rate": 1.8297802239522848e-08, + "logits/chosen": -3.306105613708496, + "logits/rejected": -3.1385340690612793, + "logps/chosen": -248.207763671875, + "logps/rejected": -279.8619384765625, + "loss": 0.2761, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.543036699295044, + "rewards/margins": 3.0850839614868164, + "rewards/rejected": -1.542047142982483, + "step": 1141 + }, + { + "epoch": 0.73, + "learning_rate": 1.8218008506715544e-08, + "logits/chosen": -3.2398858070373535, + "logits/rejected": -3.096595525741577, + "logps/chosen": -272.7388000488281, + "logps/rejected": -778.4338989257812, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3710198402404785, + "rewards/margins": 5.03791618347168, + "rewards/rejected": -3.666896104812622, + "step": 1142 + }, + { + "epoch": 0.73, + "learning_rate": 1.8138350362225192e-08, + "logits/chosen": -3.202819347381592, + "logits/rejected": -3.1843457221984863, + "logps/chosen": -270.606201171875, + "logps/rejected": -1183.334716796875, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6537787914276123, + "rewards/margins": 8.23042106628418, + "rewards/rejected": -6.576642036437988, + "step": 1143 + }, + { + "epoch": 0.73, + "learning_rate": 1.8058828145889366e-08, + "logits/chosen": -3.238905429840088, + "logits/rejected": -3.1498074531555176, + "logps/chosen": -255.67784118652344, + "logps/rejected": -294.7753601074219, + "loss": 0.2888, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4992492198944092, + "rewards/margins": 3.2549126148223877, + "rewards/rejected": -1.7556633949279785, + "step": 1144 + }, + { + "epoch": 0.73, + "learning_rate": 1.7979442196965687e-08, + "logits/chosen": -3.2586934566497803, + "logits/rejected": -3.0817039012908936, + "logps/chosen": -227.28488159179688, + "logps/rejected": -588.29052734375, + "loss": 0.2608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5986747741699219, + "rewards/margins": 4.715638160705566, + "rewards/rejected": -3.1169631481170654, + "step": 1145 + }, + { + "epoch": 0.73, + "learning_rate": 1.7900192854130464e-08, + "logits/chosen": -3.2000882625579834, + "logits/rejected": -3.1032497882843018, + "logps/chosen": -255.67611694335938, + "logps/rejected": -361.5417175292969, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.336035132408142, + "rewards/margins": 3.2158355712890625, + "rewards/rejected": -1.8798004388809204, + "step": 1146 + }, + { + "epoch": 0.73, + "learning_rate": 1.7821080455477188e-08, + "logits/chosen": -3.3015666007995605, + "logits/rejected": -3.145040988922119, + "logps/chosen": -241.61106872558594, + "logps/rejected": -683.2824096679688, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.521763563156128, + "rewards/margins": 5.467074394226074, + "rewards/rejected": -3.9453110694885254, + "step": 1147 + }, + { + "epoch": 0.73, + "learning_rate": 1.7742105338515134e-08, + "logits/chosen": -3.298161029815674, + "logits/rejected": -3.192351818084717, + "logps/chosen": -233.20123291015625, + "logps/rejected": -697.605224609375, + "loss": 0.2672, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.519239902496338, + "rewards/margins": 5.657121658325195, + "rewards/rejected": -4.137881278991699, + "step": 1148 + }, + { + "epoch": 0.73, + "learning_rate": 1.7663267840167934e-08, + "logits/chosen": -3.2046306133270264, + "logits/rejected": -3.1045737266540527, + "logps/chosen": -250.58291625976562, + "logps/rejected": -408.28662109375, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3299720287322998, + "rewards/margins": 3.7146825790405273, + "rewards/rejected": -2.3847107887268066, + "step": 1149 + }, + { + "epoch": 0.73, + "learning_rate": 1.7584568296772075e-08, + "logits/chosen": -3.225717544555664, + "logits/rejected": -3.1314139366149902, + "logps/chosen": -266.6753845214844, + "logps/rejected": -523.41650390625, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3528587818145752, + "rewards/margins": 4.34972620010376, + "rewards/rejected": -2.9968674182891846, + "step": 1150 + }, + { + "epoch": 0.73, + "learning_rate": 1.7506007044075505e-08, + "logits/chosen": -3.2366886138916016, + "logits/rejected": -3.08848237991333, + "logps/chosen": -266.82763671875, + "logps/rejected": -685.2903442382812, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6229064464569092, + "rewards/margins": 5.062186241149902, + "rewards/rejected": -3.439279317855835, + "step": 1151 + }, + { + "epoch": 0.73, + "learning_rate": 1.742758441723619e-08, + "logits/chosen": -3.213723659515381, + "logits/rejected": -3.0666399002075195, + "logps/chosen": -256.31353759765625, + "logps/rejected": -282.59027099609375, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4452118873596191, + "rewards/margins": 2.884204864501953, + "rewards/rejected": -1.4389930963516235, + "step": 1152 + }, + { + "epoch": 0.73, + "learning_rate": 1.7349300750820756e-08, + "logits/chosen": -3.263113021850586, + "logits/rejected": -3.149796485900879, + "logps/chosen": -266.4593200683594, + "logps/rejected": -560.768798828125, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0973281860351562, + "rewards/margins": 4.3531999588012695, + "rewards/rejected": -3.2558717727661133, + "step": 1153 + }, + { + "epoch": 0.74, + "learning_rate": 1.727115637880287e-08, + "logits/chosen": -3.228640079498291, + "logits/rejected": -3.1311874389648438, + "logps/chosen": -233.3045654296875, + "logps/rejected": -803.1239624023438, + "loss": 0.2681, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4065101146697998, + "rewards/margins": 5.647761344909668, + "rewards/rejected": -4.241250991821289, + "step": 1154 + }, + { + "epoch": 0.74, + "learning_rate": 1.719315163456207e-08, + "logits/chosen": -3.303170680999756, + "logits/rejected": -3.188279867172241, + "logps/chosen": -234.33584594726562, + "logps/rejected": -351.09979248046875, + "loss": 0.2541, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.406059980392456, + "rewards/margins": 3.507901906967163, + "rewards/rejected": -2.101841688156128, + "step": 1155 + }, + { + "epoch": 0.74, + "learning_rate": 1.711528685088214e-08, + "logits/chosen": -3.0838634967803955, + "logits/rejected": -3.1175122261047363, + "logps/chosen": -281.01513671875, + "logps/rejected": -729.3819580078125, + "loss": 0.2863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.454829454421997, + "rewards/margins": 5.768004417419434, + "rewards/rejected": -4.313174724578857, + "step": 1156 + }, + { + "epoch": 0.74, + "learning_rate": 1.703756235994977e-08, + "logits/chosen": -3.298799991607666, + "logits/rejected": -3.143080472946167, + "logps/chosen": -259.9078369140625, + "logps/rejected": -388.341064453125, + "loss": 0.3043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5219597816467285, + "rewards/margins": 3.397721290588379, + "rewards/rejected": -1.8757615089416504, + "step": 1157 + }, + { + "epoch": 0.74, + "learning_rate": 1.695997849335319e-08, + "logits/chosen": -3.1888084411621094, + "logits/rejected": -3.1352081298828125, + "logps/chosen": -251.05931091308594, + "logps/rejected": -917.3384399414062, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3330268859863281, + "rewards/margins": 7.094635963439941, + "rewards/rejected": -5.761609077453613, + "step": 1158 + }, + { + "epoch": 0.74, + "learning_rate": 1.688253558208059e-08, + "logits/chosen": -3.2009029388427734, + "logits/rejected": -3.1488490104675293, + "logps/chosen": -239.39736938476562, + "logps/rejected": -1098.298828125, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3316818475723267, + "rewards/margins": 6.493239402770996, + "rewards/rejected": -5.161557197570801, + "step": 1159 + }, + { + "epoch": 0.74, + "learning_rate": 1.6805233956518933e-08, + "logits/chosen": -3.1981306076049805, + "logits/rejected": -3.0305981636047363, + "logps/chosen": -259.78228759765625, + "logps/rejected": -1164.36865234375, + "loss": 0.2918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.471022129058838, + "rewards/margins": 6.039119243621826, + "rewards/rejected": -4.568097114562988, + "step": 1160 + }, + { + "epoch": 0.74, + "learning_rate": 1.672807394645236e-08, + "logits/chosen": -3.2177505493164062, + "logits/rejected": -3.1058356761932373, + "logps/chosen": -269.71417236328125, + "logps/rejected": -808.4000244140625, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7321288585662842, + "rewards/margins": 5.92913818359375, + "rewards/rejected": -4.197009086608887, + "step": 1161 + }, + { + "epoch": 0.74, + "learning_rate": 1.6651055881060867e-08, + "logits/chosen": -3.1625165939331055, + "logits/rejected": -3.0891246795654297, + "logps/chosen": -275.6501159667969, + "logps/rejected": -251.39630126953125, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2896714210510254, + "rewards/margins": 2.381690263748169, + "rewards/rejected": -1.092018961906433, + "step": 1162 + }, + { + "epoch": 0.74, + "learning_rate": 1.657418008891887e-08, + "logits/chosen": -3.2444424629211426, + "logits/rejected": -3.1214747428894043, + "logps/chosen": -274.04766845703125, + "logps/rejected": -431.31170654296875, + "loss": 0.2738, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.298649549484253, + "rewards/margins": 3.566906690597534, + "rewards/rejected": -2.2682571411132812, + "step": 1163 + }, + { + "epoch": 0.74, + "learning_rate": 1.6497446897993882e-08, + "logits/chosen": -3.3237102031707764, + "logits/rejected": -3.1305742263793945, + "logps/chosen": -240.64413452148438, + "logps/rejected": -281.35784912109375, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2406845092773438, + "rewards/margins": 2.904815673828125, + "rewards/rejected": -1.6641311645507812, + "step": 1164 + }, + { + "epoch": 0.74, + "learning_rate": 1.6420856635644975e-08, + "logits/chosen": -3.2262182235717773, + "logits/rejected": -3.176065444946289, + "logps/chosen": -282.30352783203125, + "logps/rejected": -629.281005859375, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3875000476837158, + "rewards/margins": 4.735513210296631, + "rewards/rejected": -3.348013401031494, + "step": 1165 + }, + { + "epoch": 0.74, + "learning_rate": 1.634440962862148e-08, + "logits/chosen": -3.223360061645508, + "logits/rejected": -3.1290853023529053, + "logps/chosen": -251.94091796875, + "logps/rejected": -439.5893249511719, + "loss": 0.2845, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6650910377502441, + "rewards/margins": 4.059840679168701, + "rewards/rejected": -2.394749402999878, + "step": 1166 + }, + { + "epoch": 0.74, + "learning_rate": 1.6268106203061628e-08, + "logits/chosen": -3.2412052154541016, + "logits/rejected": -3.2177205085754395, + "logps/chosen": -257.98565673828125, + "logps/rejected": -634.6219482421875, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.162092685699463, + "rewards/margins": 5.592528343200684, + "rewards/rejected": -4.4304351806640625, + "step": 1167 + }, + { + "epoch": 0.74, + "learning_rate": 1.6191946684490994e-08, + "logits/chosen": -3.2213706970214844, + "logits/rejected": -3.2137155532836914, + "logps/chosen": -229.8275604248047, + "logps/rejected": -607.7182006835938, + "loss": 0.2743, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3628525733947754, + "rewards/margins": 5.110957145690918, + "rewards/rejected": -3.748105049133301, + "step": 1168 + }, + { + "epoch": 0.75, + "learning_rate": 1.6115931397821343e-08, + "logits/chosen": -3.317366600036621, + "logits/rejected": -3.1794369220733643, + "logps/chosen": -298.24517822265625, + "logps/rejected": -721.88623046875, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5015060901641846, + "rewards/margins": 6.218047142028809, + "rewards/rejected": -4.716540813446045, + "step": 1169 + }, + { + "epoch": 0.75, + "learning_rate": 1.6040060667348992e-08, + "logits/chosen": -3.2496471405029297, + "logits/rejected": -3.143620729446411, + "logps/chosen": -236.90382385253906, + "logps/rejected": -942.6639404296875, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4508002996444702, + "rewards/margins": 6.694397926330566, + "rewards/rejected": -5.243597507476807, + "step": 1170 + }, + { + "epoch": 0.75, + "learning_rate": 1.596433481675366e-08, + "logits/chosen": -3.2241506576538086, + "logits/rejected": -3.144721746444702, + "logps/chosen": -269.85125732421875, + "logps/rejected": -657.6609497070312, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4661239385604858, + "rewards/margins": 5.349267959594727, + "rewards/rejected": -3.883143663406372, + "step": 1171 + }, + { + "epoch": 0.75, + "learning_rate": 1.5888754169096925e-08, + "logits/chosen": -3.2839903831481934, + "logits/rejected": -3.1295828819274902, + "logps/chosen": -282.962158203125, + "logps/rejected": -697.3530883789062, + "loss": 0.2758, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.253807783126831, + "rewards/margins": 5.462279796600342, + "rewards/rejected": -4.208471775054932, + "step": 1172 + }, + { + "epoch": 0.75, + "learning_rate": 1.5813319046820887e-08, + "logits/chosen": -3.2812790870666504, + "logits/rejected": -3.0794835090637207, + "logps/chosen": -245.8475799560547, + "logps/rejected": -830.1863403320312, + "loss": 0.28, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5451805591583252, + "rewards/margins": 5.2594780921936035, + "rewards/rejected": -3.7142975330352783, + "step": 1173 + }, + { + "epoch": 0.75, + "learning_rate": 1.5738029771746863e-08, + "logits/chosen": -3.240030527114868, + "logits/rejected": -3.1381967067718506, + "logps/chosen": -280.7857971191406, + "logps/rejected": -494.3043518066406, + "loss": 0.3032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1930855512619019, + "rewards/margins": 3.6993508338928223, + "rewards/rejected": -2.506265163421631, + "step": 1174 + }, + { + "epoch": 0.75, + "learning_rate": 1.566288666507391e-08, + "logits/chosen": -3.2311604022979736, + "logits/rejected": -3.1093344688415527, + "logps/chosen": -243.15890502929688, + "logps/rejected": -971.8238525390625, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3542542457580566, + "rewards/margins": 6.200170516967773, + "rewards/rejected": -4.845916748046875, + "step": 1175 + }, + { + "epoch": 0.75, + "learning_rate": 1.5587890047377512e-08, + "logits/chosen": -3.2136189937591553, + "logits/rejected": -3.1309499740600586, + "logps/chosen": -261.45306396484375, + "logps/rejected": -542.706298828125, + "loss": 0.2792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.415608286857605, + "rewards/margins": 4.159100532531738, + "rewards/rejected": -2.7434921264648438, + "step": 1176 + }, + { + "epoch": 0.75, + "learning_rate": 1.551304023860819e-08, + "logits/chosen": -3.2892532348632812, + "logits/rejected": -3.161550760269165, + "logps/chosen": -237.20516967773438, + "logps/rejected": -505.4142150878906, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.579110026359558, + "rewards/margins": 3.984574317932129, + "rewards/rejected": -2.4054641723632812, + "step": 1177 + }, + { + "epoch": 0.75, + "learning_rate": 1.5438337558090203e-08, + "logits/chosen": -3.2934341430664062, + "logits/rejected": -3.186992645263672, + "logps/chosen": -228.52734375, + "logps/rejected": -927.2271118164062, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3557541370391846, + "rewards/margins": 5.6802215576171875, + "rewards/rejected": -4.324467658996582, + "step": 1178 + }, + { + "epoch": 0.75, + "learning_rate": 1.5363782324520032e-08, + "logits/chosen": -3.1606266498565674, + "logits/rejected": -3.0887107849121094, + "logps/chosen": -278.6351318359375, + "logps/rejected": -943.640869140625, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5260604619979858, + "rewards/margins": 5.936707973480225, + "rewards/rejected": -4.410647392272949, + "step": 1179 + }, + { + "epoch": 0.75, + "learning_rate": 1.5289374855965226e-08, + "logits/chosen": -3.2437400817871094, + "logits/rejected": -3.1192715167999268, + "logps/chosen": -261.58001708984375, + "logps/rejected": -409.9117431640625, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.471174716949463, + "rewards/margins": 4.2543230056762695, + "rewards/rejected": -2.7831482887268066, + "step": 1180 + }, + { + "epoch": 0.75, + "learning_rate": 1.521511546986288e-08, + "logits/chosen": -3.2067360877990723, + "logits/rejected": -3.128493309020996, + "logps/chosen": -275.15411376953125, + "logps/rejected": -639.2181396484375, + "loss": 0.2788, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4592605829238892, + "rewards/margins": 4.6733293533325195, + "rewards/rejected": -3.2140684127807617, + "step": 1181 + }, + { + "epoch": 0.75, + "learning_rate": 1.5141004483018322e-08, + "logits/chosen": -3.2297987937927246, + "logits/rejected": -3.11788272857666, + "logps/chosen": -270.7837829589844, + "logps/rejected": -781.4727172851562, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1512749195098877, + "rewards/margins": 5.880858421325684, + "rewards/rejected": -4.729583740234375, + "step": 1182 + }, + { + "epoch": 0.75, + "learning_rate": 1.5067042211603877e-08, + "logits/chosen": -3.1843318939208984, + "logits/rejected": -3.1631202697753906, + "logps/chosen": -275.3379211425781, + "logps/rejected": -672.2784423828125, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1813637018203735, + "rewards/margins": 5.086280345916748, + "rewards/rejected": -3.904916524887085, + "step": 1183 + }, + { + "epoch": 0.75, + "learning_rate": 1.4993228971157275e-08, + "logits/chosen": -3.1783595085144043, + "logits/rejected": -3.0861475467681885, + "logps/chosen": -233.4346160888672, + "logps/rejected": -725.7986450195312, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.42607581615448, + "rewards/margins": 5.52581787109375, + "rewards/rejected": -4.099742412567139, + "step": 1184 + }, + { + "epoch": 0.76, + "learning_rate": 1.4919565076580577e-08, + "logits/chosen": -3.288353204727173, + "logits/rejected": -3.099459171295166, + "logps/chosen": -241.45831298828125, + "logps/rejected": -581.9034423828125, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5137207508087158, + "rewards/margins": 4.691567897796631, + "rewards/rejected": -3.177847385406494, + "step": 1185 + }, + { + "epoch": 0.76, + "learning_rate": 1.484605084213864e-08, + "logits/chosen": -3.230280876159668, + "logits/rejected": -3.033745288848877, + "logps/chosen": -245.06858825683594, + "logps/rejected": -827.656494140625, + "loss": 0.2656, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5027985572814941, + "rewards/margins": 5.91827392578125, + "rewards/rejected": -4.415475368499756, + "step": 1186 + }, + { + "epoch": 0.76, + "learning_rate": 1.4772686581457855e-08, + "logits/chosen": -3.2341175079345703, + "logits/rejected": -3.0900797843933105, + "logps/chosen": -243.02386474609375, + "logps/rejected": -450.186767578125, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6387726068496704, + "rewards/margins": 3.9995131492614746, + "rewards/rejected": -2.3607406616210938, + "step": 1187 + }, + { + "epoch": 0.76, + "learning_rate": 1.4699472607524783e-08, + "logits/chosen": -3.2378957271575928, + "logits/rejected": -3.0622525215148926, + "logps/chosen": -267.33233642578125, + "logps/rejected": -482.1734619140625, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5262024402618408, + "rewards/margins": 3.5235824584960938, + "rewards/rejected": -1.9973801374435425, + "step": 1188 + }, + { + "epoch": 0.76, + "learning_rate": 1.4626409232684872e-08, + "logits/chosen": -3.2078280448913574, + "logits/rejected": -3.2102303504943848, + "logps/chosen": -276.15380859375, + "logps/rejected": -407.1802673339844, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4299026727676392, + "rewards/margins": 3.5501511096954346, + "rewards/rejected": -2.120248317718506, + "step": 1189 + }, + { + "epoch": 0.76, + "learning_rate": 1.455349676864105e-08, + "logits/chosen": -3.2411587238311768, + "logits/rejected": -3.0423314571380615, + "logps/chosen": -229.02684020996094, + "logps/rejected": -951.97705078125, + "loss": 0.2773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3869155645370483, + "rewards/margins": 7.187877178192139, + "rewards/rejected": -5.800961494445801, + "step": 1190 + }, + { + "epoch": 0.76, + "learning_rate": 1.4480735526452426e-08, + "logits/chosen": -3.2745423316955566, + "logits/rejected": -3.0548243522644043, + "logps/chosen": -256.314697265625, + "logps/rejected": -1359.0797119140625, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3419189453125, + "rewards/margins": 6.9425554275512695, + "rewards/rejected": -5.6006364822387695, + "step": 1191 + }, + { + "epoch": 0.76, + "learning_rate": 1.4408125816532979e-08, + "logits/chosen": -3.2742152214050293, + "logits/rejected": -3.1125593185424805, + "logps/chosen": -278.0633850097656, + "logps/rejected": -537.7471923828125, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4986236095428467, + "rewards/margins": 4.24087381362915, + "rewards/rejected": -2.7422502040863037, + "step": 1192 + }, + { + "epoch": 0.76, + "learning_rate": 1.433566794865021e-08, + "logits/chosen": -3.2435102462768555, + "logits/rejected": -3.0690932273864746, + "logps/chosen": -261.6742858886719, + "logps/rejected": -907.757080078125, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.597497582435608, + "rewards/margins": 6.099039077758789, + "rewards/rejected": -4.5015411376953125, + "step": 1193 + }, + { + "epoch": 0.76, + "learning_rate": 1.426336223192386e-08, + "logits/chosen": -3.283641815185547, + "logits/rejected": -3.1941049098968506, + "logps/chosen": -293.5975341796875, + "logps/rejected": -559.1239013671875, + "loss": 0.3116, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5895600318908691, + "rewards/margins": 4.403314590454102, + "rewards/rejected": -2.8137543201446533, + "step": 1194 + }, + { + "epoch": 0.76, + "learning_rate": 1.4191208974824536e-08, + "logits/chosen": -3.219849109649658, + "logits/rejected": -3.107072353363037, + "logps/chosen": -270.9596862792969, + "logps/rejected": -352.297607421875, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5863434076309204, + "rewards/margins": 3.503781318664551, + "rewards/rejected": -1.9174377918243408, + "step": 1195 + }, + { + "epoch": 0.76, + "learning_rate": 1.4119208485172418e-08, + "logits/chosen": -3.1412832736968994, + "logits/rejected": -3.12158465385437, + "logps/chosen": -265.1480407714844, + "logps/rejected": -1334.96337890625, + "loss": 0.2709, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.699211835861206, + "rewards/margins": 8.123772621154785, + "rewards/rejected": -6.424560546875, + "step": 1196 + }, + { + "epoch": 0.76, + "learning_rate": 1.4047361070135993e-08, + "logits/chosen": -3.2246603965759277, + "logits/rejected": -3.184199571609497, + "logps/chosen": -280.8448486328125, + "logps/rejected": -309.2801513671875, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7200897932052612, + "rewards/margins": 3.4825897216796875, + "rewards/rejected": -1.7625000476837158, + "step": 1197 + }, + { + "epoch": 0.76, + "learning_rate": 1.397566703623062e-08, + "logits/chosen": -3.2513234615325928, + "logits/rejected": -3.1068201065063477, + "logps/chosen": -286.2413635253906, + "logps/rejected": -863.4852294921875, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7689881324768066, + "rewards/margins": 6.461071968078613, + "rewards/rejected": -4.692083835601807, + "step": 1198 + }, + { + "epoch": 0.76, + "learning_rate": 1.3904126689317392e-08, + "logits/chosen": -3.254279136657715, + "logits/rejected": -3.111279010772705, + "logps/chosen": -245.4873046875, + "logps/rejected": -521.4475708007812, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1222259998321533, + "rewards/margins": 3.9369325637817383, + "rewards/rejected": -2.814706325531006, + "step": 1199 + }, + { + "epoch": 0.76, + "learning_rate": 1.3832740334601688e-08, + "logits/chosen": -3.1941640377044678, + "logits/rejected": -3.092827558517456, + "logps/chosen": -244.726318359375, + "logps/rejected": -1214.2242431640625, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.433750867843628, + "rewards/margins": 6.806572437286377, + "rewards/rejected": -5.37282133102417, + "step": 1200 + }, + { + "epoch": 0.77, + "learning_rate": 1.3761508276631944e-08, + "logits/chosen": -3.2609598636627197, + "logits/rejected": -3.1481361389160156, + "logps/chosen": -275.1768798828125, + "logps/rejected": -1123.3057861328125, + "loss": 0.2719, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2861549854278564, + "rewards/margins": 7.486093521118164, + "rewards/rejected": -6.199938774108887, + "step": 1201 + }, + { + "epoch": 0.77, + "learning_rate": 1.3690430819298333e-08, + "logits/chosen": -3.2795701026916504, + "logits/rejected": -3.149362564086914, + "logps/chosen": -245.60855102539062, + "logps/rejected": -439.5903625488281, + "loss": 0.2854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1821602582931519, + "rewards/margins": 3.6424202919006348, + "rewards/rejected": -2.4602599143981934, + "step": 1202 + }, + { + "epoch": 0.77, + "learning_rate": 1.3619508265831441e-08, + "logits/chosen": -3.2253799438476562, + "logits/rejected": -3.106919527053833, + "logps/chosen": -248.15341186523438, + "logps/rejected": -834.73486328125, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6444374322891235, + "rewards/margins": 5.935362815856934, + "rewards/rejected": -4.2909255027771, + "step": 1203 + }, + { + "epoch": 0.77, + "learning_rate": 1.3548740918801082e-08, + "logits/chosen": -3.2011077404022217, + "logits/rejected": -3.158996343612671, + "logps/chosen": -229.03292846679688, + "logps/rejected": -711.4102783203125, + "loss": 0.2532, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.304639458656311, + "rewards/margins": 6.097431182861328, + "rewards/rejected": -4.792791843414307, + "step": 1204 + }, + { + "epoch": 0.77, + "learning_rate": 1.3478129080114848e-08, + "logits/chosen": -3.2539186477661133, + "logits/rejected": -3.1179826259613037, + "logps/chosen": -276.47821044921875, + "logps/rejected": -689.9312744140625, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5340378284454346, + "rewards/margins": 5.293440341949463, + "rewards/rejected": -3.7594025135040283, + "step": 1205 + }, + { + "epoch": 0.77, + "learning_rate": 1.3407673051016938e-08, + "logits/chosen": -3.231739044189453, + "logits/rejected": -3.0497117042541504, + "logps/chosen": -276.0953674316406, + "logps/rejected": -291.5268859863281, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.600165605545044, + "rewards/margins": 2.900407314300537, + "rewards/rejected": -1.3002419471740723, + "step": 1206 + }, + { + "epoch": 0.77, + "learning_rate": 1.333737313208681e-08, + "logits/chosen": -3.248530864715576, + "logits/rejected": -3.1109461784362793, + "logps/chosen": -228.28042602539062, + "logps/rejected": -372.9896240234375, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.655116319656372, + "rewards/margins": 3.648562431335449, + "rewards/rejected": -1.9934463500976562, + "step": 1207 + }, + { + "epoch": 0.77, + "learning_rate": 1.3267229623237996e-08, + "logits/chosen": -3.281491279602051, + "logits/rejected": -3.058457851409912, + "logps/chosen": -257.42510986328125, + "logps/rejected": -702.170654296875, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4785652160644531, + "rewards/margins": 4.690959930419922, + "rewards/rejected": -3.2123947143554688, + "step": 1208 + }, + { + "epoch": 0.77, + "learning_rate": 1.3197242823716637e-08, + "logits/chosen": -3.2674384117126465, + "logits/rejected": -3.150575637817383, + "logps/chosen": -236.76502990722656, + "logps/rejected": -450.37628173828125, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6118850708007812, + "rewards/margins": 3.805633544921875, + "rewards/rejected": -2.1937484741210938, + "step": 1209 + }, + { + "epoch": 0.77, + "learning_rate": 1.3127413032100426e-08, + "logits/chosen": -3.257816791534424, + "logits/rejected": -3.0907726287841797, + "logps/chosen": -228.84060668945312, + "logps/rejected": -876.84423828125, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.289360761642456, + "rewards/margins": 6.624178409576416, + "rewards/rejected": -5.334817886352539, + "step": 1210 + }, + { + "epoch": 0.77, + "learning_rate": 1.305774054629718e-08, + "logits/chosen": -3.292769432067871, + "logits/rejected": -3.136615753173828, + "logps/chosen": -246.53921508789062, + "logps/rejected": -507.2054443359375, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3897552490234375, + "rewards/margins": 4.1569061279296875, + "rewards/rejected": -2.76715087890625, + "step": 1211 + }, + { + "epoch": 0.77, + "learning_rate": 1.29882256635436e-08, + "logits/chosen": -3.1867923736572266, + "logits/rejected": -3.160663604736328, + "logps/chosen": -274.05743408203125, + "logps/rejected": -939.4010009765625, + "loss": 0.2741, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.335059404373169, + "rewards/margins": 6.368531227111816, + "rewards/rejected": -5.033472061157227, + "step": 1212 + }, + { + "epoch": 0.77, + "learning_rate": 1.2918868680404099e-08, + "logits/chosen": -3.245342969894409, + "logits/rejected": -3.176365852355957, + "logps/chosen": -262.46881103515625, + "logps/rejected": -609.5055541992188, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5024681091308594, + "rewards/margins": 4.616029739379883, + "rewards/rejected": -3.1135621070861816, + "step": 1213 + }, + { + "epoch": 0.77, + "learning_rate": 1.2849669892769338e-08, + "logits/chosen": -3.1668434143066406, + "logits/rejected": -3.029355525970459, + "logps/chosen": -236.12161254882812, + "logps/rejected": -637.7626953125, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3827049732208252, + "rewards/margins": 4.6464104652404785, + "rewards/rejected": -3.2637057304382324, + "step": 1214 + }, + { + "epoch": 0.77, + "learning_rate": 1.2780629595855203e-08, + "logits/chosen": -3.2512106895446777, + "logits/rejected": -3.1968305110931396, + "logps/chosen": -234.51968383789062, + "logps/rejected": -663.6588745117188, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5885818004608154, + "rewards/margins": 6.001045227050781, + "rewards/rejected": -4.412463665008545, + "step": 1215 + }, + { + "epoch": 0.78, + "learning_rate": 1.2711748084201357e-08, + "logits/chosen": -3.189790964126587, + "logits/rejected": -3.1245200634002686, + "logps/chosen": -251.20425415039062, + "logps/rejected": -422.03021240234375, + "loss": 0.2691, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3527367115020752, + "rewards/margins": 3.2610955238342285, + "rewards/rejected": -1.9083588123321533, + "step": 1216 + }, + { + "epoch": 0.78, + "learning_rate": 1.264302565167007e-08, + "logits/chosen": -3.2557008266448975, + "logits/rejected": -3.0735650062561035, + "logps/chosen": -228.31549072265625, + "logps/rejected": -1106.534912109375, + "loss": 0.2597, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.460301160812378, + "rewards/margins": 5.959429740905762, + "rewards/rejected": -4.499128818511963, + "step": 1217 + }, + { + "epoch": 0.78, + "learning_rate": 1.257446259144494e-08, + "logits/chosen": -3.172497272491455, + "logits/rejected": -3.0518195629119873, + "logps/chosen": -252.6104736328125, + "logps/rejected": -701.0895385742188, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.423736572265625, + "rewards/margins": 4.889608860015869, + "rewards/rejected": -3.465872287750244, + "step": 1218 + }, + { + "epoch": 0.78, + "learning_rate": 1.2506059196029694e-08, + "logits/chosen": -3.223677635192871, + "logits/rejected": -3.1461055278778076, + "logps/chosen": -278.65704345703125, + "logps/rejected": -656.026611328125, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3791671991348267, + "rewards/margins": 5.271903991699219, + "rewards/rejected": -3.8927369117736816, + "step": 1219 + }, + { + "epoch": 0.78, + "learning_rate": 1.2437815757246845e-08, + "logits/chosen": -3.203819751739502, + "logits/rejected": -3.1117043495178223, + "logps/chosen": -327.30029296875, + "logps/rejected": -768.9362182617188, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2952086925506592, + "rewards/margins": 5.4228057861328125, + "rewards/rejected": -4.127597332000732, + "step": 1220 + }, + { + "epoch": 0.78, + "learning_rate": 1.2369732566236507e-08, + "logits/chosen": -3.2560973167419434, + "logits/rejected": -3.1163687705993652, + "logps/chosen": -275.4781799316406, + "logps/rejected": -1076.18310546875, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4970734119415283, + "rewards/margins": 7.0582733154296875, + "rewards/rejected": -5.561200141906738, + "step": 1221 + }, + { + "epoch": 0.78, + "learning_rate": 1.2301809913455202e-08, + "logits/chosen": -3.2305986881256104, + "logits/rejected": -3.0701663494110107, + "logps/chosen": -255.5634765625, + "logps/rejected": -545.1654052734375, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5546462535858154, + "rewards/margins": 4.787696838378906, + "rewards/rejected": -3.233050584793091, + "step": 1222 + }, + { + "epoch": 0.78, + "learning_rate": 1.2234048088674459e-08, + "logits/chosen": -3.2089571952819824, + "logits/rejected": -3.0874977111816406, + "logps/chosen": -247.50723266601562, + "logps/rejected": -447.5584411621094, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4233559370040894, + "rewards/margins": 4.020754814147949, + "rewards/rejected": -2.597398519515991, + "step": 1223 + }, + { + "epoch": 0.78, + "learning_rate": 1.2166447380979799e-08, + "logits/chosen": -3.2260708808898926, + "logits/rejected": -3.1290464401245117, + "logps/chosen": -287.3953857421875, + "logps/rejected": -364.010009765625, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5937293767929077, + "rewards/margins": 3.339257001876831, + "rewards/rejected": -1.745527744293213, + "step": 1224 + }, + { + "epoch": 0.78, + "learning_rate": 1.2099008078769274e-08, + "logits/chosen": -3.2896971702575684, + "logits/rejected": -3.064964532852173, + "logps/chosen": -276.406005859375, + "logps/rejected": -556.46337890625, + "loss": 0.2974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.404097080230713, + "rewards/margins": 4.399232864379883, + "rewards/rejected": -2.99513578414917, + "step": 1225 + }, + { + "epoch": 0.78, + "learning_rate": 1.2031730469752443e-08, + "logits/chosen": -3.2805323600769043, + "logits/rejected": -3.1820998191833496, + "logps/chosen": -258.85223388671875, + "logps/rejected": -488.07763671875, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4378563165664673, + "rewards/margins": 3.7995810508728027, + "rewards/rejected": -2.361724853515625, + "step": 1226 + }, + { + "epoch": 0.78, + "learning_rate": 1.1964614840949e-08, + "logits/chosen": -3.2105603218078613, + "logits/rejected": -3.1212220191955566, + "logps/chosen": -273.00579833984375, + "logps/rejected": -474.7818298339844, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3803832530975342, + "rewards/margins": 3.9041929244995117, + "rewards/rejected": -2.5238099098205566, + "step": 1227 + }, + { + "epoch": 0.78, + "learning_rate": 1.1897661478687594e-08, + "logits/chosen": -3.201493263244629, + "logits/rejected": -3.1083872318267822, + "logps/chosen": -261.31201171875, + "logps/rejected": -1214.0899658203125, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5731124877929688, + "rewards/margins": 6.886453151702881, + "rewards/rejected": -5.31334114074707, + "step": 1228 + }, + { + "epoch": 0.78, + "learning_rate": 1.1830870668604648e-08, + "logits/chosen": -3.232466697692871, + "logits/rejected": -3.1388163566589355, + "logps/chosen": -259.9477844238281, + "logps/rejected": -462.0195007324219, + "loss": 0.2866, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3941102027893066, + "rewards/margins": 4.268101692199707, + "rewards/rejected": -2.8739914894104004, + "step": 1229 + }, + { + "epoch": 0.78, + "learning_rate": 1.1764242695643073e-08, + "logits/chosen": -3.1991677284240723, + "logits/rejected": -3.1311697959899902, + "logps/chosen": -259.17657470703125, + "logps/rejected": -456.3876953125, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2776917219161987, + "rewards/margins": 3.884538173675537, + "rewards/rejected": -2.606846570968628, + "step": 1230 + }, + { + "epoch": 0.78, + "learning_rate": 1.1697777844051105e-08, + "logits/chosen": -3.2679033279418945, + "logits/rejected": -3.11934232711792, + "logps/chosen": -253.214599609375, + "logps/rejected": -812.0804443359375, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5938193798065186, + "rewards/margins": 5.796794891357422, + "rewards/rejected": -4.202975749969482, + "step": 1231 + }, + { + "epoch": 0.79, + "learning_rate": 1.1631476397381035e-08, + "logits/chosen": -3.2193098068237305, + "logits/rejected": -3.0680596828460693, + "logps/chosen": -255.87353515625, + "logps/rejected": -519.5704956054688, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4128272533416748, + "rewards/margins": 4.25499963760376, + "rewards/rejected": -2.842172384262085, + "step": 1232 + }, + { + "epoch": 0.79, + "learning_rate": 1.1565338638488115e-08, + "logits/chosen": -3.259382724761963, + "logits/rejected": -3.135148763656616, + "logps/chosen": -250.82553100585938, + "logps/rejected": -968.26171875, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3574905395507812, + "rewards/margins": 6.971010208129883, + "rewards/rejected": -5.613519668579102, + "step": 1233 + }, + { + "epoch": 0.79, + "learning_rate": 1.1499364849529158e-08, + "logits/chosen": -3.201193332672119, + "logits/rejected": -3.1505768299102783, + "logps/chosen": -278.377685546875, + "logps/rejected": -654.0526123046875, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.468824863433838, + "rewards/margins": 4.922889709472656, + "rewards/rejected": -3.4540648460388184, + "step": 1234 + }, + { + "epoch": 0.79, + "learning_rate": 1.1433555311961557e-08, + "logits/chosen": -3.2398018836975098, + "logits/rejected": -3.036611795425415, + "logps/chosen": -268.62139892578125, + "logps/rejected": -1008.501220703125, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4507019519805908, + "rewards/margins": 5.994725227355957, + "rewards/rejected": -4.544023036956787, + "step": 1235 + }, + { + "epoch": 0.79, + "learning_rate": 1.1367910306541917e-08, + "logits/chosen": -3.222717523574829, + "logits/rejected": -3.0224761962890625, + "logps/chosen": -220.01986694335938, + "logps/rejected": -454.6427001953125, + "loss": 0.2599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3260986804962158, + "rewards/margins": 3.6232712268829346, + "rewards/rejected": -2.2971725463867188, + "step": 1236 + }, + { + "epoch": 0.79, + "learning_rate": 1.1302430113324906e-08, + "logits/chosen": -3.182656764984131, + "logits/rejected": -3.0704498291015625, + "logps/chosen": -254.59002685546875, + "logps/rejected": -352.46533203125, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.489637851715088, + "rewards/margins": 3.4878830909729004, + "rewards/rejected": -1.9982452392578125, + "step": 1237 + }, + { + "epoch": 0.79, + "learning_rate": 1.1237115011662147e-08, + "logits/chosen": -3.236407995223999, + "logits/rejected": -3.130892038345337, + "logps/chosen": -264.3876953125, + "logps/rejected": -887.9002685546875, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1416549682617188, + "rewards/margins": 5.366859436035156, + "rewards/rejected": -4.2252044677734375, + "step": 1238 + }, + { + "epoch": 0.79, + "learning_rate": 1.117196528020083e-08, + "logits/chosen": -3.2390236854553223, + "logits/rejected": -3.097303867340088, + "logps/chosen": -245.120361328125, + "logps/rejected": -526.1536254882812, + "loss": 0.2567, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4631401300430298, + "rewards/margins": 4.727276802062988, + "rewards/rejected": -3.264136552810669, + "step": 1239 + }, + { + "epoch": 0.79, + "learning_rate": 1.1106981196882759e-08, + "logits/chosen": -3.234550952911377, + "logits/rejected": -3.150157928466797, + "logps/chosen": -259.9700927734375, + "logps/rejected": -706.50830078125, + "loss": 0.2727, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3431106805801392, + "rewards/margins": 4.931311130523682, + "rewards/rejected": -3.588200569152832, + "step": 1240 + }, + { + "epoch": 0.79, + "learning_rate": 1.1042163038942975e-08, + "logits/chosen": -3.30224609375, + "logits/rejected": -3.1399192810058594, + "logps/chosen": -263.9004821777344, + "logps/rejected": -563.9691772460938, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7348830699920654, + "rewards/margins": 5.306124687194824, + "rewards/rejected": -3.571241855621338, + "step": 1241 + }, + { + "epoch": 0.79, + "learning_rate": 1.0977511082908669e-08, + "logits/chosen": -3.1826095581054688, + "logits/rejected": -2.9827027320861816, + "logps/chosen": -290.22491455078125, + "logps/rejected": -1833.3099365234375, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4860275983810425, + "rewards/margins": 9.253202438354492, + "rewards/rejected": -7.767175197601318, + "step": 1242 + }, + { + "epoch": 0.79, + "learning_rate": 1.0913025604598002e-08, + "logits/chosen": -3.3289809226989746, + "logits/rejected": -3.1335980892181396, + "logps/chosen": -267.8013000488281, + "logps/rejected": -398.333984375, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6338669061660767, + "rewards/margins": 3.8031373023986816, + "rewards/rejected": -2.1692702770233154, + "step": 1243 + }, + { + "epoch": 0.79, + "learning_rate": 1.084870687911889e-08, + "logits/chosen": -3.290605068206787, + "logits/rejected": -3.1400184631347656, + "logps/chosen": -260.68902587890625, + "logps/rejected": -498.62384033203125, + "loss": 0.2849, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4697967767715454, + "rewards/margins": 4.3302836418151855, + "rewards/rejected": -2.8604867458343506, + "step": 1244 + }, + { + "epoch": 0.79, + "learning_rate": 1.0784555180867838e-08, + "logits/chosen": -3.2309067249298096, + "logits/rejected": -3.073669910430908, + "logps/chosen": -239.52000427246094, + "logps/rejected": -333.603759765625, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3564987182617188, + "rewards/margins": 2.647421360015869, + "rewards/rejected": -1.2909226417541504, + "step": 1245 + }, + { + "epoch": 0.79, + "learning_rate": 1.0720570783528798e-08, + "logits/chosen": -3.232815742492676, + "logits/rejected": -3.1503167152404785, + "logps/chosen": -258.9688720703125, + "logps/rejected": -720.6719970703125, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6203789710998535, + "rewards/margins": 5.6107048988342285, + "rewards/rejected": -3.990325927734375, + "step": 1246 + }, + { + "epoch": 0.79, + "learning_rate": 1.0656753960071969e-08, + "logits/chosen": -3.2863640785217285, + "logits/rejected": -3.136780261993408, + "logps/chosen": -267.1722412109375, + "logps/rejected": -1266.9149169921875, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2633178234100342, + "rewards/margins": 7.260702133178711, + "rewards/rejected": -5.997384548187256, + "step": 1247 + }, + { + "epoch": 0.8, + "learning_rate": 1.0593104982752644e-08, + "logits/chosen": -3.2052457332611084, + "logits/rejected": -3.1614298820495605, + "logps/chosen": -241.5478515625, + "logps/rejected": -565.3639526367188, + "loss": 0.2599, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4965423345565796, + "rewards/margins": 5.228765964508057, + "rewards/rejected": -3.7322235107421875, + "step": 1248 + }, + { + "epoch": 0.8, + "learning_rate": 1.0529624123110081e-08, + "logits/chosen": -3.1440281867980957, + "logits/rejected": -3.0823636054992676, + "logps/chosen": -310.88739013671875, + "logps/rejected": -827.1773681640625, + "loss": 0.3134, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3971450328826904, + "rewards/margins": 5.40641975402832, + "rewards/rejected": -4.009274482727051, + "step": 1249 + }, + { + "epoch": 0.8, + "learning_rate": 1.0466311651966286e-08, + "logits/chosen": -3.1669631004333496, + "logits/rejected": -3.116288661956787, + "logps/chosen": -271.7937316894531, + "logps/rejected": -563.1151733398438, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3986961841583252, + "rewards/margins": 4.890689849853516, + "rewards/rejected": -3.4919939041137695, + "step": 1250 + }, + { + "epoch": 0.8, + "learning_rate": 1.0403167839424882e-08, + "logits/chosen": -3.308590888977051, + "logits/rejected": -3.149601459503174, + "logps/chosen": -241.47830200195312, + "logps/rejected": -410.6441345214844, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3697525262832642, + "rewards/margins": 3.154493808746338, + "rewards/rejected": -1.7847411632537842, + "step": 1251 + }, + { + "epoch": 0.8, + "learning_rate": 1.0340192954870002e-08, + "logits/chosen": -3.238522529602051, + "logits/rejected": -3.188004732131958, + "logps/chosen": -253.78762817382812, + "logps/rejected": -1067.711181640625, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3730239868164062, + "rewards/margins": 7.007804870605469, + "rewards/rejected": -5.6347808837890625, + "step": 1252 + }, + { + "epoch": 0.8, + "learning_rate": 1.0277387266965026e-08, + "logits/chosen": -3.240966320037842, + "logits/rejected": -3.14432430267334, + "logps/chosen": -294.35467529296875, + "logps/rejected": -621.8787231445312, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.350917100906372, + "rewards/margins": 5.225142478942871, + "rewards/rejected": -3.87422513961792, + "step": 1253 + }, + { + "epoch": 0.8, + "learning_rate": 1.0214751043651582e-08, + "logits/chosen": -3.2806382179260254, + "logits/rejected": -3.160428285598755, + "logps/chosen": -278.839599609375, + "logps/rejected": -596.8544311523438, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5193252563476562, + "rewards/margins": 4.244485855102539, + "rewards/rejected": -2.7251603603363037, + "step": 1254 + }, + { + "epoch": 0.8, + "learning_rate": 1.0152284552148282e-08, + "logits/chosen": -3.2050538063049316, + "logits/rejected": -3.153907299041748, + "logps/chosen": -271.030029296875, + "logps/rejected": -977.0037231445312, + "loss": 0.2614, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4128921031951904, + "rewards/margins": 7.431187629699707, + "rewards/rejected": -6.0182952880859375, + "step": 1255 + }, + { + "epoch": 0.8, + "learning_rate": 1.008998805894965e-08, + "logits/chosen": -3.2088441848754883, + "logits/rejected": -3.1691040992736816, + "logps/chosen": -262.236572265625, + "logps/rejected": -1281.4747314453125, + "loss": 0.2551, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.59893798828125, + "rewards/margins": 9.706244468688965, + "rewards/rejected": -8.107306480407715, + "step": 1256 + }, + { + "epoch": 0.8, + "learning_rate": 1.0027861829824951e-08, + "logits/chosen": -3.1965842247009277, + "logits/rejected": -3.107607841491699, + "logps/chosen": -259.70428466796875, + "logps/rejected": -439.39312744140625, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5719093084335327, + "rewards/margins": 4.241265296936035, + "rewards/rejected": -2.669355869293213, + "step": 1257 + }, + { + "epoch": 0.8, + "learning_rate": 9.965906129817058e-09, + "logits/chosen": -3.229112386703491, + "logits/rejected": -3.121943473815918, + "logps/chosen": -263.42266845703125, + "logps/rejected": -976.552001953125, + "loss": 0.2799, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6080963611602783, + "rewards/margins": 6.9208221435546875, + "rewards/rejected": -5.312726020812988, + "step": 1258 + }, + { + "epoch": 0.8, + "learning_rate": 9.904121223241386e-09, + "logits/chosen": -3.2613987922668457, + "logits/rejected": -3.0598020553588867, + "logps/chosen": -280.18035888671875, + "logps/rejected": -573.739013671875, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7310059070587158, + "rewards/margins": 4.531225681304932, + "rewards/rejected": -2.800219774246216, + "step": 1259 + }, + { + "epoch": 0.8, + "learning_rate": 9.842507373684644e-09, + "logits/chosen": -3.1806745529174805, + "logits/rejected": -3.0753350257873535, + "logps/chosen": -284.9312744140625, + "logps/rejected": -960.8282470703125, + "loss": 0.2676, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.444197177886963, + "rewards/margins": 6.051174163818359, + "rewards/rejected": -4.606976509094238, + "step": 1260 + }, + { + "epoch": 0.8, + "learning_rate": 9.781064844003822e-09, + "logits/chosen": -3.2996878623962402, + "logits/rejected": -3.20426869392395, + "logps/chosen": -244.69189453125, + "logps/rejected": -582.45556640625, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2930923700332642, + "rewards/margins": 4.372000217437744, + "rewards/rejected": -3.0789079666137695, + "step": 1261 + }, + { + "epoch": 0.8, + "learning_rate": 9.71979389632499e-09, + "logits/chosen": -3.2813048362731934, + "logits/rejected": -3.1451447010040283, + "logps/chosen": -235.63848876953125, + "logps/rejected": -561.2001342773438, + "loss": 0.2628, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4550278186798096, + "rewards/margins": 4.622550010681152, + "rewards/rejected": -3.167522430419922, + "step": 1262 + }, + { + "epoch": 0.8, + "learning_rate": 9.658694792042282e-09, + "logits/chosen": -3.251737594604492, + "logits/rejected": -3.04339599609375, + "logps/chosen": -234.90252685546875, + "logps/rejected": -1356.90185546875, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7128196954727173, + "rewards/margins": 7.483706474304199, + "rewards/rejected": -5.770886421203613, + "step": 1263 + }, + { + "epoch": 0.81, + "learning_rate": 9.597767791816608e-09, + "logits/chosen": -3.2458672523498535, + "logits/rejected": -2.980632781982422, + "logps/chosen": -259.968994140625, + "logps/rejected": -568.6148071289062, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.700019121170044, + "rewards/margins": 4.088277339935303, + "rewards/rejected": -2.388258457183838, + "step": 1264 + }, + { + "epoch": 0.81, + "learning_rate": 9.537013155574747e-09, + "logits/chosen": -3.247988224029541, + "logits/rejected": -3.141536235809326, + "logps/chosen": -257.88043212890625, + "logps/rejected": -807.37939453125, + "loss": 0.2674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3971519470214844, + "rewards/margins": 6.23598575592041, + "rewards/rejected": -4.838833808898926, + "step": 1265 + }, + { + "epoch": 0.81, + "learning_rate": 9.476431142508095e-09, + "logits/chosen": -3.1970953941345215, + "logits/rejected": -3.0352041721343994, + "logps/chosen": -286.9366149902344, + "logps/rejected": -2532.14794921875, + "loss": 0.2781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.352726697921753, + "rewards/margins": 10.969752311706543, + "rewards/rejected": -9.617025375366211, + "step": 1266 + }, + { + "epoch": 0.81, + "learning_rate": 9.416022011071584e-09, + "logits/chosen": -3.2456297874450684, + "logits/rejected": -3.1329641342163086, + "logps/chosen": -252.3418426513672, + "logps/rejected": -342.99658203125, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.540357232093811, + "rewards/margins": 3.038571357727051, + "rewards/rejected": -1.4982140064239502, + "step": 1267 + }, + { + "epoch": 0.81, + "learning_rate": 9.355786018982664e-09, + "logits/chosen": -3.192073345184326, + "logits/rejected": -3.062930107116699, + "logps/chosen": -250.99090576171875, + "logps/rejected": -540.629150390625, + "loss": 0.2788, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1298866271972656, + "rewards/margins": 3.9228386878967285, + "rewards/rejected": -2.792952060699463, + "step": 1268 + }, + { + "epoch": 0.81, + "learning_rate": 9.295723423220047e-09, + "logits/chosen": -3.1963112354278564, + "logits/rejected": -3.1765408515930176, + "logps/chosen": -269.1126403808594, + "logps/rejected": -561.96826171875, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4505729675292969, + "rewards/margins": 4.814644813537598, + "rewards/rejected": -3.364071846008301, + "step": 1269 + }, + { + "epoch": 0.81, + "learning_rate": 9.235834480022787e-09, + "logits/chosen": -3.151010513305664, + "logits/rejected": -3.045901298522949, + "logps/chosen": -264.55206298828125, + "logps/rejected": -971.8627319335938, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.404808759689331, + "rewards/margins": 5.727144718170166, + "rewards/rejected": -4.322336196899414, + "step": 1270 + }, + { + "epoch": 0.81, + "learning_rate": 9.176119444889046e-09, + "logits/chosen": -3.2475059032440186, + "logits/rejected": -3.105921506881714, + "logps/chosen": -273.28375244140625, + "logps/rejected": -374.83282470703125, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3517990112304688, + "rewards/margins": 3.3591079711914062, + "rewards/rejected": -2.0073089599609375, + "step": 1271 + }, + { + "epoch": 0.81, + "learning_rate": 9.11657857257509e-09, + "logits/chosen": -3.2508604526519775, + "logits/rejected": -3.0750417709350586, + "logps/chosen": -230.46267700195312, + "logps/rejected": -581.3868408203125, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3944870233535767, + "rewards/margins": 4.180006504058838, + "rewards/rejected": -2.785519599914551, + "step": 1272 + }, + { + "epoch": 0.81, + "learning_rate": 9.057212117094143e-09, + "logits/chosen": -3.181652307510376, + "logits/rejected": -3.1344144344329834, + "logps/chosen": -272.0654296875, + "logps/rejected": -594.197265625, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2046279907226562, + "rewards/margins": 4.693791389465332, + "rewards/rejected": -3.489163398742676, + "step": 1273 + }, + { + "epoch": 0.81, + "learning_rate": 8.998020331715367e-09, + "logits/chosen": -3.273104667663574, + "logits/rejected": -3.1229782104492188, + "logps/chosen": -295.0111389160156, + "logps/rejected": -898.0579833984375, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2813339233398438, + "rewards/margins": 7.032994270324707, + "rewards/rejected": -5.751660346984863, + "step": 1274 + }, + { + "epoch": 0.81, + "learning_rate": 8.939003468962725e-09, + "logits/chosen": -3.2564167976379395, + "logits/rejected": -3.1680774688720703, + "logps/chosen": -265.9082946777344, + "logps/rejected": -750.6996459960938, + "loss": 0.265, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1793372631072998, + "rewards/margins": 5.490571022033691, + "rewards/rejected": -4.3112335205078125, + "step": 1275 + }, + { + "epoch": 0.81, + "learning_rate": 8.880161780613915e-09, + "logits/chosen": -3.246289014816284, + "logits/rejected": -3.186361789703369, + "logps/chosen": -258.41497802734375, + "logps/rejected": -668.5342407226562, + "loss": 0.284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2959740161895752, + "rewards/margins": 5.162607669830322, + "rewards/rejected": -3.866633892059326, + "step": 1276 + }, + { + "epoch": 0.81, + "learning_rate": 8.821495517699356e-09, + "logits/chosen": -3.2877378463745117, + "logits/rejected": -3.1391384601593018, + "logps/chosen": -275.2080078125, + "logps/rejected": -445.67779541015625, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4418503046035767, + "rewards/margins": 4.31869649887085, + "rewards/rejected": -2.8768463134765625, + "step": 1277 + }, + { + "epoch": 0.81, + "learning_rate": 8.763004930500978e-09, + "logits/chosen": -3.267639636993408, + "logits/rejected": -3.1969237327575684, + "logps/chosen": -260.2378234863281, + "logps/rejected": -484.44384765625, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8472588062286377, + "rewards/margins": 4.94184684753418, + "rewards/rejected": -3.094587802886963, + "step": 1278 + }, + { + "epoch": 0.82, + "learning_rate": 8.70469026855134e-09, + "logits/chosen": -3.238877773284912, + "logits/rejected": -3.1850709915161133, + "logps/chosen": -227.02981567382812, + "logps/rejected": -351.57989501953125, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4363380670547485, + "rewards/margins": 3.4208123683929443, + "rewards/rejected": -1.9844741821289062, + "step": 1279 + }, + { + "epoch": 0.82, + "learning_rate": 8.646551780632405e-09, + "logits/chosen": -3.2682697772979736, + "logits/rejected": -3.1830177307128906, + "logps/chosen": -304.098876953125, + "logps/rejected": -771.7028198242188, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.232195258140564, + "rewards/margins": 5.829634666442871, + "rewards/rejected": -4.597439765930176, + "step": 1280 + }, + { + "epoch": 0.82, + "learning_rate": 8.588589714774569e-09, + "logits/chosen": -3.281363010406494, + "logits/rejected": -3.0127267837524414, + "logps/chosen": -214.93824768066406, + "logps/rejected": -1202.384765625, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1391693353652954, + "rewards/margins": 6.4600982666015625, + "rewards/rejected": -5.320929050445557, + "step": 1281 + }, + { + "epoch": 0.82, + "learning_rate": 8.530804318255563e-09, + "logits/chosen": -3.273866891860962, + "logits/rejected": -3.1821374893188477, + "logps/chosen": -247.3997344970703, + "logps/rejected": -637.3958740234375, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4479385614395142, + "rewards/margins": 5.522253036499023, + "rewards/rejected": -4.074315071105957, + "step": 1282 + }, + { + "epoch": 0.82, + "learning_rate": 8.473195837599417e-09, + "logits/chosen": -3.2374660968780518, + "logits/rejected": -3.19342303276062, + "logps/chosen": -267.5799255371094, + "logps/rejected": -587.4420776367188, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.514077067375183, + "rewards/margins": 4.640316009521484, + "rewards/rejected": -3.12623929977417, + "step": 1283 + }, + { + "epoch": 0.82, + "learning_rate": 8.415764518575413e-09, + "logits/chosen": -3.247356414794922, + "logits/rejected": -3.086268424987793, + "logps/chosen": -284.5052795410156, + "logps/rejected": -837.5381469726562, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.339837670326233, + "rewards/margins": 5.767297267913818, + "rewards/rejected": -4.427459716796875, + "step": 1284 + }, + { + "epoch": 0.82, + "learning_rate": 8.358510606197017e-09, + "logits/chosen": -3.1437885761260986, + "logits/rejected": -3.192211151123047, + "logps/chosen": -240.552001953125, + "logps/rejected": -765.6962280273438, + "loss": 0.2642, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4811012744903564, + "rewards/margins": 6.032453536987305, + "rewards/rejected": -4.551352024078369, + "step": 1285 + }, + { + "epoch": 0.82, + "learning_rate": 8.301434344720837e-09, + "logits/chosen": -3.2053472995758057, + "logits/rejected": -3.137768030166626, + "logps/chosen": -270.3374328613281, + "logps/rejected": -1174.0987548828125, + "loss": 0.2743, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3942933082580566, + "rewards/margins": 7.155908107757568, + "rewards/rejected": -5.76161527633667, + "step": 1286 + }, + { + "epoch": 0.82, + "learning_rate": 8.244535977645584e-09, + "logits/chosen": -3.164336919784546, + "logits/rejected": -3.118617057800293, + "logps/chosen": -267.91436767578125, + "logps/rejected": -633.3570556640625, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.355494737625122, + "rewards/margins": 4.715827941894531, + "rewards/rejected": -3.3603334426879883, + "step": 1287 + }, + { + "epoch": 0.82, + "learning_rate": 8.187815747711069e-09, + "logits/chosen": -3.1912875175476074, + "logits/rejected": -3.1106584072113037, + "logps/chosen": -286.597900390625, + "logps/rejected": -387.8323669433594, + "loss": 0.3054, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.555821180343628, + "rewards/margins": 3.465555429458618, + "rewards/rejected": -1.9097342491149902, + "step": 1288 + }, + { + "epoch": 0.82, + "learning_rate": 8.13127389689709e-09, + "logits/chosen": -3.2289810180664062, + "logits/rejected": -3.0902903079986572, + "logps/chosen": -288.48016357421875, + "logps/rejected": -866.1597900390625, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3894150257110596, + "rewards/margins": 5.827620029449463, + "rewards/rejected": -4.438204765319824, + "step": 1289 + }, + { + "epoch": 0.82, + "learning_rate": 8.074910666422475e-09, + "logits/chosen": -3.3144004344940186, + "logits/rejected": -3.115628719329834, + "logps/chosen": -257.56103515625, + "logps/rejected": -403.28204345703125, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4131042957305908, + "rewards/margins": 3.6446428298950195, + "rewards/rejected": -2.2315385341644287, + "step": 1290 + }, + { + "epoch": 0.82, + "learning_rate": 8.018726296744e-09, + "logits/chosen": -3.252168655395508, + "logits/rejected": -3.2131152153015137, + "logps/chosen": -225.13356018066406, + "logps/rejected": -700.8529052734375, + "loss": 0.2556, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3757965564727783, + "rewards/margins": 5.6912126541137695, + "rewards/rejected": -4.31541633605957, + "step": 1291 + }, + { + "epoch": 0.82, + "learning_rate": 7.962721027555392e-09, + "logits/chosen": -3.2859532833099365, + "logits/rejected": -3.1635794639587402, + "logps/chosen": -228.2563934326172, + "logps/rejected": -533.0260009765625, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.60186767578125, + "rewards/margins": 4.500718593597412, + "rewards/rejected": -2.898850917816162, + "step": 1292 + }, + { + "epoch": 0.82, + "learning_rate": 7.906895097786336e-09, + "logits/chosen": -3.250566005706787, + "logits/rejected": -3.1099095344543457, + "logps/chosen": -247.03164672851562, + "logps/rejected": -457.3995361328125, + "loss": 0.2559, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5675239562988281, + "rewards/margins": 4.163393497467041, + "rewards/rejected": -2.595869541168213, + "step": 1293 + }, + { + "epoch": 0.82, + "learning_rate": 7.85124874560134e-09, + "logits/chosen": -3.2241344451904297, + "logits/rejected": -3.164275646209717, + "logps/chosen": -266.78759765625, + "logps/rejected": -630.4922485351562, + "loss": 0.278, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4410018920898438, + "rewards/margins": 5.415327548980713, + "rewards/rejected": -3.974325656890869, + "step": 1294 + }, + { + "epoch": 0.83, + "learning_rate": 7.795782208398882e-09, + "logits/chosen": -3.238896131515503, + "logits/rejected": -3.209324836730957, + "logps/chosen": -232.12937927246094, + "logps/rejected": -787.1959228515625, + "loss": 0.297, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1982688903808594, + "rewards/margins": 5.946825981140137, + "rewards/rejected": -4.748556613922119, + "step": 1295 + }, + { + "epoch": 0.83, + "learning_rate": 7.740495722810269e-09, + "logits/chosen": -3.2314319610595703, + "logits/rejected": -3.1077523231506348, + "logps/chosen": -273.0220031738281, + "logps/rejected": -875.2674560546875, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3747787475585938, + "rewards/margins": 5.396333694458008, + "rewards/rejected": -4.021554946899414, + "step": 1296 + }, + { + "epoch": 0.83, + "learning_rate": 7.68538952469866e-09, + "logits/chosen": -3.2500123977661133, + "logits/rejected": -3.2472782135009766, + "logps/chosen": -278.597900390625, + "logps/rejected": -718.8949584960938, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6047751903533936, + "rewards/margins": 5.567256927490234, + "rewards/rejected": -3.962481737136841, + "step": 1297 + }, + { + "epoch": 0.83, + "learning_rate": 7.630463849158115e-09, + "logits/chosen": -3.1858458518981934, + "logits/rejected": -3.0561108589172363, + "logps/chosen": -217.0699920654297, + "logps/rejected": -1200.9029541015625, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3546226024627686, + "rewards/margins": 6.895400047302246, + "rewards/rejected": -5.540777683258057, + "step": 1298 + }, + { + "epoch": 0.83, + "learning_rate": 7.575718930512515e-09, + "logits/chosen": -3.18139386177063, + "logits/rejected": -3.2667689323425293, + "logps/chosen": -289.61260986328125, + "logps/rejected": -647.6795654296875, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.396081566810608, + "rewards/margins": 4.970669746398926, + "rewards/rejected": -3.5745882987976074, + "step": 1299 + }, + { + "epoch": 0.83, + "learning_rate": 7.521155002314612e-09, + "logits/chosen": -3.1742374897003174, + "logits/rejected": -3.100399971008301, + "logps/chosen": -262.0772705078125, + "logps/rejected": -770.6436767578125, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4938522577285767, + "rewards/margins": 6.7430219650268555, + "rewards/rejected": -5.249170303344727, + "step": 1300 + }, + { + "epoch": 0.83, + "learning_rate": 7.466772297344997e-09, + "logits/chosen": -3.250908136367798, + "logits/rejected": -3.104994773864746, + "logps/chosen": -249.49424743652344, + "logps/rejected": -798.5369873046875, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6359620094299316, + "rewards/margins": 5.518628120422363, + "rewards/rejected": -3.8826661109924316, + "step": 1301 + }, + { + "epoch": 0.83, + "learning_rate": 7.412571047611155e-09, + "logits/chosen": -3.258932113647461, + "logits/rejected": -3.1462156772613525, + "logps/chosen": -253.5802001953125, + "logps/rejected": -368.0462646484375, + "loss": 0.2729, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4339706897735596, + "rewards/margins": 3.843080997467041, + "rewards/rejected": -2.4091103076934814, + "step": 1302 + }, + { + "epoch": 0.83, + "learning_rate": 7.358551484346409e-09, + "logits/chosen": -3.2940893173217773, + "logits/rejected": -3.226713180541992, + "logps/chosen": -258.9376220703125, + "logps/rejected": -399.85260009765625, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.695976972579956, + "rewards/margins": 3.7902398109436035, + "rewards/rejected": -2.0942625999450684, + "step": 1303 + }, + { + "epoch": 0.83, + "learning_rate": 7.304713838009025e-09, + "logits/chosen": -3.2566795349121094, + "logits/rejected": -3.0668013095855713, + "logps/chosen": -262.09515380859375, + "logps/rejected": -632.8870849609375, + "loss": 0.275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5244613885879517, + "rewards/margins": 4.937994480133057, + "rewards/rejected": -3.4135332107543945, + "step": 1304 + }, + { + "epoch": 0.83, + "learning_rate": 7.25105833828113e-09, + "logits/chosen": -3.273447036743164, + "logits/rejected": -3.1089587211608887, + "logps/chosen": -276.81658935546875, + "logps/rejected": -712.9462890625, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6327972412109375, + "rewards/margins": 5.461749076843262, + "rewards/rejected": -3.8289520740509033, + "step": 1305 + }, + { + "epoch": 0.83, + "learning_rate": 7.197585214067775e-09, + "logits/chosen": -3.2855968475341797, + "logits/rejected": -3.164198398590088, + "logps/chosen": -232.33645629882812, + "logps/rejected": -846.3826293945312, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2116379737854004, + "rewards/margins": 5.933280944824219, + "rewards/rejected": -4.721642971038818, + "step": 1306 + }, + { + "epoch": 0.83, + "learning_rate": 7.144294693496017e-09, + "logits/chosen": -3.178835868835449, + "logits/rejected": -3.175295114517212, + "logps/chosen": -258.5258483886719, + "logps/rejected": -569.765869140625, + "loss": 0.2643, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1361496448516846, + "rewards/margins": 4.8820390701293945, + "rewards/rejected": -3.745889186859131, + "step": 1307 + }, + { + "epoch": 0.83, + "learning_rate": 7.0911870039138014e-09, + "logits/chosen": -3.214509963989258, + "logits/rejected": -3.1955161094665527, + "logps/chosen": -276.51446533203125, + "logps/rejected": -605.4217529296875, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1215873956680298, + "rewards/margins": 5.0130839347839355, + "rewards/rejected": -3.891496419906616, + "step": 1308 + }, + { + "epoch": 0.83, + "learning_rate": 7.0382623718891586e-09, + "logits/chosen": -3.2138938903808594, + "logits/rejected": -3.187516927719116, + "logps/chosen": -263.5426940917969, + "logps/rejected": -838.9224853515625, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2869316339492798, + "rewards/margins": 7.372396469116211, + "rewards/rejected": -6.0854644775390625, + "step": 1309 + }, + { + "epoch": 0.83, + "learning_rate": 6.985521023209129e-09, + "logits/chosen": -3.2044878005981445, + "logits/rejected": -3.1479620933532715, + "logps/chosen": -253.70132446289062, + "logps/rejected": -890.9580078125, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5133278369903564, + "rewards/margins": 6.501557350158691, + "rewards/rejected": -4.988229751586914, + "step": 1310 + }, + { + "epoch": 0.84, + "learning_rate": 6.93296318287882e-09, + "logits/chosen": -3.2716474533081055, + "logits/rejected": -3.157042980194092, + "logps/chosen": -258.14642333984375, + "logps/rejected": -518.386474609375, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4389832019805908, + "rewards/margins": 4.311028957366943, + "rewards/rejected": -2.8720459938049316, + "step": 1311 + }, + { + "epoch": 0.84, + "learning_rate": 6.880589075120463e-09, + "logits/chosen": -3.2982120513916016, + "logits/rejected": -3.1537275314331055, + "logps/chosen": -232.908203125, + "logps/rejected": -542.7833251953125, + "loss": 0.2788, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.250431776046753, + "rewards/margins": 4.343245029449463, + "rewards/rejected": -3.09281325340271, + "step": 1312 + }, + { + "epoch": 0.84, + "learning_rate": 6.828398923372447e-09, + "logits/chosen": -3.209075450897217, + "logits/rejected": -3.0959134101867676, + "logps/chosen": -274.26068115234375, + "logps/rejected": -708.394287109375, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2130104303359985, + "rewards/margins": 5.103076934814453, + "rewards/rejected": -3.890066623687744, + "step": 1313 + }, + { + "epoch": 0.84, + "learning_rate": 6.776392950288396e-09, + "logits/chosen": -3.2859737873077393, + "logits/rejected": -3.199538230895996, + "logps/chosen": -215.58773803710938, + "logps/rejected": -617.7442016601562, + "loss": 0.2547, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2164162397384644, + "rewards/margins": 5.120928764343262, + "rewards/rejected": -3.9045121669769287, + "step": 1314 + }, + { + "epoch": 0.84, + "learning_rate": 6.724571377736149e-09, + "logits/chosen": -3.217491626739502, + "logits/rejected": -3.1496150493621826, + "logps/chosen": -251.32421875, + "logps/rejected": -449.0450439453125, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4197769165039062, + "rewards/margins": 3.5121490955352783, + "rewards/rejected": -2.092372179031372, + "step": 1315 + }, + { + "epoch": 0.84, + "learning_rate": 6.672934426796873e-09, + "logits/chosen": -3.257455825805664, + "logits/rejected": -3.201500415802002, + "logps/chosen": -281.4681396484375, + "logps/rejected": -633.9735107421875, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5503602027893066, + "rewards/margins": 5.170605659484863, + "rewards/rejected": -3.6202454566955566, + "step": 1316 + }, + { + "epoch": 0.84, + "learning_rate": 6.621482317764104e-09, + "logits/chosen": -3.2168726921081543, + "logits/rejected": -3.1027870178222656, + "logps/chosen": -254.4832763671875, + "logps/rejected": -511.932861328125, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6400840282440186, + "rewards/margins": 4.576315879821777, + "rewards/rejected": -2.936232089996338, + "step": 1317 + }, + { + "epoch": 0.84, + "learning_rate": 6.570215270142826e-09, + "logits/chosen": -3.276928424835205, + "logits/rejected": -3.1641111373901367, + "logps/chosen": -268.0289001464844, + "logps/rejected": -598.4056396484375, + "loss": 0.27, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.183660864830017, + "rewards/margins": 4.265389919281006, + "rewards/rejected": -3.0817291736602783, + "step": 1318 + }, + { + "epoch": 0.84, + "learning_rate": 6.519133502648461e-09, + "logits/chosen": -3.239201545715332, + "logits/rejected": -2.957834482192993, + "logps/chosen": -245.44122314453125, + "logps/rejected": -1169.2225341796875, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6585944890975952, + "rewards/margins": 6.834045886993408, + "rewards/rejected": -5.175451755523682, + "step": 1319 + }, + { + "epoch": 0.84, + "learning_rate": 6.468237233206042e-09, + "logits/chosen": -3.2398035526275635, + "logits/rejected": -3.095158576965332, + "logps/chosen": -252.10630798339844, + "logps/rejected": -724.9938354492188, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6821945905685425, + "rewards/margins": 6.105315208435059, + "rewards/rejected": -4.423120498657227, + "step": 1320 + }, + { + "epoch": 0.84, + "learning_rate": 6.4175266789492084e-09, + "logits/chosen": -3.2070553302764893, + "logits/rejected": -3.0167136192321777, + "logps/chosen": -260.74432373046875, + "logps/rejected": -716.6353759765625, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6910873651504517, + "rewards/margins": 5.725279331207275, + "rewards/rejected": -4.034192085266113, + "step": 1321 + }, + { + "epoch": 0.84, + "learning_rate": 6.367002056219284e-09, + "logits/chosen": -3.200166702270508, + "logits/rejected": -3.1655192375183105, + "logps/chosen": -283.0005187988281, + "logps/rejected": -3723.776611328125, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4495055675506592, + "rewards/margins": 8.420740127563477, + "rewards/rejected": -6.971234321594238, + "step": 1322 + }, + { + "epoch": 0.84, + "learning_rate": 6.3166635805644244e-09, + "logits/chosen": -3.22529935836792, + "logits/rejected": -3.116060733795166, + "logps/chosen": -272.56219482421875, + "logps/rejected": -519.399658203125, + "loss": 0.2966, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0675681829452515, + "rewards/margins": 3.733208656311035, + "rewards/rejected": -2.665640354156494, + "step": 1323 + }, + { + "epoch": 0.84, + "learning_rate": 6.266511466738572e-09, + "logits/chosen": -3.2992067337036133, + "logits/rejected": -3.229541301727295, + "logps/chosen": -257.4235534667969, + "logps/rejected": -555.6901245117188, + "loss": 0.2854, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.330396294593811, + "rewards/margins": 4.936369895935059, + "rewards/rejected": -3.605973720550537, + "step": 1324 + }, + { + "epoch": 0.84, + "learning_rate": 6.216545928700679e-09, + "logits/chosen": -3.3044748306274414, + "logits/rejected": -3.1722822189331055, + "logps/chosen": -258.0121765136719, + "logps/rejected": -1035.469482421875, + "loss": 0.2693, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3205918073654175, + "rewards/margins": 7.372649192810059, + "rewards/rejected": -6.052057266235352, + "step": 1325 + }, + { + "epoch": 0.85, + "learning_rate": 6.166767179613691e-09, + "logits/chosen": -3.2048492431640625, + "logits/rejected": -3.0354065895080566, + "logps/chosen": -295.4974060058594, + "logps/rejected": -2040.8359375, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1075851917266846, + "rewards/margins": 9.694652557373047, + "rewards/rejected": -8.587066650390625, + "step": 1326 + }, + { + "epoch": 0.85, + "learning_rate": 6.117175431843685e-09, + "logits/chosen": -3.2819881439208984, + "logits/rejected": -3.136280059814453, + "logps/chosen": -254.09210205078125, + "logps/rejected": -4963.99853515625, + "loss": 0.2786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3068726062774658, + "rewards/margins": 13.336095809936523, + "rewards/rejected": -12.029224395751953, + "step": 1327 + }, + { + "epoch": 0.85, + "learning_rate": 6.067770896958935e-09, + "logits/chosen": -3.241151809692383, + "logits/rejected": -3.1612677574157715, + "logps/chosen": -231.8736114501953, + "logps/rejected": -562.2897338867188, + "loss": 0.2716, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4379714727401733, + "rewards/margins": 5.089418411254883, + "rewards/rejected": -3.651446580886841, + "step": 1328 + }, + { + "epoch": 0.85, + "learning_rate": 6.018553785729075e-09, + "logits/chosen": -3.2942750453948975, + "logits/rejected": -3.1726255416870117, + "logps/chosen": -247.55520629882812, + "logps/rejected": -1001.1714477539062, + "loss": 0.259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4327316284179688, + "rewards/margins": 6.671675682067871, + "rewards/rejected": -5.238943576812744, + "step": 1329 + }, + { + "epoch": 0.85, + "learning_rate": 5.969524308124102e-09, + "logits/chosen": -3.2920002937316895, + "logits/rejected": -3.1822965145111084, + "logps/chosen": -243.0171661376953, + "logps/rejected": -1024.882568359375, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.356590986251831, + "rewards/margins": 6.789107322692871, + "rewards/rejected": -5.432516574859619, + "step": 1330 + }, + { + "epoch": 0.85, + "learning_rate": 5.920682673313543e-09, + "logits/chosen": -3.2596497535705566, + "logits/rejected": -3.062077045440674, + "logps/chosen": -283.21881103515625, + "logps/rejected": -276.0260009765625, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.852636694908142, + "rewards/margins": 2.911639451980591, + "rewards/rejected": -1.0590027570724487, + "step": 1331 + }, + { + "epoch": 0.85, + "learning_rate": 5.8720290896655865e-09, + "logits/chosen": -3.189570426940918, + "logits/rejected": -3.186631202697754, + "logps/chosen": -271.1937561035156, + "logps/rejected": -557.3671875, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.648834228515625, + "rewards/margins": 4.902478218078613, + "rewards/rejected": -3.2536439895629883, + "step": 1332 + }, + { + "epoch": 0.85, + "learning_rate": 5.823563764746092e-09, + "logits/chosen": -3.2257766723632812, + "logits/rejected": -3.15647029876709, + "logps/chosen": -258.135498046875, + "logps/rejected": -335.4718933105469, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3168303966522217, + "rewards/margins": 2.7314484119415283, + "rewards/rejected": -1.4146180152893066, + "step": 1333 + }, + { + "epoch": 0.85, + "learning_rate": 5.775286905317828e-09, + "logits/chosen": -3.273700714111328, + "logits/rejected": -3.0774993896484375, + "logps/chosen": -244.59730529785156, + "logps/rejected": -672.7139892578125, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3339524269104004, + "rewards/margins": 4.348459243774414, + "rewards/rejected": -3.0145065784454346, + "step": 1334 + }, + { + "epoch": 0.85, + "learning_rate": 5.72719871733951e-09, + "logits/chosen": -3.26469087600708, + "logits/rejected": -3.2090299129486084, + "logps/chosen": -272.21771240234375, + "logps/rejected": -591.13427734375, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4306931495666504, + "rewards/margins": 4.637347221374512, + "rewards/rejected": -3.2066545486450195, + "step": 1335 + }, + { + "epoch": 0.85, + "learning_rate": 5.679299405964921e-09, + "logits/chosen": -3.284911632537842, + "logits/rejected": -3.051924705505371, + "logps/chosen": -229.44100952148438, + "logps/rejected": -690.324951171875, + "loss": 0.258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3164069652557373, + "rewards/margins": 4.23455286026001, + "rewards/rejected": -2.9181458950042725, + "step": 1336 + }, + { + "epoch": 0.85, + "learning_rate": 5.631589175542117e-09, + "logits/chosen": -3.2444019317626953, + "logits/rejected": -3.1365208625793457, + "logps/chosen": -239.635986328125, + "logps/rejected": -538.6827392578125, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2311989068984985, + "rewards/margins": 4.49153995513916, + "rewards/rejected": -3.260340929031372, + "step": 1337 + }, + { + "epoch": 0.85, + "learning_rate": 5.584068229612421e-09, + "logits/chosen": -3.258817195892334, + "logits/rejected": -3.0560200214385986, + "logps/chosen": -257.3921203613281, + "logps/rejected": -1200.449951171875, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6047470569610596, + "rewards/margins": 7.412898063659668, + "rewards/rejected": -5.8081512451171875, + "step": 1338 + }, + { + "epoch": 0.85, + "learning_rate": 5.5367367709097005e-09, + "logits/chosen": -3.2489168643951416, + "logits/rejected": -3.198230266571045, + "logps/chosen": -266.8826904296875, + "logps/rejected": -931.2683715820312, + "loss": 0.2619, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6289246082305908, + "rewards/margins": 7.151251316070557, + "rewards/rejected": -5.522326469421387, + "step": 1339 + }, + { + "epoch": 0.85, + "learning_rate": 5.489595001359382e-09, + "logits/chosen": -3.282031536102295, + "logits/rejected": -3.1008262634277344, + "logps/chosen": -278.08758544921875, + "logps/rejected": -386.01824951171875, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6289703845977783, + "rewards/margins": 3.6039657592773438, + "rewards/rejected": -1.974995493888855, + "step": 1340 + }, + { + "epoch": 0.85, + "learning_rate": 5.442643122077672e-09, + "logits/chosen": -3.1678662300109863, + "logits/rejected": -3.0694527626037598, + "logps/chosen": -272.04412841796875, + "logps/rejected": -857.79638671875, + "loss": 0.2742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3377411365509033, + "rewards/margins": 6.440548896789551, + "rewards/rejected": -5.102807998657227, + "step": 1341 + }, + { + "epoch": 0.86, + "learning_rate": 5.395881333370644e-09, + "logits/chosen": -3.2919559478759766, + "logits/rejected": -3.166524887084961, + "logps/chosen": -247.34817504882812, + "logps/rejected": -635.58740234375, + "loss": 0.2775, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3057518005371094, + "rewards/margins": 5.376646995544434, + "rewards/rejected": -4.070895195007324, + "step": 1342 + }, + { + "epoch": 0.86, + "learning_rate": 5.349309834733435e-09, + "logits/chosen": -3.223064422607422, + "logits/rejected": -3.1780247688293457, + "logps/chosen": -283.02398681640625, + "logps/rejected": -630.1942749023438, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4008041620254517, + "rewards/margins": 5.3020830154418945, + "rewards/rejected": -3.9012789726257324, + "step": 1343 + }, + { + "epoch": 0.86, + "learning_rate": 5.3029288248493345e-09, + "logits/chosen": -3.201180934906006, + "logits/rejected": -3.092790365219116, + "logps/chosen": -249.25587463378906, + "logps/rejected": -468.1467590332031, + "loss": 0.2786, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4084320068359375, + "rewards/margins": 3.414965867996216, + "rewards/rejected": -2.0065338611602783, + "step": 1344 + }, + { + "epoch": 0.86, + "learning_rate": 5.256738501588998e-09, + "logits/chosen": -3.2302064895629883, + "logits/rejected": -2.9599862098693848, + "logps/chosen": -205.2128143310547, + "logps/rejected": -601.56396484375, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3615226745605469, + "rewards/margins": 4.409151077270508, + "rewards/rejected": -3.047628879547119, + "step": 1345 + }, + { + "epoch": 0.86, + "learning_rate": 5.2107390620095556e-09, + "logits/chosen": -3.2318193912506104, + "logits/rejected": -3.1469082832336426, + "logps/chosen": -234.32078552246094, + "logps/rejected": -513.490234375, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3891799449920654, + "rewards/margins": 3.986502170562744, + "rewards/rejected": -2.5973222255706787, + "step": 1346 + }, + { + "epoch": 0.86, + "learning_rate": 5.164930702353781e-09, + "logits/chosen": -3.2420101165771484, + "logits/rejected": -3.1135311126708984, + "logps/chosen": -248.31915283203125, + "logps/rejected": -408.1883850097656, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4624183177947998, + "rewards/margins": 3.498288154602051, + "rewards/rejected": -2.035869598388672, + "step": 1347 + }, + { + "epoch": 0.86, + "learning_rate": 5.119313618049309e-09, + "logits/chosen": -3.2509002685546875, + "logits/rejected": -3.0780673027038574, + "logps/chosen": -286.0362548828125, + "logps/rejected": -1224.6240234375, + "loss": 0.3039, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6970641613006592, + "rewards/margins": 7.090044021606445, + "rewards/rejected": -5.392979621887207, + "step": 1348 + }, + { + "epoch": 0.86, + "learning_rate": 5.073888003707683e-09, + "logits/chosen": -3.2216830253601074, + "logits/rejected": -3.0503287315368652, + "logps/chosen": -293.47125244140625, + "logps/rejected": -1528.658935546875, + "loss": 0.318, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5891189575195312, + "rewards/margins": 8.691261291503906, + "rewards/rejected": -7.102142333984375, + "step": 1349 + }, + { + "epoch": 0.86, + "learning_rate": 5.028654053123665e-09, + "logits/chosen": -3.2314014434814453, + "logits/rejected": -3.104518413543701, + "logps/chosen": -247.66082763671875, + "logps/rejected": -1042.8382568359375, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.771077036857605, + "rewards/margins": 7.570243835449219, + "rewards/rejected": -5.799166679382324, + "step": 1350 + }, + { + "epoch": 0.86, + "learning_rate": 4.983611959274303e-09, + "logits/chosen": -3.203416347503662, + "logits/rejected": -3.1398539543151855, + "logps/chosen": -270.16888427734375, + "logps/rejected": -694.6058349609375, + "loss": 0.2804, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5304336547851562, + "rewards/margins": 5.273155212402344, + "rewards/rejected": -3.7427215576171875, + "step": 1351 + }, + { + "epoch": 0.86, + "learning_rate": 4.938761914318151e-09, + "logits/chosen": -3.26520037651062, + "logits/rejected": -3.1037983894348145, + "logps/chosen": -242.91964721679688, + "logps/rejected": -1380.460693359375, + "loss": 0.2811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5874801874160767, + "rewards/margins": 7.5544233322143555, + "rewards/rejected": -5.966943740844727, + "step": 1352 + }, + { + "epoch": 0.86, + "learning_rate": 4.894104109594465e-09, + "logits/chosen": -3.265930652618408, + "logits/rejected": -3.1848483085632324, + "logps/chosen": -223.5933380126953, + "logps/rejected": -715.9207763671875, + "loss": 0.2506, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4275329113006592, + "rewards/margins": 5.688084602355957, + "rewards/rejected": -4.260551452636719, + "step": 1353 + }, + { + "epoch": 0.86, + "learning_rate": 4.849638735622346e-09, + "logits/chosen": -3.203425884246826, + "logits/rejected": -3.179464340209961, + "logps/chosen": -274.29498291015625, + "logps/rejected": -532.3313598632812, + "loss": 0.2781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4708023071289062, + "rewards/margins": 4.309638977050781, + "rewards/rejected": -2.838836669921875, + "step": 1354 + }, + { + "epoch": 0.86, + "learning_rate": 4.805365982099946e-09, + "logits/chosen": -3.1496191024780273, + "logits/rejected": -3.0509090423583984, + "logps/chosen": -250.33660888671875, + "logps/rejected": -626.1239013671875, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.596948266029358, + "rewards/margins": 4.313624382019043, + "rewards/rejected": -2.7166762351989746, + "step": 1355 + }, + { + "epoch": 0.86, + "learning_rate": 4.761286037903667e-09, + "logits/chosen": -3.1796610355377197, + "logits/rejected": -3.1053876876831055, + "logps/chosen": -257.69622802734375, + "logps/rejected": -364.26116943359375, + "loss": 0.288, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5381278991699219, + "rewards/margins": 3.474602460861206, + "rewards/rejected": -1.9364745616912842, + "step": 1356 + }, + { + "epoch": 0.86, + "learning_rate": 4.717399091087343e-09, + "logits/chosen": -3.2242860794067383, + "logits/rejected": -3.2017407417297363, + "logps/chosen": -315.30145263671875, + "logps/rejected": -777.685546875, + "loss": 0.3212, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6873794794082642, + "rewards/margins": 6.761946201324463, + "rewards/rejected": -5.074566841125488, + "step": 1357 + }, + { + "epoch": 0.87, + "learning_rate": 4.673705328881444e-09, + "logits/chosen": -3.2329652309417725, + "logits/rejected": -3.136815309524536, + "logps/chosen": -280.00079345703125, + "logps/rejected": -422.60284423828125, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6209458112716675, + "rewards/margins": 3.9790587425231934, + "rewards/rejected": -2.3581130504608154, + "step": 1358 + }, + { + "epoch": 0.87, + "learning_rate": 4.630204937692284e-09, + "logits/chosen": -3.261383533477783, + "logits/rejected": -3.1808948516845703, + "logps/chosen": -281.9703369140625, + "logps/rejected": -590.7283935546875, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6109223365783691, + "rewards/margins": 4.641730308532715, + "rewards/rejected": -3.0308074951171875, + "step": 1359 + }, + { + "epoch": 0.87, + "learning_rate": 4.586898103101211e-09, + "logits/chosen": -3.2518372535705566, + "logits/rejected": -3.138554096221924, + "logps/chosen": -258.4205627441406, + "logps/rejected": -569.546875, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4171119928359985, + "rewards/margins": 4.141953468322754, + "rewards/rejected": -2.724841356277466, + "step": 1360 + }, + { + "epoch": 0.87, + "learning_rate": 4.54378500986381e-09, + "logits/chosen": -3.2159581184387207, + "logits/rejected": -3.1061272621154785, + "logps/chosen": -270.54327392578125, + "logps/rejected": -806.6824951171875, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0754165649414062, + "rewards/margins": 6.133913040161133, + "rewards/rejected": -5.058496475219727, + "step": 1361 + }, + { + "epoch": 0.87, + "learning_rate": 4.500865841909168e-09, + "logits/chosen": -3.220405340194702, + "logits/rejected": -3.133481502532959, + "logps/chosen": -267.0755615234375, + "logps/rejected": -573.4050903320312, + "loss": 0.2783, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5623260736465454, + "rewards/margins": 4.454333782196045, + "rewards/rejected": -2.89200758934021, + "step": 1362 + }, + { + "epoch": 0.87, + "learning_rate": 4.458140782338981e-09, + "logits/chosen": -3.2749600410461426, + "logits/rejected": -3.167074680328369, + "logps/chosen": -232.958251953125, + "logps/rejected": -764.5751953125, + "loss": 0.2565, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4853485822677612, + "rewards/margins": 6.054715156555176, + "rewards/rejected": -4.569366455078125, + "step": 1363 + }, + { + "epoch": 0.87, + "learning_rate": 4.415610013426907e-09, + "logits/chosen": -3.234527587890625, + "logits/rejected": -3.14924955368042, + "logps/chosen": -209.35189819335938, + "logps/rejected": -833.5523681640625, + "loss": 0.2542, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2857575416564941, + "rewards/margins": 5.496264934539795, + "rewards/rejected": -4.210507392883301, + "step": 1364 + }, + { + "epoch": 0.87, + "learning_rate": 4.373273716617681e-09, + "logits/chosen": -3.3438937664031982, + "logits/rejected": -3.1876068115234375, + "logps/chosen": -249.44158935546875, + "logps/rejected": -820.0299072265625, + "loss": 0.2792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3632667064666748, + "rewards/margins": 5.961328983306885, + "rewards/rejected": -4.598062515258789, + "step": 1365 + }, + { + "epoch": 0.87, + "learning_rate": 4.331132072526389e-09, + "logits/chosen": -3.223371982574463, + "logits/rejected": -3.1256799697875977, + "logps/chosen": -262.27435302734375, + "logps/rejected": -607.1209716796875, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1349319219589233, + "rewards/margins": 5.161048889160156, + "rewards/rejected": -4.026116847991943, + "step": 1366 + }, + { + "epoch": 0.87, + "learning_rate": 4.2891852609377e-09, + "logits/chosen": -3.2628607749938965, + "logits/rejected": -3.1612303256988525, + "logps/chosen": -242.72706604003906, + "logps/rejected": -332.42974853515625, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2547317743301392, + "rewards/margins": 2.7682695388793945, + "rewards/rejected": -1.5135376453399658, + "step": 1367 + }, + { + "epoch": 0.87, + "learning_rate": 4.2474334608050665e-09, + "logits/chosen": -3.312314033508301, + "logits/rejected": -3.1465811729431152, + "logps/chosen": -271.9371337890625, + "logps/rejected": -513.0528564453125, + "loss": 0.2812, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4469925165176392, + "rewards/margins": 4.029698371887207, + "rewards/rejected": -2.5827057361602783, + "step": 1368 + }, + { + "epoch": 0.87, + "learning_rate": 4.205876850250023e-09, + "logits/chosen": -3.2273294925689697, + "logits/rejected": -3.1214113235473633, + "logps/chosen": -219.49240112304688, + "logps/rejected": -334.10357666015625, + "loss": 0.3043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2654578685760498, + "rewards/margins": 2.8821823596954346, + "rewards/rejected": -1.6167244911193848, + "step": 1369 + }, + { + "epoch": 0.87, + "learning_rate": 4.164515606561347e-09, + "logits/chosen": -3.2036657333374023, + "logits/rejected": -3.1192855834960938, + "logps/chosen": -256.3226013183594, + "logps/rejected": -710.798828125, + "loss": 0.2612, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5605446100234985, + "rewards/margins": 5.456275463104248, + "rewards/rejected": -3.89573073387146, + "step": 1370 + }, + { + "epoch": 0.87, + "learning_rate": 4.123349906194357e-09, + "logits/chosen": -3.282552719116211, + "logits/rejected": -3.068758487701416, + "logps/chosen": -286.61309814453125, + "logps/rejected": -459.3692932128906, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.418164849281311, + "rewards/margins": 3.6476173400878906, + "rewards/rejected": -2.229452610015869, + "step": 1371 + }, + { + "epoch": 0.87, + "learning_rate": 4.082379924770135e-09, + "logits/chosen": -3.2277305126190186, + "logits/rejected": -3.1968259811401367, + "logps/chosen": -252.74945068359375, + "logps/rejected": -764.0328369140625, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2657966613769531, + "rewards/margins": 5.743412017822266, + "rewards/rejected": -4.4776153564453125, + "step": 1372 + }, + { + "epoch": 0.88, + "learning_rate": 4.04160583707483e-09, + "logits/chosen": -3.181196689605713, + "logits/rejected": -3.0641889572143555, + "logps/chosen": -264.29632568359375, + "logps/rejected": -1126.92333984375, + "loss": 0.2888, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5893783569335938, + "rewards/margins": 7.826486587524414, + "rewards/rejected": -6.237107753753662, + "step": 1373 + }, + { + "epoch": 0.88, + "learning_rate": 4.001027817058789e-09, + "logits/chosen": -3.2263832092285156, + "logits/rejected": -3.104806661605835, + "logps/chosen": -255.23300170898438, + "logps/rejected": -642.4987182617188, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.265251874923706, + "rewards/margins": 3.824148654937744, + "rewards/rejected": -2.558896780014038, + "step": 1374 + }, + { + "epoch": 0.88, + "learning_rate": 3.960646037835979e-09, + "logits/chosen": -3.283914804458618, + "logits/rejected": -3.102552890777588, + "logps/chosen": -241.49549865722656, + "logps/rejected": -1604.7003173828125, + "loss": 0.2613, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5674309730529785, + "rewards/margins": 8.57937240600586, + "rewards/rejected": -7.011941432952881, + "step": 1375 + }, + { + "epoch": 0.88, + "learning_rate": 3.920460671683112e-09, + "logits/chosen": -3.273074150085449, + "logits/rejected": -3.1354002952575684, + "logps/chosen": -293.56787109375, + "logps/rejected": -396.71649169921875, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3868300914764404, + "rewards/margins": 3.534536838531494, + "rewards/rejected": -2.1477065086364746, + "step": 1376 + }, + { + "epoch": 0.88, + "learning_rate": 3.880471890038967e-09, + "logits/chosen": -3.2387313842773438, + "logits/rejected": -3.127063751220703, + "logps/chosen": -267.89044189453125, + "logps/rejected": -546.3735961914062, + "loss": 0.2781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.560359239578247, + "rewards/margins": 4.697728157043457, + "rewards/rejected": -3.13736891746521, + "step": 1377 + }, + { + "epoch": 0.88, + "learning_rate": 3.840679863503699e-09, + "logits/chosen": -3.301891803741455, + "logits/rejected": -3.1791341304779053, + "logps/chosen": -239.7596435546875, + "logps/rejected": -367.1826171875, + "loss": 0.2825, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5985809564590454, + "rewards/margins": 3.8776369094848633, + "rewards/rejected": -2.2790558338165283, + "step": 1378 + }, + { + "epoch": 0.88, + "learning_rate": 3.801084761837991e-09, + "logits/chosen": -3.2492775917053223, + "logits/rejected": -3.167006492614746, + "logps/chosen": -235.49490356445312, + "logps/rejected": -763.8894653320312, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5856101512908936, + "rewards/margins": 6.672445297241211, + "rewards/rejected": -5.086834907531738, + "step": 1379 + }, + { + "epoch": 0.88, + "learning_rate": 3.761686753962473e-09, + "logits/chosen": -3.2652082443237305, + "logits/rejected": -3.123814821243286, + "logps/chosen": -230.87948608398438, + "logps/rejected": -496.97064208984375, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.375483751296997, + "rewards/margins": 4.452969551086426, + "rewards/rejected": -3.0774857997894287, + "step": 1380 + }, + { + "epoch": 0.88, + "learning_rate": 3.7224860079569043e-09, + "logits/chosen": -3.268480062484741, + "logits/rejected": -3.132667064666748, + "logps/chosen": -261.9786682128906, + "logps/rejected": -826.2298583984375, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3388832807540894, + "rewards/margins": 4.944167613983154, + "rewards/rejected": -3.6052842140197754, + "step": 1381 + }, + { + "epoch": 0.88, + "learning_rate": 3.6834826910594806e-09, + "logits/chosen": -3.175999164581299, + "logits/rejected": -3.115290880203247, + "logps/chosen": -240.66946411132812, + "logps/rejected": -445.0247497558594, + "loss": 0.2928, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.773040771484375, + "rewards/margins": 4.472810745239258, + "rewards/rejected": -2.6997697353363037, + "step": 1382 + }, + { + "epoch": 0.88, + "learning_rate": 3.644676969666144e-09, + "logits/chosen": -3.269357681274414, + "logits/rejected": -3.2109551429748535, + "logps/chosen": -221.8704376220703, + "logps/rejected": -753.6265869140625, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.252784013748169, + "rewards/margins": 5.909096717834473, + "rewards/rejected": -4.656312465667725, + "step": 1383 + }, + { + "epoch": 0.88, + "learning_rate": 3.6060690093298427e-09, + "logits/chosen": -3.195936679840088, + "logits/rejected": -3.102362632751465, + "logps/chosen": -254.30043029785156, + "logps/rejected": -559.979736328125, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1650726795196533, + "rewards/margins": 3.881533622741699, + "rewards/rejected": -2.716461181640625, + "step": 1384 + }, + { + "epoch": 0.88, + "learning_rate": 3.5676589747598295e-09, + "logits/chosen": -3.202106475830078, + "logits/rejected": -3.137429714202881, + "logps/chosen": -244.83868408203125, + "logps/rejected": -540.50244140625, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.420732855796814, + "rewards/margins": 4.2923994064331055, + "rewards/rejected": -2.871666669845581, + "step": 1385 + }, + { + "epoch": 0.88, + "learning_rate": 3.529447029820981e-09, + "logits/chosen": -3.2918014526367188, + "logits/rejected": -3.2373046875, + "logps/chosen": -253.4680938720703, + "logps/rejected": -686.963134765625, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.682093858718872, + "rewards/margins": 5.603795051574707, + "rewards/rejected": -3.921700954437256, + "step": 1386 + }, + { + "epoch": 0.88, + "learning_rate": 3.4914333375330895e-09, + "logits/chosen": -3.239687919616699, + "logits/rejected": -3.0597381591796875, + "logps/chosen": -231.9370574951172, + "logps/rejected": -947.2160034179688, + "loss": 0.2662, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6082046031951904, + "rewards/margins": 7.526616096496582, + "rewards/rejected": -5.9184112548828125, + "step": 1387 + }, + { + "epoch": 0.88, + "learning_rate": 3.453618060070129e-09, + "logits/chosen": -3.302121162414551, + "logits/rejected": -3.135274648666382, + "logps/chosen": -250.18942260742188, + "logps/rejected": -861.358154296875, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2024322748184204, + "rewards/margins": 6.422033786773682, + "rewards/rejected": -5.219601631164551, + "step": 1388 + }, + { + "epoch": 0.89, + "learning_rate": 3.416001358759635e-09, + "logits/chosen": -3.2213797569274902, + "logits/rejected": -3.1069602966308594, + "logps/chosen": -230.14779663085938, + "logps/rejected": -691.2100219726562, + "loss": 0.2702, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4468475580215454, + "rewards/margins": 5.351880073547363, + "rewards/rejected": -3.905032157897949, + "step": 1389 + }, + { + "epoch": 0.89, + "learning_rate": 3.3785833940819574e-09, + "logits/chosen": -3.203214645385742, + "logits/rejected": -3.110703229904175, + "logps/chosen": -230.2122802734375, + "logps/rejected": -1411.266845703125, + "loss": 0.2693, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.723524570465088, + "rewards/margins": 8.490034103393555, + "rewards/rejected": -6.766510009765625, + "step": 1390 + }, + { + "epoch": 0.89, + "learning_rate": 3.3413643256695935e-09, + "logits/chosen": -3.188955307006836, + "logits/rejected": -3.1259727478027344, + "logps/chosen": -272.292724609375, + "logps/rejected": -489.3333740234375, + "loss": 0.3008, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.372826337814331, + "rewards/margins": 4.465666770935059, + "rewards/rejected": -3.0928406715393066, + "step": 1391 + }, + { + "epoch": 0.89, + "learning_rate": 3.3043443123065284e-09, + "logits/chosen": -3.2355117797851562, + "logits/rejected": -3.0664467811584473, + "logps/chosen": -266.5823669433594, + "logps/rejected": -574.5972900390625, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.410559892654419, + "rewards/margins": 4.771782875061035, + "rewards/rejected": -3.361222743988037, + "step": 1392 + }, + { + "epoch": 0.89, + "learning_rate": 3.267523511927506e-09, + "logits/chosen": -3.2529635429382324, + "logits/rejected": -3.1713452339172363, + "logps/chosen": -266.9620666503906, + "logps/rejected": -430.12811279296875, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3526054620742798, + "rewards/margins": 3.819361925125122, + "rewards/rejected": -2.4667563438415527, + "step": 1393 + }, + { + "epoch": 0.89, + "learning_rate": 3.2309020816174204e-09, + "logits/chosen": -3.2344484329223633, + "logits/rejected": -3.06079363822937, + "logps/chosen": -256.287841796875, + "logps/rejected": -631.3309936523438, + "loss": 0.3027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3386459350585938, + "rewards/margins": 4.854924201965332, + "rewards/rejected": -3.5162782669067383, + "step": 1394 + }, + { + "epoch": 0.89, + "learning_rate": 3.1944801776106035e-09, + "logits/chosen": -3.2250142097473145, + "logits/rejected": -3.0897226333618164, + "logps/chosen": -254.41641235351562, + "logps/rejected": -591.16064453125, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5511398315429688, + "rewards/margins": 4.843172073364258, + "rewards/rejected": -3.29203200340271, + "step": 1395 + }, + { + "epoch": 0.89, + "learning_rate": 3.1582579552901554e-09, + "logits/chosen": -3.2265515327453613, + "logits/rejected": -3.1159615516662598, + "logps/chosen": -262.06884765625, + "logps/rejected": -651.227783203125, + "loss": 0.2639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3799896240234375, + "rewards/margins": 5.226941108703613, + "rewards/rejected": -3.846951484680176, + "step": 1396 + }, + { + "epoch": 0.89, + "learning_rate": 3.122235569187298e-09, + "logits/chosen": -3.271845579147339, + "logits/rejected": -3.1479461193084717, + "logps/chosen": -266.0738525390625, + "logps/rejected": -466.35455322265625, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3064866065979004, + "rewards/margins": 3.6014466285705566, + "rewards/rejected": -2.2949600219726562, + "step": 1397 + }, + { + "epoch": 0.89, + "learning_rate": 3.0864131729807397e-09, + "logits/chosen": -3.2779736518859863, + "logits/rejected": -3.192565441131592, + "logps/chosen": -277.5279541015625, + "logps/rejected": -3855.20068359375, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6467255353927612, + "rewards/margins": 9.383676528930664, + "rewards/rejected": -7.736950874328613, + "step": 1398 + }, + { + "epoch": 0.89, + "learning_rate": 3.0507909194959545e-09, + "logits/chosen": -3.316474437713623, + "logits/rejected": -3.116682529449463, + "logps/chosen": -266.46771240234375, + "logps/rejected": -373.43878173828125, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4495148658752441, + "rewards/margins": 3.4539291858673096, + "rewards/rejected": -2.0044143199920654, + "step": 1399 + }, + { + "epoch": 0.89, + "learning_rate": 3.015368960704584e-09, + "logits/chosen": -3.23709774017334, + "logits/rejected": -3.201213836669922, + "logps/chosen": -272.713623046875, + "logps/rejected": -787.4038696289062, + "loss": 0.2645, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2379730939865112, + "rewards/margins": 5.969296455383301, + "rewards/rejected": -4.7313232421875, + "step": 1400 + }, + { + "epoch": 0.89, + "learning_rate": 2.9801474477237742e-09, + "logits/chosen": -3.266604423522949, + "logits/rejected": -3.1541588306427, + "logps/chosen": -264.4971923828125, + "logps/rejected": -645.5279541015625, + "loss": 0.2788, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.568598985671997, + "rewards/margins": 5.463969707489014, + "rewards/rejected": -3.8953704833984375, + "step": 1401 + }, + { + "epoch": 0.89, + "learning_rate": 2.9451265308155225e-09, + "logits/chosen": -3.2559878826141357, + "logits/rejected": -3.0877041816711426, + "logps/chosen": -248.48904418945312, + "logps/rejected": -398.96929931640625, + "loss": 0.2776, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1474380493164062, + "rewards/margins": 3.487143039703369, + "rewards/rejected": -2.339704990386963, + "step": 1402 + }, + { + "epoch": 0.89, + "learning_rate": 2.9103063593860665e-09, + "logits/chosen": -3.288008213043213, + "logits/rejected": -3.119649887084961, + "logps/chosen": -231.5196075439453, + "logps/rejected": -608.1072998046875, + "loss": 0.2641, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.438014268875122, + "rewards/margins": 5.206007480621338, + "rewards/rejected": -3.767993211746216, + "step": 1403 + }, + { + "epoch": 0.89, + "learning_rate": 2.875687081985173e-09, + "logits/chosen": -3.2478420734405518, + "logits/rejected": -3.210481643676758, + "logps/chosen": -243.67721557617188, + "logps/rejected": -553.4091796875, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.402498722076416, + "rewards/margins": 4.913046836853027, + "rewards/rejected": -3.5105485916137695, + "step": 1404 + }, + { + "epoch": 0.9, + "learning_rate": 2.8412688463056113e-09, + "logits/chosen": -3.2372593879699707, + "logits/rejected": -3.1032001972198486, + "logps/chosen": -266.25152587890625, + "logps/rejected": -721.9376220703125, + "loss": 0.2996, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4269013404846191, + "rewards/margins": 4.907580375671387, + "rewards/rejected": -3.4806792736053467, + "step": 1405 + }, + { + "epoch": 0.9, + "learning_rate": 2.8070517991824415e-09, + "logits/chosen": -3.2873964309692383, + "logits/rejected": -3.148679494857788, + "logps/chosen": -266.17987060546875, + "logps/rejected": -588.239013671875, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4288581609725952, + "rewards/margins": 4.460779666900635, + "rewards/rejected": -3.03192138671875, + "step": 1406 + }, + { + "epoch": 0.9, + "learning_rate": 2.773036086592395e-09, + "logits/chosen": -3.2121143341064453, + "logits/rejected": -3.179598808288574, + "logps/chosen": -247.2340545654297, + "logps/rejected": -907.994873046875, + "loss": 0.279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3937592506408691, + "rewards/margins": 7.211154460906982, + "rewards/rejected": -5.817395210266113, + "step": 1407 + }, + { + "epoch": 0.9, + "learning_rate": 2.739221853653306e-09, + "logits/chosen": -3.256629467010498, + "logits/rejected": -3.164217233657837, + "logps/chosen": -244.38711547851562, + "logps/rejected": -595.03466796875, + "loss": 0.2659, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4265724420547485, + "rewards/margins": 4.912853240966797, + "rewards/rejected": -3.486280918121338, + "step": 1408 + }, + { + "epoch": 0.9, + "learning_rate": 2.7056092446234413e-09, + "logits/chosen": -3.214000940322876, + "logits/rejected": -3.1454832553863525, + "logps/chosen": -242.73904418945312, + "logps/rejected": -914.56298828125, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5581252574920654, + "rewards/margins": 6.7582597732543945, + "rewards/rejected": -5.20013427734375, + "step": 1409 + }, + { + "epoch": 0.9, + "learning_rate": 2.6721984029008825e-09, + "logits/chosen": -3.2065792083740234, + "logits/rejected": -3.095498561859131, + "logps/chosen": -290.7386169433594, + "logps/rejected": -621.3687133789062, + "loss": 0.2698, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6410109996795654, + "rewards/margins": 5.139787673950195, + "rewards/rejected": -3.498776435852051, + "step": 1410 + }, + { + "epoch": 0.9, + "learning_rate": 2.638989471022962e-09, + "logits/chosen": -3.2401061058044434, + "logits/rejected": -3.0975582599639893, + "logps/chosen": -254.25497436523438, + "logps/rejected": -604.7949829101562, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2933449745178223, + "rewards/margins": 4.011933326721191, + "rewards/rejected": -2.718588352203369, + "step": 1411 + }, + { + "epoch": 0.9, + "learning_rate": 2.6059825906655953e-09, + "logits/chosen": -3.2053089141845703, + "logits/rejected": -3.1185998916625977, + "logps/chosen": -301.09478759765625, + "logps/rejected": -612.58056640625, + "loss": 0.2991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3119416236877441, + "rewards/margins": 5.113147258758545, + "rewards/rejected": -3.801205635070801, + "step": 1412 + }, + { + "epoch": 0.9, + "learning_rate": 2.573177902642726e-09, + "logits/chosen": -3.264408588409424, + "logits/rejected": -3.201300621032715, + "logps/chosen": -266.2650146484375, + "logps/rejected": -635.4539184570312, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5952179431915283, + "rewards/margins": 5.5041704177856445, + "rewards/rejected": -3.908952236175537, + "step": 1413 + }, + { + "epoch": 0.9, + "learning_rate": 2.540575546905699e-09, + "logits/chosen": -3.202801465988159, + "logits/rejected": -3.0438051223754883, + "logps/chosen": -273.7445068359375, + "logps/rejected": -913.5648803710938, + "loss": 0.3102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4192352294921875, + "rewards/margins": 6.023745536804199, + "rewards/rejected": -4.604510307312012, + "step": 1414 + }, + { + "epoch": 0.9, + "learning_rate": 2.508175662542672e-09, + "logits/chosen": -3.3350021839141846, + "logits/rejected": -3.0886826515197754, + "logps/chosen": -253.8868408203125, + "logps/rejected": -891.7869873046875, + "loss": 0.2974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5458801984786987, + "rewards/margins": 5.871298789978027, + "rewards/rejected": -4.325418472290039, + "step": 1415 + }, + { + "epoch": 0.9, + "learning_rate": 2.4759783877779993e-09, + "logits/chosen": -3.1876230239868164, + "logits/rejected": -3.1212594509124756, + "logps/chosen": -278.78900146484375, + "logps/rejected": -397.49346923828125, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4882659912109375, + "rewards/margins": 3.4603912830352783, + "rewards/rejected": -1.9721252918243408, + "step": 1416 + }, + { + "epoch": 0.9, + "learning_rate": 2.4439838599717157e-09, + "logits/chosen": -3.1941709518432617, + "logits/rejected": -3.160041332244873, + "logps/chosen": -275.5864562988281, + "logps/rejected": -638.3322143554688, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4741668701171875, + "rewards/margins": 4.725894451141357, + "rewards/rejected": -3.251727342605591, + "step": 1417 + }, + { + "epoch": 0.9, + "learning_rate": 2.412192215618819e-09, + "logits/chosen": -3.3052475452423096, + "logits/rejected": -3.189034938812256, + "logps/chosen": -243.72171020507812, + "logps/rejected": -1092.9019775390625, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5271821022033691, + "rewards/margins": 8.591967582702637, + "rewards/rejected": -7.064785957336426, + "step": 1418 + }, + { + "epoch": 0.9, + "learning_rate": 2.3806035903488286e-09, + "logits/chosen": -3.2736027240753174, + "logits/rejected": -3.1036930084228516, + "logps/chosen": -245.27810668945312, + "logps/rejected": -651.4462890625, + "loss": 0.2869, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3923401832580566, + "rewards/margins": 4.960723876953125, + "rewards/rejected": -3.5683836936950684, + "step": 1419 + }, + { + "epoch": 0.91, + "learning_rate": 2.3492181189251224e-09, + "logits/chosen": -3.242046594619751, + "logits/rejected": -3.171070098876953, + "logps/chosen": -267.3508605957031, + "logps/rejected": -724.4541015625, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2844581604003906, + "rewards/margins": 5.607177257537842, + "rewards/rejected": -4.322719097137451, + "step": 1420 + }, + { + "epoch": 0.91, + "learning_rate": 2.3180359352443833e-09, + "logits/chosen": -3.272514820098877, + "logits/rejected": -3.1524341106414795, + "logps/chosen": -249.18649291992188, + "logps/rejected": -620.4275512695312, + "loss": 0.3154, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.747868299484253, + "rewards/margins": 4.898232936859131, + "rewards/rejected": -3.150364875793457, + "step": 1421 + }, + { + "epoch": 0.91, + "learning_rate": 2.287057172336021e-09, + "logits/chosen": -3.2210330963134766, + "logits/rejected": -3.0283701419830322, + "logps/chosen": -268.9066162109375, + "logps/rejected": -389.72344970703125, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.610447645187378, + "rewards/margins": 3.5027527809143066, + "rewards/rejected": -1.8923048973083496, + "step": 1422 + }, + { + "epoch": 0.91, + "learning_rate": 2.2562819623616226e-09, + "logits/chosen": -3.257713794708252, + "logits/rejected": -3.1870293617248535, + "logps/chosen": -272.109375, + "logps/rejected": -706.9052734375, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.591692328453064, + "rewards/margins": 5.661965370178223, + "rewards/rejected": -4.070272922515869, + "step": 1423 + }, + { + "epoch": 0.91, + "learning_rate": 2.2257104366143755e-09, + "logits/chosen": -3.213862419128418, + "logits/rejected": -3.1334245204925537, + "logps/chosen": -274.6313171386719, + "logps/rejected": -1056.4034423828125, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4613754749298096, + "rewards/margins": 6.55967903137207, + "rewards/rejected": -5.098303318023682, + "step": 1424 + }, + { + "epoch": 0.91, + "learning_rate": 2.195342725518512e-09, + "logits/chosen": -3.19333553314209, + "logits/rejected": -3.146392822265625, + "logps/chosen": -247.0302734375, + "logps/rejected": -450.02960205078125, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.489881992340088, + "rewards/margins": 4.153440952301025, + "rewards/rejected": -2.6635589599609375, + "step": 1425 + }, + { + "epoch": 0.91, + "learning_rate": 2.165178958628744e-09, + "logits/chosen": -3.208909273147583, + "logits/rejected": -3.108358860015869, + "logps/chosen": -268.29736328125, + "logps/rejected": -518.976318359375, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2258628606796265, + "rewards/margins": 4.122580528259277, + "rewards/rejected": -2.8967177867889404, + "step": 1426 + }, + { + "epoch": 0.91, + "learning_rate": 2.1352192646297117e-09, + "logits/chosen": -3.197829246520996, + "logits/rejected": -3.1160571575164795, + "logps/chosen": -276.8955383300781, + "logps/rejected": -613.5720825195312, + "loss": 0.275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.160640001296997, + "rewards/margins": 3.7232284545898438, + "rewards/rejected": -2.562588691711426, + "step": 1427 + }, + { + "epoch": 0.91, + "learning_rate": 2.1054637713354582e-09, + "logits/chosen": -3.2224061489105225, + "logits/rejected": -3.1513171195983887, + "logps/chosen": -237.42919921875, + "logps/rejected": -743.533935546875, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6963738203048706, + "rewards/margins": 6.114886283874512, + "rewards/rejected": -4.418512344360352, + "step": 1428 + }, + { + "epoch": 0.91, + "learning_rate": 2.075912605688851e-09, + "logits/chosen": -3.2759251594543457, + "logits/rejected": -3.1436209678649902, + "logps/chosen": -258.3710021972656, + "logps/rejected": -801.3736572265625, + "loss": 0.3046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.393402099609375, + "rewards/margins": 6.228430271148682, + "rewards/rejected": -4.835028171539307, + "step": 1429 + }, + { + "epoch": 0.91, + "learning_rate": 2.0465658937610595e-09, + "logits/chosen": -3.2472288608551025, + "logits/rejected": -3.125274658203125, + "logps/chosen": -251.61276245117188, + "logps/rejected": -509.6595458984375, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4938697814941406, + "rewards/margins": 4.035386085510254, + "rewards/rejected": -2.541516065597534, + "step": 1430 + }, + { + "epoch": 0.91, + "learning_rate": 2.0174237607510135e-09, + "logits/chosen": -3.245767593383789, + "logits/rejected": -3.2373831272125244, + "logps/chosen": -206.8487548828125, + "logps/rejected": -4062.6826171875, + "loss": 0.271, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.518214464187622, + "rewards/margins": 12.153279304504395, + "rewards/rejected": -10.635065078735352, + "step": 1431 + }, + { + "epoch": 0.91, + "learning_rate": 1.9884863309848566e-09, + "logits/chosen": -3.2689030170440674, + "logits/rejected": -3.298482894897461, + "logps/chosen": -254.08377075195312, + "logps/rejected": -3600.828857421875, + "loss": 0.2741, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0960556268692017, + "rewards/margins": 11.106669425964355, + "rewards/rejected": -10.010614395141602, + "step": 1432 + }, + { + "epoch": 0.91, + "learning_rate": 1.9597537279154584e-09, + "logits/chosen": -3.1503891944885254, + "logits/rejected": -3.0499091148376465, + "logps/chosen": -272.6719055175781, + "logps/rejected": -749.6539306640625, + "loss": 0.2834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4100685119628906, + "rewards/margins": 5.8648786544799805, + "rewards/rejected": -4.454809665679932, + "step": 1433 + }, + { + "epoch": 0.91, + "learning_rate": 1.931226074121811e-09, + "logits/chosen": -3.1671829223632812, + "logits/rejected": -3.1461598873138428, + "logps/chosen": -297.50323486328125, + "logps/rejected": -540.693115234375, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.470147728919983, + "rewards/margins": 4.601872444152832, + "rewards/rejected": -3.1317245960235596, + "step": 1434 + }, + { + "epoch": 0.91, + "learning_rate": 1.902903491308594e-09, + "logits/chosen": -3.3328919410705566, + "logits/rejected": -3.1720333099365234, + "logps/chosen": -258.28668212890625, + "logps/rejected": -622.8314208984375, + "loss": 0.2763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3142852783203125, + "rewards/margins": 4.903058052062988, + "rewards/rejected": -3.5887725353240967, + "step": 1435 + }, + { + "epoch": 0.92, + "learning_rate": 1.8747861003055976e-09, + "logits/chosen": -3.266796588897705, + "logits/rejected": -3.073467254638672, + "logps/chosen": -240.48452758789062, + "logps/rejected": -248.18157958984375, + "loss": 0.2878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.463982343673706, + "rewards/margins": 2.5291190147399902, + "rewards/rejected": -1.0651366710662842, + "step": 1436 + }, + { + "epoch": 0.92, + "learning_rate": 1.8468740210672073e-09, + "logits/chosen": -3.2346351146698, + "logits/rejected": -3.0303425788879395, + "logps/chosen": -279.9599914550781, + "logps/rejected": -1986.8892822265625, + "loss": 0.2749, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3006408214569092, + "rewards/margins": 9.567337036132812, + "rewards/rejected": -8.266695976257324, + "step": 1437 + }, + { + "epoch": 0.92, + "learning_rate": 1.8191673726719426e-09, + "logits/chosen": -3.25594425201416, + "logits/rejected": -3.1146678924560547, + "logps/chosen": -235.9287872314453, + "logps/rejected": -479.2886047363281, + "loss": 0.2802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4419212341308594, + "rewards/margins": 3.80802845954895, + "rewards/rejected": -2.366107225418091, + "step": 1438 + }, + { + "epoch": 0.92, + "learning_rate": 1.7916662733218846e-09, + "logits/chosen": -3.2959232330322266, + "logits/rejected": -3.147294044494629, + "logps/chosen": -258.39697265625, + "logps/rejected": -655.23779296875, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5891907215118408, + "rewards/margins": 5.775390625, + "rewards/rejected": -4.186200141906738, + "step": 1439 + }, + { + "epoch": 0.92, + "learning_rate": 1.7643708403422052e-09, + "logits/chosen": -3.273709774017334, + "logits/rejected": -3.1686501502990723, + "logps/chosen": -268.8057556152344, + "logps/rejected": -656.096923828125, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7280151844024658, + "rewards/margins": 5.588940620422363, + "rewards/rejected": -3.8609251976013184, + "step": 1440 + }, + { + "epoch": 0.92, + "learning_rate": 1.7372811901806672e-09, + "logits/chosen": -3.2993788719177246, + "logits/rejected": -3.126063108444214, + "logps/chosen": -232.71392822265625, + "logps/rejected": -1046.788330078125, + "loss": 0.2787, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5141098499298096, + "rewards/margins": 7.650447368621826, + "rewards/rejected": -6.1363372802734375, + "step": 1441 + }, + { + "epoch": 0.92, + "learning_rate": 1.710397438407135e-09, + "logits/chosen": -3.2555954456329346, + "logits/rejected": -3.148170232772827, + "logps/chosen": -226.91690063476562, + "logps/rejected": -1118.4208984375, + "loss": 0.2805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3531821966171265, + "rewards/margins": 7.844869613647461, + "rewards/rejected": -6.491686820983887, + "step": 1442 + }, + { + "epoch": 0.92, + "learning_rate": 1.6837196997130431e-09, + "logits/chosen": -3.186453104019165, + "logits/rejected": -3.097132682800293, + "logps/chosen": -247.3768310546875, + "logps/rejected": -576.844482421875, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5498535633087158, + "rewards/margins": 4.662040710449219, + "rewards/rejected": -3.112187385559082, + "step": 1443 + }, + { + "epoch": 0.92, + "learning_rate": 1.6572480879109618e-09, + "logits/chosen": -3.2171857357025146, + "logits/rejected": -3.2081470489501953, + "logps/chosen": -226.1640625, + "logps/rejected": -638.6026000976562, + "loss": 0.2672, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4182953834533691, + "rewards/margins": 5.019739151000977, + "rewards/rejected": -3.6014437675476074, + "step": 1444 + }, + { + "epoch": 0.92, + "learning_rate": 1.6309827159340595e-09, + "logits/chosen": -3.205110549926758, + "logits/rejected": -3.11346697807312, + "logps/chosen": -260.29595947265625, + "logps/rejected": -972.53759765625, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3247382640838623, + "rewards/margins": 5.961564064025879, + "rewards/rejected": -4.6368255615234375, + "step": 1445 + }, + { + "epoch": 0.92, + "learning_rate": 1.6049236958356471e-09, + "logits/chosen": -3.2871828079223633, + "logits/rejected": -3.0634067058563232, + "logps/chosen": -273.7242431640625, + "logps/rejected": -4312.6640625, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2569336891174316, + "rewards/margins": 8.319131851196289, + "rewards/rejected": -7.062198162078857, + "step": 1446 + }, + { + "epoch": 0.92, + "learning_rate": 1.579071138788729e-09, + "logits/chosen": -3.1630306243896484, + "logits/rejected": -2.9928975105285645, + "logps/chosen": -308.060546875, + "logps/rejected": -978.882568359375, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4953429698944092, + "rewards/margins": 5.334898471832275, + "rewards/rejected": -3.839555263519287, + "step": 1447 + }, + { + "epoch": 0.92, + "learning_rate": 1.5534251550854417e-09, + "logits/chosen": -3.22770094871521, + "logits/rejected": -2.997124195098877, + "logps/chosen": -240.2987060546875, + "logps/rejected": -2361.996337890625, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5039459466934204, + "rewards/margins": 10.382254600524902, + "rewards/rejected": -8.878308296203613, + "step": 1448 + }, + { + "epoch": 0.92, + "learning_rate": 1.5279858541366874e-09, + "logits/chosen": -3.272545576095581, + "logits/rejected": -3.0866334438323975, + "logps/chosen": -231.36666870117188, + "logps/rejected": -660.1923217773438, + "loss": 0.2661, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3130683898925781, + "rewards/margins": 4.966864585876465, + "rewards/rejected": -3.653796672821045, + "step": 1449 + }, + { + "epoch": 0.92, + "learning_rate": 1.5027533444715967e-09, + "logits/chosen": -3.1850790977478027, + "logits/rejected": -3.10724139213562, + "logps/chosen": -276.99017333984375, + "logps/rejected": -497.1462707519531, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.406874179840088, + "rewards/margins": 3.8292510509490967, + "rewards/rejected": -2.422377109527588, + "step": 1450 + }, + { + "epoch": 0.92, + "learning_rate": 1.477727733737083e-09, + "logits/chosen": -3.250788688659668, + "logits/rejected": -3.1211018562316895, + "logps/chosen": -242.37606811523438, + "logps/rejected": -828.6224365234375, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.343031406402588, + "rewards/margins": 5.495271682739258, + "rewards/rejected": -4.15224027633667, + "step": 1451 + }, + { + "epoch": 0.93, + "learning_rate": 1.4529091286973993e-09, + "logits/chosen": -3.232320547103882, + "logits/rejected": -3.189371109008789, + "logps/chosen": -240.5208282470703, + "logps/rejected": -1227.2098388671875, + "loss": 0.2694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2736480236053467, + "rewards/margins": 7.128985404968262, + "rewards/rejected": -5.855337619781494, + "step": 1452 + }, + { + "epoch": 0.93, + "learning_rate": 1.4282976352336661e-09, + "logits/chosen": -3.223766326904297, + "logits/rejected": -3.1670479774475098, + "logps/chosen": -243.1537628173828, + "logps/rejected": -787.1038818359375, + "loss": 0.2674, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4180374145507812, + "rewards/margins": 5.741316795349121, + "rewards/rejected": -4.323278903961182, + "step": 1453 + }, + { + "epoch": 0.93, + "learning_rate": 1.4038933583434332e-09, + "logits/chosen": -3.245455741882324, + "logits/rejected": -3.174147129058838, + "logps/chosen": -233.22317504882812, + "logps/rejected": -632.0389404296875, + "loss": 0.2608, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.384425401687622, + "rewards/margins": 4.771009922027588, + "rewards/rejected": -3.386584520339966, + "step": 1454 + }, + { + "epoch": 0.93, + "learning_rate": 1.3796964021402068e-09, + "logits/chosen": -3.2373056411743164, + "logits/rejected": -3.0659618377685547, + "logps/chosen": -270.82916259765625, + "logps/rejected": -505.0613098144531, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5177116394042969, + "rewards/margins": 4.271590232849121, + "rewards/rejected": -2.7538788318634033, + "step": 1455 + }, + { + "epoch": 0.93, + "learning_rate": 1.3557068698530294e-09, + "logits/chosen": -3.297560453414917, + "logits/rejected": -3.2025108337402344, + "logps/chosen": -239.9299774169922, + "logps/rejected": -596.6071166992188, + "loss": 0.2836, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8066208362579346, + "rewards/margins": 5.344886779785156, + "rewards/rejected": -3.538266181945801, + "step": 1456 + }, + { + "epoch": 0.93, + "learning_rate": 1.3319248638260284e-09, + "logits/chosen": -3.2135818004608154, + "logits/rejected": -3.108755588531494, + "logps/chosen": -273.84259033203125, + "logps/rejected": -595.73486328125, + "loss": 0.2793, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.458378553390503, + "rewards/margins": 4.652833461761475, + "rewards/rejected": -3.1944549083709717, + "step": 1457 + }, + { + "epoch": 0.93, + "learning_rate": 1.3083504855180005e-09, + "logits/chosen": -3.1691806316375732, + "logits/rejected": -2.962728500366211, + "logps/chosen": -285.561279296875, + "logps/rejected": -1381.984375, + "loss": 0.2793, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2713019847869873, + "rewards/margins": 7.530248641967773, + "rewards/rejected": -6.258945941925049, + "step": 1458 + }, + { + "epoch": 0.93, + "learning_rate": 1.284983835501935e-09, + "logits/chosen": -3.187147855758667, + "logits/rejected": -3.125034809112549, + "logps/chosen": -259.0920715332031, + "logps/rejected": -433.3650207519531, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.461328148841858, + "rewards/margins": 3.81876540184021, + "rewards/rejected": -2.3574371337890625, + "step": 1459 + }, + { + "epoch": 0.93, + "learning_rate": 1.2618250134646291e-09, + "logits/chosen": -3.273898124694824, + "logits/rejected": -3.0060513019561768, + "logps/chosen": -293.3354187011719, + "logps/rejected": -1290.664794921875, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.513031005859375, + "rewards/margins": 7.374969482421875, + "rewards/rejected": -5.8619384765625, + "step": 1460 + }, + { + "epoch": 0.93, + "learning_rate": 1.2388741182062345e-09, + "logits/chosen": -3.212045431137085, + "logits/rejected": -3.0471434593200684, + "logps/chosen": -265.20721435546875, + "logps/rejected": -778.5882568359375, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4289932250976562, + "rewards/margins": 4.827359199523926, + "rewards/rejected": -3.3983657360076904, + "step": 1461 + }, + { + "epoch": 0.93, + "learning_rate": 1.2161312476398455e-09, + "logits/chosen": -3.2528038024902344, + "logits/rejected": -3.201659679412842, + "logps/chosen": -291.30279541015625, + "logps/rejected": -834.5791015625, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.562757968902588, + "rewards/margins": 6.652244567871094, + "rewards/rejected": -5.089486598968506, + "step": 1462 + }, + { + "epoch": 0.93, + "learning_rate": 1.1935964987910995e-09, + "logits/chosen": -3.2574479579925537, + "logits/rejected": -3.1702041625976562, + "logps/chosen": -247.2434844970703, + "logps/rejected": -719.81396484375, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5114761590957642, + "rewards/margins": 5.3190813064575195, + "rewards/rejected": -3.807605266571045, + "step": 1463 + }, + { + "epoch": 0.93, + "learning_rate": 1.1712699677977223e-09, + "logits/chosen": -3.213648796081543, + "logits/rejected": -3.1816015243530273, + "logps/chosen": -290.11328125, + "logps/rejected": -982.9921875, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6234756708145142, + "rewards/margins": 7.211698532104492, + "rewards/rejected": -5.588223457336426, + "step": 1464 + }, + { + "epoch": 0.93, + "learning_rate": 1.1491517499091496e-09, + "logits/chosen": -3.242830276489258, + "logits/rejected": -3.0550599098205566, + "logps/chosen": -244.01483154296875, + "logps/rejected": -346.1458740234375, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4616813659667969, + "rewards/margins": 2.9179739952087402, + "rewards/rejected": -1.456292748451233, + "step": 1465 + }, + { + "epoch": 0.93, + "learning_rate": 1.127241939486112e-09, + "logits/chosen": -3.312448501586914, + "logits/rejected": -3.0771751403808594, + "logps/chosen": -255.44215393066406, + "logps/rejected": -372.762451171875, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6308609247207642, + "rewards/margins": 3.6474335193634033, + "rewards/rejected": -2.0165727138519287, + "step": 1466 + }, + { + "epoch": 0.93, + "learning_rate": 1.1055406300002345e-09, + "logits/chosen": -3.212679624557495, + "logits/rejected": -3.1005921363830566, + "logps/chosen": -304.1378173828125, + "logps/rejected": -1291.281494140625, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4186569452285767, + "rewards/margins": 7.434992790222168, + "rewards/rejected": -6.016335964202881, + "step": 1467 + }, + { + "epoch": 0.94, + "learning_rate": 1.084047914033631e-09, + "logits/chosen": -3.2530016899108887, + "logits/rejected": -3.0578484535217285, + "logps/chosen": -253.00167846679688, + "logps/rejected": -533.0003051757812, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.573817491531372, + "rewards/margins": 4.589422702789307, + "rewards/rejected": -3.0156052112579346, + "step": 1468 + }, + { + "epoch": 0.94, + "learning_rate": 1.0627638832785168e-09, + "logits/chosen": -3.2684264183044434, + "logits/rejected": -3.127201557159424, + "logps/chosen": -244.83135986328125, + "logps/rejected": -597.9493408203125, + "loss": 0.2769, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4918487071990967, + "rewards/margins": 4.608098030090332, + "rewards/rejected": -3.1162490844726562, + "step": 1469 + }, + { + "epoch": 0.94, + "learning_rate": 1.0416886285368187e-09, + "logits/chosen": -3.2039690017700195, + "logits/rejected": -3.0333056449890137, + "logps/chosen": -265.2021484375, + "logps/rejected": -544.479736328125, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4753891229629517, + "rewards/margins": 4.156897068023682, + "rewards/rejected": -2.6815080642700195, + "step": 1470 + }, + { + "epoch": 0.94, + "learning_rate": 1.0208222397197707e-09, + "logits/chosen": -3.2622427940368652, + "logits/rejected": -3.1355929374694824, + "logps/chosen": -265.4500732421875, + "logps/rejected": -513.8612060546875, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.086817979812622, + "rewards/margins": 3.7853503227233887, + "rewards/rejected": -2.6985321044921875, + "step": 1471 + }, + { + "epoch": 0.94, + "learning_rate": 1.0001648058475586e-09, + "logits/chosen": -3.2979207038879395, + "logits/rejected": -3.0791139602661133, + "logps/chosen": -240.0386962890625, + "logps/rejected": -392.6369934082031, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.668999433517456, + "rewards/margins": 3.7268714904785156, + "rewards/rejected": -2.0578720569610596, + "step": 1472 + }, + { + "epoch": 0.94, + "learning_rate": 9.797164150489034e-10, + "logits/chosen": -3.1702346801757812, + "logits/rejected": -3.166888475418091, + "logps/chosen": -283.70013427734375, + "logps/rejected": -671.7501831054688, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6604995727539062, + "rewards/margins": 5.538146018981934, + "rewards/rejected": -3.877645969390869, + "step": 1473 + }, + { + "epoch": 0.94, + "learning_rate": 9.594771545607395e-10, + "logits/chosen": -3.2521471977233887, + "logits/rejected": -3.0408244132995605, + "logps/chosen": -252.7026824951172, + "logps/rejected": -400.54736328125, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3951606750488281, + "rewards/margins": 3.160996437072754, + "rewards/rejected": -1.7658355236053467, + "step": 1474 + }, + { + "epoch": 0.94, + "learning_rate": 9.394471107277757e-10, + "logits/chosen": -3.2396669387817383, + "logits/rejected": -3.1813457012176514, + "logps/chosen": -240.5196075439453, + "logps/rejected": -754.5047607421875, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3878166675567627, + "rewards/margins": 5.452630043029785, + "rewards/rejected": -4.064813613891602, + "step": 1475 + }, + { + "epoch": 0.94, + "learning_rate": 9.196263690021799e-10, + "logits/chosen": -3.140829563140869, + "logits/rejected": -3.0458483695983887, + "logps/chosen": -276.2494812011719, + "logps/rejected": -591.3054809570312, + "loss": 0.2791, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2145538330078125, + "rewards/margins": 4.385004997253418, + "rewards/rejected": -3.1704514026641846, + "step": 1476 + }, + { + "epoch": 0.94, + "learning_rate": 9.000150139431895e-10, + "logits/chosen": -3.2235817909240723, + "logits/rejected": -3.1182827949523926, + "logps/chosen": -239.29833984375, + "logps/rejected": -496.97943115234375, + "loss": 0.2655, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3179810047149658, + "rewards/margins": 3.6540207862854004, + "rewards/rejected": -2.3360397815704346, + "step": 1477 + }, + { + "epoch": 0.94, + "learning_rate": 8.806131292167618e-10, + "logits/chosen": -3.203022003173828, + "logits/rejected": -3.070709466934204, + "logps/chosen": -220.66070556640625, + "logps/rejected": -1095.0042724609375, + "loss": 0.2717, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.596379041671753, + "rewards/margins": 6.905856609344482, + "rewards/rejected": -5.30947732925415, + "step": 1478 + }, + { + "epoch": 0.94, + "learning_rate": 8.614207975952082e-10, + "logits/chosen": -3.289518356323242, + "logits/rejected": -3.133366107940674, + "logps/chosen": -208.9696502685547, + "logps/rejected": -1189.1693115234375, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1302094459533691, + "rewards/margins": 6.879155158996582, + "rewards/rejected": -5.748945713043213, + "step": 1479 + }, + { + "epoch": 0.94, + "learning_rate": 8.424381009568382e-10, + "logits/chosen": -3.280907154083252, + "logits/rejected": -3.1587231159210205, + "logps/chosen": -239.9948272705078, + "logps/rejected": -487.2708740234375, + "loss": 0.282, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1499801874160767, + "rewards/margins": 3.995997667312622, + "rewards/rejected": -2.846017599105835, + "step": 1480 + }, + { + "epoch": 0.94, + "learning_rate": 8.236651202856326e-10, + "logits/chosen": -3.1281659603118896, + "logits/rejected": -3.032721757888794, + "logps/chosen": -286.98516845703125, + "logps/rejected": -527.4246826171875, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.528804063796997, + "rewards/margins": 3.7103257179260254, + "rewards/rejected": -2.1815216541290283, + "step": 1481 + }, + { + "epoch": 0.94, + "learning_rate": 8.051019356708599e-10, + "logits/chosen": -3.247509002685547, + "logits/rejected": -3.077986717224121, + "logps/chosen": -206.0643310546875, + "logps/rejected": -535.849853515625, + "loss": 0.263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5405685901641846, + "rewards/margins": 4.097410678863525, + "rewards/rejected": -2.556842088699341, + "step": 1482 + }, + { + "epoch": 0.95, + "learning_rate": 7.867486263067824e-10, + "logits/chosen": -3.247361660003662, + "logits/rejected": -3.0735201835632324, + "logps/chosen": -233.06256103515625, + "logps/rejected": -1029.66015625, + "loss": 0.27, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4813880920410156, + "rewards/margins": 6.849714279174805, + "rewards/rejected": -5.368326187133789, + "step": 1483 + }, + { + "epoch": 0.95, + "learning_rate": 7.68605270492273e-10, + "logits/chosen": -3.2618918418884277, + "logits/rejected": -3.1259312629699707, + "logps/chosen": -237.79702758789062, + "logps/rejected": -534.37109375, + "loss": 0.2763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2312675714492798, + "rewards/margins": 4.140715599060059, + "rewards/rejected": -2.9094481468200684, + "step": 1484 + }, + { + "epoch": 0.95, + "learning_rate": 7.5067194563051e-10, + "logits/chosen": -3.2717039585113525, + "logits/rejected": -3.1207308769226074, + "logps/chosen": -282.68231201171875, + "logps/rejected": -1031.8441162109375, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1184319257736206, + "rewards/margins": 6.784517765045166, + "rewards/rejected": -5.666085720062256, + "step": 1485 + }, + { + "epoch": 0.95, + "learning_rate": 7.329487282286384e-10, + "logits/chosen": -3.2365710735321045, + "logits/rejected": -3.1679086685180664, + "logps/chosen": -223.64356994628906, + "logps/rejected": -499.93536376953125, + "loss": 0.2737, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2824974060058594, + "rewards/margins": 4.051119327545166, + "rewards/rejected": -2.7686219215393066, + "step": 1486 + }, + { + "epoch": 0.95, + "learning_rate": 7.15435693897426e-10, + "logits/chosen": -3.3013925552368164, + "logits/rejected": -3.192678928375244, + "logps/chosen": -277.50897216796875, + "logps/rejected": -392.62261962890625, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4188294410705566, + "rewards/margins": 3.4184889793395996, + "rewards/rejected": -1.999659776687622, + "step": 1487 + }, + { + "epoch": 0.95, + "learning_rate": 6.981329173509909e-10, + "logits/chosen": -3.293715476989746, + "logits/rejected": -3.1051135063171387, + "logps/chosen": -226.66143798828125, + "logps/rejected": -1459.3258056640625, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3173942565917969, + "rewards/margins": 8.367809295654297, + "rewards/rejected": -7.0504150390625, + "step": 1488 + }, + { + "epoch": 0.95, + "learning_rate": 6.810404724064133e-10, + "logits/chosen": -3.245698928833008, + "logits/rejected": -3.0914201736450195, + "logps/chosen": -274.28369140625, + "logps/rejected": -405.6763916015625, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3297843933105469, + "rewards/margins": 3.2232720851898193, + "rewards/rejected": -1.893487572669983, + "step": 1489 + }, + { + "epoch": 0.95, + "learning_rate": 6.641584319834859e-10, + "logits/chosen": -3.240818500518799, + "logits/rejected": -3.0494284629821777, + "logps/chosen": -259.07275390625, + "logps/rejected": -1329.744873046875, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.624079942703247, + "rewards/margins": 7.912925720214844, + "rewards/rejected": -6.288846015930176, + "step": 1490 + }, + { + "epoch": 0.95, + "learning_rate": 6.474868681043578e-10, + "logits/chosen": -3.230112075805664, + "logits/rejected": -3.1142168045043945, + "logps/chosen": -247.86512756347656, + "logps/rejected": -833.3143310546875, + "loss": 0.2449, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2228119373321533, + "rewards/margins": 5.756054878234863, + "rewards/rejected": -4.533242702484131, + "step": 1491 + }, + { + "epoch": 0.95, + "learning_rate": 6.31025851893241e-10, + "logits/chosen": -3.208061695098877, + "logits/rejected": -3.1754112243652344, + "logps/chosen": -233.58609008789062, + "logps/rejected": -466.4940185546875, + "loss": 0.2901, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.629878282546997, + "rewards/margins": 3.9191360473632812, + "rewards/rejected": -2.2892580032348633, + "step": 1492 + }, + { + "epoch": 0.95, + "learning_rate": 6.147754535761218e-10, + "logits/chosen": -3.270866632461548, + "logits/rejected": -3.121920585632324, + "logps/chosen": -262.1346435546875, + "logps/rejected": -711.083984375, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.324964165687561, + "rewards/margins": 4.198032379150391, + "rewards/rejected": -2.873068332672119, + "step": 1493 + }, + { + "epoch": 0.95, + "learning_rate": 5.987357424804441e-10, + "logits/chosen": -3.134392738342285, + "logits/rejected": -3.139681816101074, + "logps/chosen": -278.7801818847656, + "logps/rejected": -802.4969482421875, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3377364873886108, + "rewards/margins": 6.180790901184082, + "rewards/rejected": -4.843054294586182, + "step": 1494 + }, + { + "epoch": 0.95, + "learning_rate": 5.829067870348093e-10, + "logits/chosen": -3.2291440963745117, + "logits/rejected": -3.1722850799560547, + "logps/chosen": -276.6527099609375, + "logps/rejected": -523.822265625, + "loss": 0.2811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4506782293319702, + "rewards/margins": 4.746012210845947, + "rewards/rejected": -3.2953341007232666, + "step": 1495 + }, + { + "epoch": 0.95, + "learning_rate": 5.672886547686994e-10, + "logits/chosen": -3.2051916122436523, + "logits/rejected": -3.174407482147217, + "logps/chosen": -285.9737548828125, + "logps/rejected": -528.199951171875, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.458953857421875, + "rewards/margins": 4.899432182312012, + "rewards/rejected": -3.440478563308716, + "step": 1496 + }, + { + "epoch": 0.95, + "learning_rate": 5.518814123121884e-10, + "logits/chosen": -3.256467342376709, + "logits/rejected": -3.089320659637451, + "logps/chosen": -239.00852966308594, + "logps/rejected": -574.90869140625, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3277679681777954, + "rewards/margins": 4.449914932250977, + "rewards/rejected": -3.1221466064453125, + "step": 1497 + }, + { + "epoch": 0.95, + "learning_rate": 5.366851253956362e-10, + "logits/chosen": -3.181152820587158, + "logits/rejected": -3.154961109161377, + "logps/chosen": -277.6485595703125, + "logps/rejected": -828.363525390625, + "loss": 0.2807, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5161300897598267, + "rewards/margins": 6.228404521942139, + "rewards/rejected": -4.712274551391602, + "step": 1498 + }, + { + "epoch": 0.96, + "learning_rate": 5.216998588494504e-10, + "logits/chosen": -3.2913942337036133, + "logits/rejected": -3.1320247650146484, + "logps/chosen": -253.62283325195312, + "logps/rejected": -270.3194885253906, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.657759189605713, + "rewards/margins": 3.4118669033050537, + "rewards/rejected": -1.7541077136993408, + "step": 1499 + }, + { + "epoch": 0.96, + "learning_rate": 5.069256766037533e-10, + "logits/chosen": -3.2413535118103027, + "logits/rejected": -3.117129325866699, + "logps/chosen": -288.8326416015625, + "logps/rejected": -1043.2486572265625, + "loss": 0.2855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6621322631835938, + "rewards/margins": 7.0584821701049805, + "rewards/rejected": -5.396349906921387, + "step": 1500 + }, + { + "epoch": 0.96, + "learning_rate": 4.923626416881654e-10, + "logits/chosen": -3.2122833728790283, + "logits/rejected": -3.1027910709381104, + "logps/chosen": -256.17181396484375, + "logps/rejected": -777.1171264648438, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5589537620544434, + "rewards/margins": 5.319923400878906, + "rewards/rejected": -3.760969638824463, + "step": 1501 + }, + { + "epoch": 0.96, + "learning_rate": 4.780108162314944e-10, + "logits/chosen": -3.1489663124084473, + "logits/rejected": -3.084223747253418, + "logps/chosen": -252.0438232421875, + "logps/rejected": -796.5040283203125, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5975501537322998, + "rewards/margins": 6.06458044052124, + "rewards/rejected": -4.4670305252075195, + "step": 1502 + }, + { + "epoch": 0.96, + "learning_rate": 4.6387026146148534e-10, + "logits/chosen": -3.2756552696228027, + "logits/rejected": -3.136821746826172, + "logps/chosen": -235.6328125, + "logps/rejected": -691.230712890625, + "loss": 0.253, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.454064965248108, + "rewards/margins": 5.682761192321777, + "rewards/rejected": -4.228695869445801, + "step": 1503 + }, + { + "epoch": 0.96, + "learning_rate": 4.4994103770457647e-10, + "logits/chosen": -3.202326536178589, + "logits/rejected": -3.117892265319824, + "logps/chosen": -248.66525268554688, + "logps/rejected": -507.9218444824219, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4497696161270142, + "rewards/margins": 4.246447563171387, + "rewards/rejected": -2.796678066253662, + "step": 1504 + }, + { + "epoch": 0.96, + "learning_rate": 4.3622320438561065e-10, + "logits/chosen": -3.269503116607666, + "logits/rejected": -3.1543731689453125, + "logps/chosen": -230.2566680908203, + "logps/rejected": -879.3492431640625, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6087204217910767, + "rewards/margins": 6.79426383972168, + "rewards/rejected": -5.185544013977051, + "step": 1505 + }, + { + "epoch": 0.96, + "learning_rate": 4.227168200276077e-10, + "logits/chosen": -3.2602665424346924, + "logits/rejected": -3.162348747253418, + "logps/chosen": -268.94091796875, + "logps/rejected": -842.3548583984375, + "loss": 0.2996, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2631638050079346, + "rewards/margins": 6.4152116775512695, + "rewards/rejected": -5.152048110961914, + "step": 1506 + }, + { + "epoch": 0.96, + "learning_rate": 4.0942194225148107e-10, + "logits/chosen": -3.215181350708008, + "logits/rejected": -3.155590295791626, + "logps/chosen": -310.49285888671875, + "logps/rejected": -652.169677734375, + "loss": 0.3276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.064863681793213, + "rewards/margins": 5.4766340255737305, + "rewards/rejected": -4.411770820617676, + "step": 1507 + }, + { + "epoch": 0.96, + "learning_rate": 3.9633862777585515e-10, + "logits/chosen": -3.2111284732818604, + "logits/rejected": -3.22672176361084, + "logps/chosen": -237.94244384765625, + "logps/rejected": -664.5126342773438, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3274338245391846, + "rewards/margins": 4.908511161804199, + "rewards/rejected": -3.5810775756835938, + "step": 1508 + }, + { + "epoch": 0.96, + "learning_rate": 3.834669324167428e-10, + "logits/chosen": -3.2288293838500977, + "logits/rejected": -3.1343183517456055, + "logps/chosen": -257.45831298828125, + "logps/rejected": -964.0646362304688, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.809478759765625, + "rewards/margins": 7.047296047210693, + "rewards/rejected": -5.237817764282227, + "step": 1509 + }, + { + "epoch": 0.96, + "learning_rate": 3.708069110873735e-10, + "logits/chosen": -3.280550956726074, + "logits/rejected": -3.1066970825195312, + "logps/chosen": -271.17291259765625, + "logps/rejected": -818.7871704101562, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6086959838867188, + "rewards/margins": 5.34450101852417, + "rewards/rejected": -3.735804796218872, + "step": 1510 + }, + { + "epoch": 0.96, + "learning_rate": 3.5835861779792144e-10, + "logits/chosen": -3.1948838233947754, + "logits/rejected": -3.08561110496521, + "logps/chosen": -249.06834411621094, + "logps/rejected": -385.58331298828125, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.494306206703186, + "rewards/margins": 3.8073158264160156, + "rewards/rejected": -2.313009738922119, + "step": 1511 + }, + { + "epoch": 0.96, + "learning_rate": 3.4612210565528324e-10, + "logits/chosen": -3.237698554992676, + "logits/rejected": -3.0589728355407715, + "logps/chosen": -247.08908081054688, + "logps/rejected": -500.156494140625, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4058380126953125, + "rewards/margins": 3.9522294998168945, + "rewards/rejected": -2.546391248703003, + "step": 1512 + }, + { + "epoch": 0.96, + "learning_rate": 3.340974268628727e-10, + "logits/chosen": -3.265549659729004, + "logits/rejected": -3.2191667556762695, + "logps/chosen": -210.38681030273438, + "logps/rejected": -636.1539916992188, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.290826439857483, + "rewards/margins": 5.379896640777588, + "rewards/rejected": -4.0890703201293945, + "step": 1513 + }, + { + "epoch": 0.96, + "learning_rate": 3.222846327203599e-10, + "logits/chosen": -3.237156867980957, + "logits/rejected": -3.1044681072235107, + "logps/chosen": -282.8118896484375, + "logps/rejected": -1246.06298828125, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4243667125701904, + "rewards/margins": 6.950614929199219, + "rewards/rejected": -5.526248455047607, + "step": 1514 + }, + { + "epoch": 0.97, + "learning_rate": 3.10683773623488e-10, + "logits/chosen": -3.238893508911133, + "logits/rejected": -3.1340606212615967, + "logps/chosen": -278.8346862792969, + "logps/rejected": -559.3914184570312, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.387915015220642, + "rewards/margins": 4.916883945465088, + "rewards/rejected": -3.5289688110351562, + "step": 1515 + }, + { + "epoch": 0.97, + "learning_rate": 2.9929489906383443e-10, + "logits/chosen": -3.234623908996582, + "logits/rejected": -3.1191964149475098, + "logps/chosen": -280.2784729003906, + "logps/rejected": -817.4154052734375, + "loss": 0.2798, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4558074474334717, + "rewards/margins": 5.816205024719238, + "rewards/rejected": -4.3603973388671875, + "step": 1516 + }, + { + "epoch": 0.97, + "learning_rate": 2.8811805762860577e-10, + "logits/chosen": -3.1863555908203125, + "logits/rejected": -3.1769728660583496, + "logps/chosen": -298.6859436035156, + "logps/rejected": -674.6365966796875, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8966217041015625, + "rewards/margins": 6.618274211883545, + "rewards/rejected": -4.721652507781982, + "step": 1517 + }, + { + "epoch": 0.97, + "learning_rate": 2.771532970004431e-10, + "logits/chosen": -3.275937080383301, + "logits/rejected": -3.098238229751587, + "logps/chosen": -287.19464111328125, + "logps/rejected": -693.62646484375, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.418786644935608, + "rewards/margins": 4.941459655761719, + "rewards/rejected": -3.5226731300354004, + "step": 1518 + }, + { + "epoch": 0.97, + "learning_rate": 2.664006639572003e-10, + "logits/chosen": -3.1973514556884766, + "logits/rejected": -3.112494945526123, + "logps/chosen": -284.1156005859375, + "logps/rejected": -611.890869140625, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5839524269104004, + "rewards/margins": 5.531794548034668, + "rewards/rejected": -3.947842597961426, + "step": 1519 + }, + { + "epoch": 0.97, + "learning_rate": 2.5586020437175505e-10, + "logits/chosen": -3.2896666526794434, + "logits/rejected": -2.996668815612793, + "logps/chosen": -271.21630859375, + "logps/rejected": -1608.686767578125, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4085006713867188, + "rewards/margins": 8.918861389160156, + "rewards/rejected": -7.5103607177734375, + "step": 1520 + }, + { + "epoch": 0.97, + "learning_rate": 2.455319632118147e-10, + "logits/chosen": -3.2666077613830566, + "logits/rejected": -3.097524404525757, + "logps/chosen": -250.45486450195312, + "logps/rejected": -373.80572509765625, + "loss": 0.2964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.449434757232666, + "rewards/margins": 3.7903666496276855, + "rewards/rejected": -2.3409318923950195, + "step": 1521 + }, + { + "epoch": 0.97, + "learning_rate": 2.354159845397108e-10, + "logits/chosen": -3.2066214084625244, + "logits/rejected": -3.11795711517334, + "logps/chosen": -260.0987854003906, + "logps/rejected": -294.7159729003906, + "loss": 0.2811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3897933959960938, + "rewards/margins": 2.6550612449645996, + "rewards/rejected": -1.2652679681777954, + "step": 1522 + }, + { + "epoch": 0.97, + "learning_rate": 2.255123115122326e-10, + "logits/chosen": -3.2459635734558105, + "logits/rejected": -3.1786351203918457, + "logps/chosen": -233.66233825683594, + "logps/rejected": -700.528076171875, + "loss": 0.2504, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4365463256835938, + "rewards/margins": 5.229097366333008, + "rewards/rejected": -3.792550563812256, + "step": 1523 + }, + { + "epoch": 0.97, + "learning_rate": 2.1582098638042168e-10, + "logits/chosen": -3.25543212890625, + "logits/rejected": -3.1400108337402344, + "logps/chosen": -241.5826873779297, + "logps/rejected": -393.84320068359375, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6388946771621704, + "rewards/margins": 4.167407035827637, + "rewards/rejected": -2.528512477874756, + "step": 1524 + }, + { + "epoch": 0.97, + "learning_rate": 2.0634205048941089e-10, + "logits/chosen": -3.190990924835205, + "logits/rejected": -3.1189427375793457, + "logps/chosen": -240.35008239746094, + "logps/rejected": -936.81201171875, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4204514026641846, + "rewards/margins": 6.7946577072143555, + "rewards/rejected": -5.37420654296875, + "step": 1525 + }, + { + "epoch": 0.97, + "learning_rate": 1.9707554427821904e-10, + "logits/chosen": -3.2729601860046387, + "logits/rejected": -3.052100658416748, + "logps/chosen": -263.03851318359375, + "logps/rejected": -1150.535400390625, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6340408325195312, + "rewards/margins": 6.666590690612793, + "rewards/rejected": -5.032549858093262, + "step": 1526 + }, + { + "epoch": 0.97, + "learning_rate": 1.8802150727962873e-10, + "logits/chosen": -3.252980947494507, + "logits/rejected": -3.200500011444092, + "logps/chosen": -232.7654266357422, + "logps/rejected": -1146.3939208984375, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4094688892364502, + "rewards/margins": 8.259099960327148, + "rewards/rejected": -6.849630355834961, + "step": 1527 + }, + { + "epoch": 0.97, + "learning_rate": 1.7917997811994767e-10, + "logits/chosen": -3.222682237625122, + "logits/rejected": -3.089733362197876, + "logps/chosen": -253.88258361816406, + "logps/rejected": -655.3397827148438, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3805084228515625, + "rewards/margins": 5.194531440734863, + "rewards/rejected": -3.814023017883301, + "step": 1528 + }, + { + "epoch": 0.97, + "learning_rate": 1.7055099451891985e-10, + "logits/chosen": -3.2455224990844727, + "logits/rejected": -3.1141157150268555, + "logps/chosen": -278.4193115234375, + "logps/rejected": -497.26715087890625, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4916138648986816, + "rewards/margins": 4.389585494995117, + "rewards/rejected": -2.8979721069335938, + "step": 1529 + }, + { + "epoch": 0.98, + "learning_rate": 1.6213459328950352e-10, + "logits/chosen": -3.2214536666870117, + "logits/rejected": -3.0955820083618164, + "logps/chosen": -296.225830078125, + "logps/rejected": -664.6341552734375, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.489851474761963, + "rewards/margins": 5.000512599945068, + "rewards/rejected": -3.5106613636016846, + "step": 1530 + }, + { + "epoch": 0.98, + "learning_rate": 1.5393081033774347e-10, + "logits/chosen": -3.22884202003479, + "logits/rejected": -3.18353271484375, + "logps/chosen": -252.43946838378906, + "logps/rejected": -511.8540344238281, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.385810136795044, + "rewards/margins": 3.9941277503967285, + "rewards/rejected": -2.6083176136016846, + "step": 1531 + }, + { + "epoch": 0.98, + "learning_rate": 1.4593968066262118e-10, + "logits/chosen": -3.2378430366516113, + "logits/rejected": -3.1626763343811035, + "logps/chosen": -275.2149353027344, + "logps/rejected": -754.5364379882812, + "loss": 0.2648, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2432677745819092, + "rewards/margins": 5.038330078125, + "rewards/rejected": -3.795062303543091, + "step": 1532 + }, + { + "epoch": 0.98, + "learning_rate": 1.3816123835588833e-10, + "logits/chosen": -3.246687412261963, + "logits/rejected": -3.1258316040039062, + "logps/chosen": -267.4573974609375, + "logps/rejected": -690.9946899414062, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2407844066619873, + "rewards/margins": 5.158585548400879, + "rewards/rejected": -3.9178009033203125, + "step": 1533 + }, + { + "epoch": 0.98, + "learning_rate": 1.3059551660192792e-10, + "logits/chosen": -3.267087936401367, + "logits/rejected": -3.0984103679656982, + "logps/chosen": -263.6104736328125, + "logps/rejected": -290.08624267578125, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6541184186935425, + "rewards/margins": 3.2703590393066406, + "rewards/rejected": -1.6162407398223877, + "step": 1534 + }, + { + "epoch": 0.98, + "learning_rate": 1.232425476776211e-10, + "logits/chosen": -3.2475924491882324, + "logits/rejected": -3.144298791885376, + "logps/chosen": -273.0535888671875, + "logps/rejected": -693.7423706054688, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3985763788223267, + "rewards/margins": 5.1617937088012695, + "rewards/rejected": -3.7632172107696533, + "step": 1535 + }, + { + "epoch": 0.98, + "learning_rate": 1.1610236295220289e-10, + "logits/chosen": -3.2859466075897217, + "logits/rejected": -3.09342360496521, + "logps/chosen": -295.0076904296875, + "logps/rejected": -347.6279296875, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4265350103378296, + "rewards/margins": 3.28240966796875, + "rewards/rejected": -1.8558746576309204, + "step": 1536 + }, + { + "epoch": 0.98, + "learning_rate": 1.0917499288712329e-10, + "logits/chosen": -3.2261455059051514, + "logits/rejected": -3.113769292831421, + "logps/chosen": -272.55364990234375, + "logps/rejected": -897.0953369140625, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4642120599746704, + "rewards/margins": 5.882627964019775, + "rewards/rejected": -4.4184160232543945, + "step": 1537 + }, + { + "epoch": 0.98, + "learning_rate": 1.0246046703592526e-10, + "logits/chosen": -3.2717232704162598, + "logits/rejected": -3.217860221862793, + "logps/chosen": -273.4052734375, + "logps/rejected": -501.5158996582031, + "loss": 0.2848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5212814807891846, + "rewards/margins": 4.844316482543945, + "rewards/rejected": -3.3230347633361816, + "step": 1538 + }, + { + "epoch": 0.98, + "learning_rate": 9.595881404411143e-11, + "logits/chosen": -3.2637274265289307, + "logits/rejected": -3.049621105194092, + "logps/chosen": -243.19772338867188, + "logps/rejected": -1188.8935546875, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3530479669570923, + "rewards/margins": 6.449878692626953, + "rewards/rejected": -5.09683084487915, + "step": 1539 + }, + { + "epoch": 0.98, + "learning_rate": 8.967006164903867e-11, + "logits/chosen": -3.330801486968994, + "logits/rejected": -3.0949974060058594, + "logps/chosen": -275.32318115234375, + "logps/rejected": -420.3628234863281, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5802032947540283, + "rewards/margins": 3.6895034313201904, + "rewards/rejected": -2.109300136566162, + "step": 1540 + }, + { + "epoch": 0.98, + "learning_rate": 8.359423667977927e-11, + "logits/chosen": -3.2353436946868896, + "logits/rejected": -3.145285129547119, + "logps/chosen": -277.17242431640625, + "logps/rejected": -710.7813110351562, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4567031860351562, + "rewards/margins": 5.3717851638793945, + "rewards/rejected": -3.9150819778442383, + "step": 1541 + }, + { + "epoch": 0.98, + "learning_rate": 7.773136505700994e-11, + "logits/chosen": -3.2884631156921387, + "logits/rejected": -3.2140655517578125, + "logps/chosen": -260.4361572265625, + "logps/rejected": -765.7421875, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4355759620666504, + "rewards/margins": 6.265815734863281, + "rewards/rejected": -4.830240249633789, + "step": 1542 + }, + { + "epoch": 0.98, + "learning_rate": 7.20814717929119e-11, + "logits/chosen": -3.278841018676758, + "logits/rejected": -3.100405693054199, + "logps/chosen": -302.9730224609375, + "logps/rejected": -352.51287841796875, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.610975742340088, + "rewards/margins": 3.4320030212402344, + "rewards/rejected": -1.821027398109436, + "step": 1543 + }, + { + "epoch": 0.98, + "learning_rate": 6.66445809910543e-11, + "logits/chosen": -3.303974151611328, + "logits/rejected": -3.0849719047546387, + "logps/chosen": -248.06605529785156, + "logps/rejected": -1002.4603881835938, + "loss": 0.2679, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1485512256622314, + "rewards/margins": 6.160202980041504, + "rewards/rejected": -5.011651992797852, + "step": 1544 + }, + { + "epoch": 0.98, + "learning_rate": 6.142071584630538e-11, + "logits/chosen": -3.227545738220215, + "logits/rejected": -3.2157416343688965, + "logps/chosen": -237.3262176513672, + "logps/rejected": -732.614501953125, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.22003173828125, + "rewards/margins": 5.715933322906494, + "rewards/rejected": -4.495901584625244, + "step": 1545 + }, + { + "epoch": 0.99, + "learning_rate": 5.6409898644710355e-11, + "logits/chosen": -3.2071118354797363, + "logits/rejected": -3.078026533126831, + "logps/chosen": -211.26979064941406, + "logps/rejected": -562.7764892578125, + "loss": 0.2623, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.426914930343628, + "rewards/margins": 4.9198102951049805, + "rewards/rejected": -3.4928956031799316, + "step": 1546 + }, + { + "epoch": 0.99, + "learning_rate": 5.161215076341374e-11, + "logits/chosen": -3.2061471939086914, + "logits/rejected": -3.1013195514678955, + "logps/chosen": -270.0359802246094, + "logps/rejected": -1446.46728515625, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6610504388809204, + "rewards/margins": 8.489957809448242, + "rewards/rejected": -6.828906536102295, + "step": 1547 + }, + { + "epoch": 0.99, + "learning_rate": 4.7027492670576043e-11, + "logits/chosen": -3.1818737983703613, + "logits/rejected": -3.112541913986206, + "logps/chosen": -300.6280517578125, + "logps/rejected": -454.68060302734375, + "loss": 0.2914, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7594208717346191, + "rewards/margins": 4.099761962890625, + "rewards/rejected": -2.340341091156006, + "step": 1548 + }, + { + "epoch": 0.99, + "learning_rate": 4.2655943925251626e-11, + "logits/chosen": -3.240518569946289, + "logits/rejected": -3.1295485496520996, + "logps/chosen": -298.412841796875, + "logps/rejected": -494.28192138671875, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5100387334823608, + "rewards/margins": 3.988539218902588, + "rewards/rejected": -2.4785003662109375, + "step": 1549 + }, + { + "epoch": 0.99, + "learning_rate": 3.849752317734434e-11, + "logits/chosen": -3.267077922821045, + "logits/rejected": -3.0626821517944336, + "logps/chosen": -263.0639953613281, + "logps/rejected": -553.3114013671875, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4285492897033691, + "rewards/margins": 4.976290702819824, + "rewards/rejected": -3.547741651535034, + "step": 1550 + }, + { + "epoch": 0.99, + "learning_rate": 3.455224816750757e-11, + "logits/chosen": -3.261350631713867, + "logits/rejected": -3.1302452087402344, + "logps/chosen": -248.92282104492188, + "logps/rejected": -655.7862548828125, + "loss": 0.2626, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3861335515975952, + "rewards/margins": 5.014387130737305, + "rewards/rejected": -3.628253221511841, + "step": 1551 + }, + { + "epoch": 0.99, + "learning_rate": 3.0820135727066546e-11, + "logits/chosen": -3.262666940689087, + "logits/rejected": -3.1256532669067383, + "logps/chosen": -254.66000366210938, + "logps/rejected": -505.1566162109375, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5962936878204346, + "rewards/margins": 4.704168796539307, + "rewards/rejected": -3.107875108718872, + "step": 1552 + }, + { + "epoch": 0.99, + "learning_rate": 2.7301201777968352e-11, + "logits/chosen": -3.2700400352478027, + "logits/rejected": -3.1591858863830566, + "logps/chosen": -259.09906005859375, + "logps/rejected": -818.0596923828125, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4167404174804688, + "rewards/margins": 6.175297737121582, + "rewards/rejected": -4.758557319641113, + "step": 1553 + }, + { + "epoch": 0.99, + "learning_rate": 2.3995461332676492e-11, + "logits/chosen": -3.2354183197021484, + "logits/rejected": -3.0755715370178223, + "logps/chosen": -287.3794860839844, + "logps/rejected": -611.09814453125, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.246174693107605, + "rewards/margins": 4.059694290161133, + "rewards/rejected": -2.813519239425659, + "step": 1554 + }, + { + "epoch": 0.99, + "learning_rate": 2.0902928494143103e-11, + "logits/chosen": -3.2577569484710693, + "logits/rejected": -3.1698999404907227, + "logps/chosen": -237.67379760742188, + "logps/rejected": -657.27685546875, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5281472206115723, + "rewards/margins": 5.35427188873291, + "rewards/rejected": -3.826124668121338, + "step": 1555 + }, + { + "epoch": 0.99, + "learning_rate": 1.8023616455731248e-11, + "logits/chosen": -3.233638286590576, + "logits/rejected": -3.0751943588256836, + "logps/chosen": -269.190673828125, + "logps/rejected": -894.7037353515625, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.448022484779358, + "rewards/margins": 5.79824686050415, + "rewards/rejected": -4.350224494934082, + "step": 1556 + }, + { + "epoch": 0.99, + "learning_rate": 1.5357537501159422e-11, + "logits/chosen": -3.2978038787841797, + "logits/rejected": -3.1459031105041504, + "logps/chosen": -245.05648803710938, + "logps/rejected": -695.1279296875, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5988892316818237, + "rewards/margins": 5.863227844238281, + "rewards/rejected": -4.264338493347168, + "step": 1557 + }, + { + "epoch": 0.99, + "learning_rate": 1.2904703004451567e-11, + "logits/chosen": -3.211029052734375, + "logits/rejected": -3.1961371898651123, + "logps/chosen": -271.49041748046875, + "logps/rejected": -787.3267211914062, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6088249683380127, + "rewards/margins": 6.258940696716309, + "rewards/rejected": -4.650115966796875, + "step": 1558 + }, + { + "epoch": 0.99, + "learning_rate": 1.0665123429892675e-11, + "logits/chosen": -3.2900993824005127, + "logits/rejected": -3.1327567100524902, + "logps/chosen": -211.9005126953125, + "logps/rejected": -399.8061218261719, + "loss": 0.2676, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4238426685333252, + "rewards/margins": 3.7498116493225098, + "rewards/rejected": -2.3259689807891846, + "step": 1559 + }, + { + "epoch": 0.99, + "learning_rate": 8.63880833197328e-12, + "logits/chosen": -3.2286229133605957, + "logits/rejected": -3.114499568939209, + "logps/chosen": -214.48983764648438, + "logps/rejected": -450.8634033203125, + "loss": 0.2452, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.455052137374878, + "rewards/margins": 3.907672166824341, + "rewards/rejected": -2.452620029449463, + "step": 1560 + }, + { + "epoch": 0.99, + "learning_rate": 6.825766355356144e-12, + "logits/chosen": -3.1987533569335938, + "logits/rejected": -3.031480312347412, + "logps/chosen": -255.11204528808594, + "logps/rejected": -2666.82373046875, + "loss": 0.2823, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2079330682754517, + "rewards/margins": 11.235008239746094, + "rewards/rejected": -10.02707576751709, + "step": 1561 + }, + { + "epoch": 1.0, + "learning_rate": 5.226005234842956e-12, + "logits/chosen": -3.2096142768859863, + "logits/rejected": -3.0549635887145996, + "logps/chosen": -243.41307067871094, + "logps/rejected": -317.17218017578125, + "loss": 0.2777, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4637161493301392, + "rewards/margins": 3.1257071495056152, + "rewards/rejected": -1.6619911193847656, + "step": 1562 + }, + { + "epoch": 1.0, + "learning_rate": 3.839531795335471e-12, + "logits/chosen": -3.2648234367370605, + "logits/rejected": -3.1886138916015625, + "logps/chosen": -241.34353637695312, + "logps/rejected": -616.2481689453125, + "loss": 0.2776, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.367649793624878, + "rewards/margins": 5.193821907043457, + "rewards/rejected": -3.826171875, + "step": 1563 + }, + { + "epoch": 1.0, + "learning_rate": 2.6663519518022038e-12, + "logits/chosen": -3.261533737182617, + "logits/rejected": -3.146815776824951, + "logps/chosen": -208.288330078125, + "logps/rejected": -783.7484741210938, + "loss": 0.2502, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2280967235565186, + "rewards/margins": 5.900756359100342, + "rewards/rejected": -4.672659397125244, + "step": 1564 + }, + { + "epoch": 1.0, + "learning_rate": 1.7064707092617758e-12, + "logits/chosen": -3.2271065711975098, + "logits/rejected": -2.992547035217285, + "logps/chosen": -254.07716369628906, + "logps/rejected": -1489.093994140625, + "loss": 0.269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3836395740509033, + "rewards/margins": 7.539007663726807, + "rewards/rejected": -6.155368328094482, + "step": 1565 + }, + { + "epoch": 1.0, + "learning_rate": 9.598921627607114e-13, + "logits/chosen": -3.2226531505584717, + "logits/rejected": -2.918135643005371, + "logps/chosen": -227.79286193847656, + "logps/rejected": -2193.462890625, + "loss": 0.2694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4411743879318237, + "rewards/margins": 10.937530517578125, + "rewards/rejected": -9.496356964111328, + "step": 1566 + }, + { + "epoch": 1.0, + "learning_rate": 4.266194973567838e-13, + "logits/chosen": -3.2561206817626953, + "logits/rejected": -3.2313899993896484, + "logps/chosen": -269.33245849609375, + "logps/rejected": -963.0504760742188, + "loss": 0.2816, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5193496942520142, + "rewards/margins": 7.930861473083496, + "rewards/rejected": -6.411511421203613, + "step": 1567 + }, + { + "epoch": 1.0, + "learning_rate": 1.0665498809125928e-13, + "logits/chosen": -3.242306709289551, + "logits/rejected": -3.1190357208251953, + "logps/chosen": -250.67996215820312, + "logps/rejected": -804.0322265625, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6941803693771362, + "rewards/margins": 6.422581672668457, + "rewards/rejected": -4.728401184082031, + "step": 1568 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -3.2550458908081055, + "logits/rejected": -3.188502073287964, + "logps/chosen": -261.6180419921875, + "logps/rejected": -458.85205078125, + "loss": 0.269, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6236786842346191, + "rewards/margins": 4.3691511154174805, + "rewards/rejected": -2.7454726696014404, + "step": 1569 + }, + { + "epoch": 1.0, + "step": 1569, + "total_flos": 0.0, + "train_loss": 0.3536820619291679, + "train_runtime": 3744.6669, + "train_samples_per_second": 3.352, + "train_steps_per_second": 0.419 + } + ], + "logging_steps": 1.0, + "max_steps": 1569, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5de4729f207cc279c77ae3ce459bc5923fe0239 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4466b234fae03ef51fdfb54f8744d57ac243773a --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde28b34ecae115c82dcec86eda8b6e9af34200249bfeea3ad9b8779760322a8 +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ae7ea4bdea494283c0825470b9f1b970a1f5e2fc --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora/trainer_state.json @@ -0,0 +1,11006 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9993626513702996, + "eval_steps": 500, + "global_step": 784, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.166666666666666e-09, + "logits/chosen": -3.2160720825195312, + "logits/rejected": -3.116873264312744, + "logps/chosen": -228.813232421875, + "logps/rejected": -513.1785888671875, + "loss": 0.9784, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.038027193397283554, + "rewards/margins": -0.09404220432043076, + "rewards/rejected": 0.0560150146484375, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -3.262388229370117, + "logits/rejected": -3.188769817352295, + "logps/chosen": -260.372314453125, + "logps/rejected": -616.3881225585938, + "loss": 0.9729, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.016930390149354935, + "rewards/margins": 0.035233307629823685, + "rewards/rejected": -0.05216369777917862, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-08, + "logits/chosen": -3.1628284454345703, + "logits/rejected": -3.056243896484375, + "logps/chosen": -303.4222412109375, + "logps/rejected": -502.05816650390625, + "loss": 0.9592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00014343298971652985, + "rewards/margins": 0.04321899265050888, + "rewards/rejected": -0.0430755615234375, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -3.1565537452697754, + "logits/rejected": -3.0980849266052246, + "logps/chosen": -300.78948974609375, + "logps/rejected": -648.1358032226562, + "loss": 1.0334, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04700469970703125, + "rewards/margins": -0.02518768236041069, + "rewards/rejected": -0.02181701734662056, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333335e-08, + "logits/chosen": -3.233736991882324, + "logits/rejected": -3.1037347316741943, + "logps/chosen": -279.86785888671875, + "logps/rejected": -546.0887451171875, + "loss": 0.9777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02123718336224556, + "rewards/margins": 0.10942230373620987, + "rewards/rejected": -0.08818511664867401, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-08, + "logits/chosen": -3.1711623668670654, + "logits/rejected": -3.0330991744995117, + "logps/chosen": -270.4840087890625, + "logps/rejected": -458.84625244140625, + "loss": 0.9771, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.012899779714643955, + "rewards/margins": -0.01574249193072319, + "rewards/rejected": 0.02864227257668972, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.9166666666666666e-08, + "logits/chosen": -3.1388931274414062, + "logits/rejected": -3.0820374488830566, + "logps/chosen": -315.3365478515625, + "logps/rejected": -437.7580871582031, + "loss": 0.9932, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0290679931640625, + "rewards/margins": -0.09704285115003586, + "rewards/rejected": 0.06797485053539276, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -3.2251648902893066, + "logits/rejected": -3.07142972946167, + "logps/chosen": -283.0133056640625, + "logps/rejected": -554.4439086914062, + "loss": 1.0006, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02800140529870987, + "rewards/margins": 0.002264399081468582, + "rewards/rejected": -0.03026580810546875, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 3.75e-08, + "logits/chosen": -3.153505802154541, + "logits/rejected": -3.1210711002349854, + "logps/chosen": -308.5958557128906, + "logps/rejected": -500.2611389160156, + "loss": 0.9904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013346862979233265, + "rewards/margins": 0.06808013468980789, + "rewards/rejected": -0.0547332763671875, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -3.177821636199951, + "logits/rejected": -3.117086410522461, + "logps/chosen": -274.899658203125, + "logps/rejected": -719.2900390625, + "loss": 0.9871, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01338501088321209, + "rewards/margins": 0.03161315247416496, + "rewards/rejected": -0.0449981652200222, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.583333333333333e-08, + "logits/chosen": -3.211191177368164, + "logits/rejected": -3.042813539505005, + "logps/chosen": -242.435791015625, + "logps/rejected": -992.1142578125, + "loss": 0.9606, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.008935547433793545, + "rewards/margins": 0.08786620944738388, + "rewards/rejected": -0.0968017578125, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 5e-08, + "logits/chosen": -3.214766502380371, + "logits/rejected": -3.0631484985351562, + "logps/chosen": -254.51382446289062, + "logps/rejected": -437.329345703125, + "loss": 0.9829, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011109163984656334, + "rewards/margins": -0.03873825445771217, + "rewards/rejected": 0.02762908861041069, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 5.416666666666666e-08, + "logits/chosen": -3.2042415142059326, + "logits/rejected": -3.075075149536133, + "logps/chosen": -250.3650665283203, + "logps/rejected": -528.236328125, + "loss": 0.9554, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00742645189166069, + "rewards/margins": 0.07012481987476349, + "rewards/rejected": -0.07755126804113388, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -3.2371461391448975, + "logits/rejected": -3.0458128452301025, + "logps/chosen": -251.41680908203125, + "logps/rejected": -768.4645385742188, + "loss": 0.9795, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.00521011371165514, + "rewards/margins": -0.018463894724845886, + "rewards/rejected": 0.01325378380715847, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 6.25e-08, + "logits/chosen": -3.172666549682617, + "logits/rejected": -3.062541961669922, + "logps/chosen": -320.47900390625, + "logps/rejected": -499.32952880859375, + "loss": 0.9498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03406829759478569, + "rewards/margins": 0.07825469970703125, + "rewards/rejected": -0.04418640211224556, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -3.18975567817688, + "logits/rejected": -3.0830211639404297, + "logps/chosen": -261.47515869140625, + "logps/rejected": -541.064208984375, + "loss": 0.9867, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.00612640380859375, + "rewards/margins": -0.06964416801929474, + "rewards/rejected": 0.07577057182788849, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 7.083333333333334e-08, + "logits/chosen": -3.1648292541503906, + "logits/rejected": -3.092946767807007, + "logps/chosen": -286.6661376953125, + "logps/rejected": -1414.825439453125, + "loss": 0.9973, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05277862772345543, + "rewards/margins": -0.04539337009191513, + "rewards/rejected": -0.007385254837572575, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-08, + "logits/chosen": -3.156536340713501, + "logits/rejected": -2.97780442237854, + "logps/chosen": -279.8757629394531, + "logps/rejected": -1318.1156005859375, + "loss": 0.9056, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.007740020751953125, + "rewards/margins": 0.053789518773555756, + "rewards/rejected": -0.04604949802160263, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 7.916666666666665e-08, + "logits/chosen": -3.110651969909668, + "logits/rejected": -3.0994374752044678, + "logps/chosen": -305.49493408203125, + "logps/rejected": -656.1889038085938, + "loss": 0.9731, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00022277841344475746, + "rewards/margins": 0.06550445407629013, + "rewards/rejected": -0.06528167426586151, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -3.1836562156677246, + "logits/rejected": -3.1436734199523926, + "logps/chosen": -264.1539306640625, + "logps/rejected": -442.2093200683594, + "loss": 0.9736, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04139404371380806, + "rewards/margins": 0.02370605617761612, + "rewards/rejected": 0.01768798753619194, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 8.75e-08, + "logits/chosen": -3.1375787258148193, + "logits/rejected": -3.1219282150268555, + "logps/chosen": -267.9791564941406, + "logps/rejected": -444.4863586425781, + "loss": 0.9417, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.011225891299545765, + "rewards/margins": 0.03378906100988388, + "rewards/rejected": -0.04501495510339737, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -3.215904712677002, + "logits/rejected": -3.0797348022460938, + "logps/chosen": -272.66815185546875, + "logps/rejected": -631.1685791015625, + "loss": 0.9528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061243437230587006, + "rewards/margins": 0.1032203659415245, + "rewards/rejected": -0.0419769287109375, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 9.583333333333334e-08, + "logits/chosen": -3.229501724243164, + "logits/rejected": -3.065044403076172, + "logps/chosen": -261.38885498046875, + "logps/rejected": -1408.642578125, + "loss": 0.9364, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0032333373092114925, + "rewards/margins": -0.03120880201458931, + "rewards/rejected": 0.03444213792681694, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -3.2050721645355225, + "logits/rejected": -3.148730516433716, + "logps/chosen": -255.07168579101562, + "logps/rejected": -559.8580932617188, + "loss": 0.9628, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.048764802515506744, + "rewards/margins": -0.11533431708812714, + "rewards/rejected": 0.06656952202320099, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 9.999957281897734e-08, + "logits/chosen": -3.2020888328552246, + "logits/rejected": -3.1628832817077637, + "logps/chosen": -255.50250244140625, + "logps/rejected": -526.62890625, + "loss": 0.9401, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02172393910586834, + "rewards/margins": -0.01593322679400444, + "rewards/rejected": 0.03765716776251793, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 9.999829128320872e-08, + "logits/chosen": -3.1430835723876953, + "logits/rejected": -3.024179458618164, + "logps/chosen": -286.6328430175781, + "logps/rejected": -1003.1669921875, + "loss": 0.9446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014549256302416325, + "rewards/margins": 0.11952667683362961, + "rewards/rejected": -0.10497741401195526, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 9.999615541459205e-08, + "logits/chosen": -3.1040163040161133, + "logits/rejected": -3.0408525466918945, + "logps/chosen": -267.335693359375, + "logps/rejected": -483.9801025390625, + "loss": 0.9302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00772247277200222, + "rewards/margins": 0.06426391750574112, + "rewards/rejected": -0.05654144287109375, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 9.999316524962345e-08, + "logits/chosen": -3.1886560916900635, + "logits/rejected": -3.0970265865325928, + "logps/chosen": -318.0135498046875, + "logps/rejected": -503.2415466308594, + "loss": 0.9377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.004170226864516735, + "rewards/margins": 0.08445052802562714, + "rewards/rejected": -0.08028030395507812, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 9.998932083939655e-08, + "logits/chosen": -3.1369075775146484, + "logits/rejected": -3.071481227874756, + "logps/chosen": -281.07855224609375, + "logps/rejected": -327.6930236816406, + "loss": 0.9593, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.04752349853515625, + "rewards/margins": -0.041417695581912994, + "rewards/rejected": -0.0061058043502271175, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 9.998462224960173e-08, + "logits/chosen": -3.1423468589782715, + "logits/rejected": -3.120021343231201, + "logps/chosen": -252.58253479003906, + "logps/rejected": -481.6125183105469, + "loss": 0.9125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01428680494427681, + "rewards/margins": 0.08368378132581711, + "rewards/rejected": -0.06939697265625, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 9.997906956052494e-08, + "logits/chosen": -3.1977224349975586, + "logits/rejected": -3.099289894104004, + "logps/chosen": -277.295654296875, + "logps/rejected": -456.43896484375, + "loss": 0.8903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014601899310946465, + "rewards/margins": 0.10611648857593536, + "rewards/rejected": -0.09151458740234375, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 9.99726628670463e-08, + "logits/chosen": -3.1856377124786377, + "logits/rejected": -3.1025264263153076, + "logps/chosen": -257.6718444824219, + "logps/rejected": -303.3497009277344, + "loss": 0.9413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04535675048828125, + "rewards/margins": 0.07125091552734375, + "rewards/rejected": -0.0258941650390625, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 9.996540227863853e-08, + "logits/chosen": -3.2358977794647217, + "logits/rejected": -3.1445164680480957, + "logps/chosen": -273.57177734375, + "logps/rejected": -592.3344116210938, + "loss": 0.9001, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03141326829791069, + "rewards/margins": 0.11333923786878586, + "rewards/rejected": -0.08192596584558487, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 9.995728791936504e-08, + "logits/chosen": -3.16359806060791, + "logits/rejected": -3.1609606742858887, + "logps/chosen": -268.3714599609375, + "logps/rejected": -911.5850830078125, + "loss": 0.885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0166168212890625, + "rewards/margins": 0.31348878145217896, + "rewards/rejected": -0.29687196016311646, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 9.994831992787787e-08, + "logits/chosen": -3.1417484283447266, + "logits/rejected": -3.1001663208007812, + "logps/chosen": -339.365234375, + "logps/rejected": -1250.6190185546875, + "loss": 0.895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15254363417625427, + "rewards/margins": 0.5067947506904602, + "rewards/rejected": -0.35425108671188354, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 9.993849845741523e-08, + "logits/chosen": -3.1884336471557617, + "logits/rejected": -3.0462398529052734, + "logps/chosen": -300.37860107421875, + "logps/rejected": -512.791015625, + "loss": 0.8767, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03133697435259819, + "rewards/margins": 0.09881286323070526, + "rewards/rejected": -0.13014984130859375, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 9.992782367579898e-08, + "logits/chosen": -3.2259180545806885, + "logits/rejected": -3.1259334087371826, + "logps/chosen": -289.2379150390625, + "logps/rejected": -1005.3299560546875, + "loss": 0.8184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03432159498333931, + "rewards/margins": 0.3717696964740753, + "rewards/rejected": -0.4060913026332855, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 9.991629576543162e-08, + "logits/chosen": -3.2453582286834717, + "logits/rejected": -3.1148874759674072, + "logps/chosen": -227.9901123046875, + "logps/rejected": -466.27117919921875, + "loss": 0.8538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05539245903491974, + "rewards/margins": 0.15534821152687073, + "rewards/rejected": -0.09995575249195099, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 9.99039149232934e-08, + "logits/chosen": -3.1385059356689453, + "logits/rejected": -3.0952847003936768, + "logps/chosen": -259.82342529296875, + "logps/rejected": -329.1350402832031, + "loss": 0.8574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.058658599853515625, + "rewards/margins": 0.057256314903497696, + "rewards/rejected": 0.0014022821560502052, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 9.989068136093872e-08, + "logits/chosen": -3.1561312675476074, + "logits/rejected": -3.064603090286255, + "logps/chosen": -281.70965576171875, + "logps/rejected": -419.3515319824219, + "loss": 0.8512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03509826585650444, + "rewards/margins": 0.15843963623046875, + "rewards/rejected": -0.12334136664867401, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 9.987659530449266e-08, + "logits/chosen": -3.173116445541382, + "logits/rejected": -3.057656764984131, + "logps/chosen": -305.4443359375, + "logps/rejected": -478.31756591796875, + "loss": 0.8981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01899414137005806, + "rewards/margins": 0.10671082139015198, + "rewards/rejected": -0.08771667629480362, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 9.986165699464705e-08, + "logits/chosen": -3.1859121322631836, + "logits/rejected": -3.119231939315796, + "logps/chosen": -280.0865478515625, + "logps/rejected": -504.4825744628906, + "loss": 0.8589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08683013916015625, + "rewards/margins": 0.30463409423828125, + "rewards/rejected": -0.217803955078125, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 9.98458666866564e-08, + "logits/chosen": -3.2702674865722656, + "logits/rejected": -3.1558704376220703, + "logps/chosen": -271.4366760253906, + "logps/rejected": -978.3760986328125, + "loss": 0.8589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0031890869140625, + "rewards/margins": 0.40606689453125, + "rewards/rejected": -0.4028778076171875, + "step": 43 + }, + { + "epoch": 0.06, + "learning_rate": 9.982922465033349e-08, + "logits/chosen": -3.196592330932617, + "logits/rejected": -3.1011955738067627, + "logps/chosen": -277.72369384765625, + "logps/rejected": -746.8526611328125, + "loss": 0.8185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06383819878101349, + "rewards/margins": 0.2836105227470398, + "rewards/rejected": -0.2197723388671875, + "step": 44 + }, + { + "epoch": 0.06, + "learning_rate": 9.981173117004483e-08, + "logits/chosen": -3.1592037677764893, + "logits/rejected": -2.952601909637451, + "logps/chosen": -280.6240234375, + "logps/rejected": -875.4172973632812, + "loss": 0.8186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06334991753101349, + "rewards/margins": 0.3580154478549957, + "rewards/rejected": -0.29466551542282104, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 9.979338654470568e-08, + "logits/chosen": -3.124558448791504, + "logits/rejected": -3.0982770919799805, + "logps/chosen": -261.60638427734375, + "logps/rejected": -443.8964538574219, + "loss": 0.8715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08426438271999359, + "rewards/margins": 0.25358352065086365, + "rewards/rejected": -0.16931915283203125, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 9.977419108777513e-08, + "logits/chosen": -3.208319902420044, + "logits/rejected": -3.006890296936035, + "logps/chosen": -277.5843200683594, + "logps/rejected": -869.0111083984375, + "loss": 0.7701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035826873034238815, + "rewards/margins": 0.45332109928131104, + "rewards/rejected": -0.4174942374229431, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 9.975414512725057e-08, + "logits/chosen": -3.179723024368286, + "logits/rejected": -3.1317391395568848, + "logps/chosen": -272.15838623046875, + "logps/rejected": -589.74560546875, + "loss": 0.8217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023329924792051315, + "rewards/margins": 0.2633781433105469, + "rewards/rejected": -0.24004822969436646, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 9.973324900566212e-08, + "logits/chosen": -3.2586569786071777, + "logits/rejected": -3.1257131099700928, + "logps/chosen": -287.736083984375, + "logps/rejected": -760.92578125, + "loss": 0.8375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11028137803077698, + "rewards/margins": 0.46541139483451843, + "rewards/rejected": -0.35513001680374146, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 9.971150308006688e-08, + "logits/chosen": -3.196011543273926, + "logits/rejected": -3.0792903900146484, + "logps/chosen": -281.2085876464844, + "logps/rejected": -735.2090454101562, + "loss": 0.839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028815461322665215, + "rewards/margins": 0.36580580472946167, + "rewards/rejected": -0.3369903564453125, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 9.968890772204271e-08, + "logits/chosen": -3.2005913257598877, + "logits/rejected": -3.1766748428344727, + "logps/chosen": -318.02313232421875, + "logps/rejected": -713.7093505859375, + "loss": 0.7787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03105621412396431, + "rewards/margins": 0.43599700927734375, + "rewards/rejected": -0.40494078397750854, + "step": 51 + }, + { + "epoch": 0.07, + "learning_rate": 9.96654633176819e-08, + "logits/chosen": -3.124014377593994, + "logits/rejected": -3.0402297973632812, + "logps/chosen": -248.47860717773438, + "logps/rejected": -541.86474609375, + "loss": 0.8169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06288605183362961, + "rewards/margins": 0.37468263506889343, + "rewards/rejected": -0.3117965757846832, + "step": 52 + }, + { + "epoch": 0.07, + "learning_rate": 9.964117026758469e-08, + "logits/chosen": -3.194836139678955, + "logits/rejected": -2.9955854415893555, + "logps/chosen": -292.162841796875, + "logps/rejected": -930.6184692382812, + "loss": 0.8002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.035115815699100494, + "rewards/margins": 0.5283164978027344, + "rewards/rejected": -0.49320071935653687, + "step": 53 + }, + { + "epoch": 0.07, + "learning_rate": 9.961602898685224e-08, + "logits/chosen": -3.179006814956665, + "logits/rejected": -3.062976837158203, + "logps/chosen": -265.8880615234375, + "logps/rejected": -803.6100463867188, + "loss": 0.7594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08634338527917862, + "rewards/margins": 0.5654022693634033, + "rewards/rejected": -0.4790588617324829, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 9.959003990507971e-08, + "logits/chosen": -3.204099655151367, + "logits/rejected": -3.026198387145996, + "logps/chosen": -305.41717529296875, + "logps/rejected": -922.2069091796875, + "loss": 0.7865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04644470289349556, + "rewards/margins": 0.586712658405304, + "rewards/rejected": -0.5402679443359375, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 9.956320346634876e-08, + "logits/chosen": -3.188807725906372, + "logits/rejected": -3.0674996376037598, + "logps/chosen": -276.7745361328125, + "logps/rejected": -391.873779296875, + "loss": 0.7347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.047023773193359375, + "rewards/margins": 0.2781227231025696, + "rewards/rejected": -0.2310989499092102, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 9.953552012922011e-08, + "logits/chosen": -3.1956052780151367, + "logits/rejected": -3.0323009490966797, + "logps/chosen": -267.902099609375, + "logps/rejected": -338.7499084472656, + "loss": 0.7685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005396271124482155, + "rewards/margins": 0.1857139617204666, + "rewards/rejected": -0.19111022353172302, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 9.950699036672558e-08, + "logits/chosen": -3.2637784481048584, + "logits/rejected": -3.1500515937805176, + "logps/chosen": -282.23162841796875, + "logps/rejected": -627.0332641601562, + "loss": 0.7286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09574432671070099, + "rewards/margins": 0.41945040225982666, + "rewards/rejected": -0.3237060606479645, + "step": 58 + }, + { + "epoch": 0.08, + "learning_rate": 9.947761466636013e-08, + "logits/chosen": -3.2019500732421875, + "logits/rejected": -3.0869851112365723, + "logps/chosen": -284.5959167480469, + "logps/rejected": -907.2308349609375, + "loss": 0.7037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02913971059024334, + "rewards/margins": 0.7361831665039062, + "rewards/rejected": -0.707043468952179, + "step": 59 + }, + { + "epoch": 0.08, + "learning_rate": 9.944739353007342e-08, + "logits/chosen": -3.2203369140625, + "logits/rejected": -3.0183143615722656, + "logps/chosen": -251.86341857910156, + "logps/rejected": -521.030029296875, + "loss": 0.7459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0645240768790245, + "rewards/margins": 0.4045204222202301, + "rewards/rejected": -0.339996337890625, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 9.941632747426128e-08, + "logits/chosen": -3.2086873054504395, + "logits/rejected": -3.030205726623535, + "logps/chosen": -278.8790283203125, + "logps/rejected": -1463.236083984375, + "loss": 0.7414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07151641696691513, + "rewards/margins": 1.0069442987442017, + "rewards/rejected": -0.9354279041290283, + "step": 61 + }, + { + "epoch": 0.08, + "learning_rate": 9.938441702975688e-08, + "logits/chosen": -3.178344249725342, + "logits/rejected": -3.1531031131744385, + "logps/chosen": -293.93548583984375, + "logps/rejected": -775.3367309570312, + "loss": 0.7691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10590972751379013, + "rewards/margins": 0.7144302129745483, + "rewards/rejected": -0.6085205078125, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 9.93516627418217e-08, + "logits/chosen": -3.1838574409484863, + "logits/rejected": -3.091536521911621, + "logps/chosen": -320.013671875, + "logps/rejected": -787.9466552734375, + "loss": 0.7387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08702392876148224, + "rewards/margins": 0.6274597644805908, + "rewards/rejected": -0.540435791015625, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 9.931806517013611e-08, + "logits/chosen": -3.1866564750671387, + "logits/rejected": -3.1660008430480957, + "logps/chosen": -279.20294189453125, + "logps/rejected": -745.2772216796875, + "loss": 0.6288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11458739638328552, + "rewards/margins": 0.7074187994003296, + "rewards/rejected": -0.5928314328193665, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 9.928362488878995e-08, + "logits/chosen": -3.272548198699951, + "logits/rejected": -3.114353656768799, + "logps/chosen": -284.57452392578125, + "logps/rejected": -1422.476318359375, + "loss": 0.7094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12553711235523224, + "rewards/margins": 1.1294617652893066, + "rewards/rejected": -1.0039246082305908, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 9.924834248627259e-08, + "logits/chosen": -3.177539825439453, + "logits/rejected": -3.1086883544921875, + "logps/chosen": -265.3809814453125, + "logps/rejected": -739.52978515625, + "loss": 0.6922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13242264091968536, + "rewards/margins": 0.7532844543457031, + "rewards/rejected": -0.620861828327179, + "step": 66 + }, + { + "epoch": 0.09, + "learning_rate": 9.921221856546293e-08, + "logits/chosen": -3.179316520690918, + "logits/rejected": -3.085636615753174, + "logps/chosen": -282.3909606933594, + "logps/rejected": -294.0172119140625, + "loss": 0.7687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1426590085029602, + "rewards/margins": 0.30135345458984375, + "rewards/rejected": -0.15869446098804474, + "step": 67 + }, + { + "epoch": 0.09, + "learning_rate": 9.917525374361911e-08, + "logits/chosen": -3.174375534057617, + "logits/rejected": -3.141751527786255, + "logps/chosen": -305.6304626464844, + "logps/rejected": -541.6306762695312, + "loss": 0.7296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03631897270679474, + "rewards/margins": 0.5604614019393921, + "rewards/rejected": -0.5241425037384033, + "step": 68 + }, + { + "epoch": 0.09, + "learning_rate": 9.913744865236797e-08, + "logits/chosen": -3.228339672088623, + "logits/rejected": -3.1125965118408203, + "logps/chosen": -284.9007568359375, + "logps/rejected": -512.6011352539062, + "loss": 0.7277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09803162515163422, + "rewards/margins": 0.5360382199287415, + "rewards/rejected": -0.43800660967826843, + "step": 69 + }, + { + "epoch": 0.09, + "learning_rate": 9.909880393769419e-08, + "logits/chosen": -3.2147321701049805, + "logits/rejected": -3.091953754425049, + "logps/chosen": -271.5090026855469, + "logps/rejected": -830.239990234375, + "loss": 0.6879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09206847846508026, + "rewards/margins": 0.902496337890625, + "rewards/rejected": -0.8104278445243835, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 9.905932025992931e-08, + "logits/chosen": -3.1389129161834717, + "logits/rejected": -3.0822839736938477, + "logps/chosen": -311.4069519042969, + "logps/rejected": -560.31005859375, + "loss": 0.6805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07468262314796448, + "rewards/margins": 0.4990326166152954, + "rewards/rejected": -0.42434996366500854, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 9.901899829374047e-08, + "logits/chosen": -3.126861095428467, + "logits/rejected": -3.1229891777038574, + "logps/chosen": -310.8626403808594, + "logps/rejected": -619.2788696289062, + "loss": 0.7015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08417053520679474, + "rewards/margins": 0.5766845941543579, + "rewards/rejected": -0.49251407384872437, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 9.89778387281188e-08, + "logits/chosen": -3.264277696609497, + "logits/rejected": -2.995083808898926, + "logps/chosen": -266.6864318847656, + "logps/rejected": -1061.7960205078125, + "loss": 0.6594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1406913697719574, + "rewards/margins": 1.0569511651992798, + "rewards/rejected": -0.916259765625, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 9.893584226636772e-08, + "logits/chosen": -3.2731590270996094, + "logits/rejected": -3.080470085144043, + "logps/chosen": -269.9696044921875, + "logps/rejected": -1106.810546875, + "loss": 0.7458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12290649116039276, + "rewards/margins": 0.8437134027481079, + "rewards/rejected": -0.720806896686554, + "step": 74 + }, + { + "epoch": 0.1, + "learning_rate": 9.889300962609089e-08, + "logits/chosen": -3.2486886978149414, + "logits/rejected": -3.1838159561157227, + "logps/chosen": -242.00576782226562, + "logps/rejected": -809.3795166015625, + "loss": 0.6506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19307251274585724, + "rewards/margins": 0.9557251334190369, + "rewards/rejected": -0.762652575969696, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 9.884934153917997e-08, + "logits/chosen": -3.22540283203125, + "logits/rejected": -3.1572141647338867, + "logps/chosen": -269.60467529296875, + "logps/rejected": -585.8292236328125, + "loss": 0.7259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.142242431640625, + "rewards/margins": 0.6158202886581421, + "rewards/rejected": -0.4735778868198395, + "step": 76 + }, + { + "epoch": 0.1, + "learning_rate": 9.880483875180204e-08, + "logits/chosen": -3.2093470096588135, + "logits/rejected": -3.118375778198242, + "logps/chosen": -279.4237365722656, + "logps/rejected": -525.1126098632812, + "loss": 0.7413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13385391235351562, + "rewards/margins": 0.5230598449707031, + "rewards/rejected": -0.3892059624195099, + "step": 77 + }, + { + "epoch": 0.1, + "learning_rate": 9.875950202438699e-08, + "logits/chosen": -3.254056453704834, + "logits/rejected": -3.1082019805908203, + "logps/chosen": -252.6153564453125, + "logps/rejected": -577.9073486328125, + "loss": 0.6466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19097672402858734, + "rewards/margins": 0.7179176807403564, + "rewards/rejected": -0.5269409418106079, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 9.871333213161437e-08, + "logits/chosen": -3.1728591918945312, + "logits/rejected": -3.081968307495117, + "logps/chosen": -271.90020751953125, + "logps/rejected": -556.3427734375, + "loss": 0.7151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08932800590991974, + "rewards/margins": 0.7096390128135681, + "rewards/rejected": -0.6203109622001648, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 9.866632986240029e-08, + "logits/chosen": -3.2357606887817383, + "logits/rejected": -3.134946823120117, + "logps/chosen": -259.5243225097656, + "logps/rejected": -1609.33154296875, + "loss": 0.6444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16226348280906677, + "rewards/margins": 1.65581214427948, + "rewards/rejected": -1.4935486316680908, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 9.861849601988382e-08, + "logits/chosen": -3.205221652984619, + "logits/rejected": -3.0942211151123047, + "logps/chosen": -314.8915710449219, + "logps/rejected": -1069.901611328125, + "loss": 0.6569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09865418076515198, + "rewards/margins": 1.103912353515625, + "rewards/rejected": -1.0052582025527954, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 9.856983142141337e-08, + "logits/chosen": -3.2025558948516846, + "logits/rejected": -3.1110525131225586, + "logps/chosen": -280.8265075683594, + "logps/rejected": -445.7962951660156, + "loss": 0.6374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.172535702586174, + "rewards/margins": 0.5844039916992188, + "rewards/rejected": -0.41186827421188354, + "step": 82 + }, + { + "epoch": 0.11, + "learning_rate": 9.852033689853267e-08, + "logits/chosen": -3.2400825023651123, + "logits/rejected": -3.030686855316162, + "logps/chosen": -284.1300354003906, + "logps/rejected": -1529.8499755859375, + "loss": 0.6111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09761352837085724, + "rewards/margins": 1.4114012718200684, + "rewards/rejected": -1.313787817955017, + "step": 83 + }, + { + "epoch": 0.11, + "learning_rate": 9.847001329696651e-08, + "logits/chosen": -3.1684911251068115, + "logits/rejected": -2.937425136566162, + "logps/chosen": -279.3697509765625, + "logps/rejected": -1059.4603271484375, + "loss": 0.6568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13714447617530823, + "rewards/margins": 1.0777053833007812, + "rewards/rejected": -0.9405609369277954, + "step": 84 + }, + { + "epoch": 0.11, + "learning_rate": 9.841886147660644e-08, + "logits/chosen": -3.1631908416748047, + "logits/rejected": -3.0938401222229004, + "logps/chosen": -234.89324951171875, + "logps/rejected": -448.7402038574219, + "loss": 0.7365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.079320527613163, + "rewards/margins": 0.5594200491905212, + "rewards/rejected": -0.48009949922561646, + "step": 85 + }, + { + "epoch": 0.11, + "learning_rate": 9.836688231149591e-08, + "logits/chosen": -3.315497875213623, + "logits/rejected": -3.142397403717041, + "logps/chosen": -230.982666015625, + "logps/rejected": -789.46142578125, + "loss": 0.6017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23948365449905396, + "rewards/margins": 1.1359329223632812, + "rewards/rejected": -0.8964492678642273, + "step": 86 + }, + { + "epoch": 0.11, + "learning_rate": 9.831407668981545e-08, + "logits/chosen": -3.244413375854492, + "logits/rejected": -3.00750470161438, + "logps/chosen": -287.3304748535156, + "logps/rejected": -1963.118408203125, + "loss": 0.6052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11360473930835724, + "rewards/margins": 1.9815430641174316, + "rewards/rejected": -1.8679382801055908, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 9.826044551386743e-08, + "logits/chosen": -3.2289395332336426, + "logits/rejected": -3.0343246459960938, + "logps/chosen": -219.81251525878906, + "logps/rejected": -382.386962890625, + "loss": 0.6199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1276039183139801, + "rewards/margins": 0.4951835870742798, + "rewards/rejected": -0.3675796687602997, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 9.820598970006067e-08, + "logits/chosen": -3.178089141845703, + "logits/rejected": -3.055624008178711, + "logps/chosen": -325.2677917480469, + "logps/rejected": -617.375732421875, + "loss": 0.6918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11183929443359375, + "rewards/margins": 0.7589950561523438, + "rewards/rejected": -0.64715576171875, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 9.81507101788948e-08, + "logits/chosen": -3.167525291442871, + "logits/rejected": -3.0772173404693604, + "logps/chosen": -260.0639953613281, + "logps/rejected": -1074.713134765625, + "loss": 0.6344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17578965425491333, + "rewards/margins": 1.1272591352462769, + "rewards/rejected": -0.9514694213867188, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 9.80946078949443e-08, + "logits/chosen": -3.1669793128967285, + "logits/rejected": -3.087132453918457, + "logps/chosen": -248.36822509765625, + "logps/rejected": -616.6222534179688, + "loss": 0.6035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2073814570903778, + "rewards/margins": 0.8807670474052429, + "rewards/rejected": -0.6733856201171875, + "step": 91 + }, + { + "epoch": 0.12, + "learning_rate": 9.803768380684241e-08, + "logits/chosen": -3.208198070526123, + "logits/rejected": -3.1501595973968506, + "logps/chosen": -259.35968017578125, + "logps/rejected": -512.9354858398438, + "loss": 0.6837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0895385816693306, + "rewards/margins": 0.784515380859375, + "rewards/rejected": -0.694976806640625, + "step": 92 + }, + { + "epoch": 0.12, + "learning_rate": 9.797993888726472e-08, + "logits/chosen": -3.197451591491699, + "logits/rejected": -3.084780216217041, + "logps/chosen": -256.2140808105469, + "logps/rejected": -645.968505859375, + "loss": 0.6252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20065385103225708, + "rewards/margins": 1.083905816078186, + "rewards/rejected": -0.883251965045929, + "step": 93 + }, + { + "epoch": 0.12, + "learning_rate": 9.792137412291263e-08, + "logits/chosen": -3.2116434574127197, + "logits/rejected": -3.0890889167785645, + "logps/chosen": -252.74905395507812, + "logps/rejected": -352.84375, + "loss": 0.711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14729461073875427, + "rewards/margins": 0.47517701983451843, + "rewards/rejected": -0.32788240909576416, + "step": 94 + }, + { + "epoch": 0.12, + "learning_rate": 9.786199051449635e-08, + "logits/chosen": -3.1472105979919434, + "logits/rejected": -3.078524589538574, + "logps/chosen": -282.37310791015625, + "logps/rejected": -483.4310607910156, + "loss": 0.5944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16697081923484802, + "rewards/margins": 0.6734917163848877, + "rewards/rejected": -0.5065208673477173, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 9.780178907671787e-08, + "logits/chosen": -3.2094972133636475, + "logits/rejected": -3.066826820373535, + "logps/chosen": -269.3948974609375, + "logps/rejected": -603.6390380859375, + "loss": 0.6983, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21273957192897797, + "rewards/margins": 0.9186508655548096, + "rewards/rejected": -0.7059112787246704, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 9.774077083825372e-08, + "logits/chosen": -3.2306981086730957, + "logits/rejected": -3.064549446105957, + "logps/chosen": -266.2947692871094, + "logps/rejected": -461.3046875, + "loss": 0.6204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23298493027687073, + "rewards/margins": 0.7461960315704346, + "rewards/rejected": -0.5132110714912415, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 9.767893684173721e-08, + "logits/chosen": -3.1625170707702637, + "logits/rejected": -3.1020374298095703, + "logps/chosen": -274.80816650390625, + "logps/rejected": -737.2002563476562, + "loss": 0.6511, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19486543536186218, + "rewards/margins": 1.039270043373108, + "rewards/rejected": -0.8444046378135681, + "step": 98 + }, + { + "epoch": 0.13, + "learning_rate": 9.761628814374073e-08, + "logits/chosen": -3.2034082412719727, + "logits/rejected": -3.099215030670166, + "logps/chosen": -288.63946533203125, + "logps/rejected": -525.8933715820312, + "loss": 0.5832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15676116943359375, + "rewards/margins": 0.7866165637969971, + "rewards/rejected": -0.6298553943634033, + "step": 99 + }, + { + "epoch": 0.13, + "learning_rate": 9.755282581475768e-08, + "logits/chosen": -3.205339193344116, + "logits/rejected": -3.1478774547576904, + "logps/chosen": -285.13848876953125, + "logps/rejected": -783.9542236328125, + "loss": 0.5314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08089447021484375, + "rewards/margins": 1.106144666671753, + "rewards/rejected": -1.0252501964569092, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 9.748855093918415e-08, + "logits/chosen": -3.1897921562194824, + "logits/rejected": -3.0608434677124023, + "logps/chosen": -231.93006896972656, + "logps/rejected": -845.4267578125, + "loss": 0.5707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16866454482078552, + "rewards/margins": 1.2161681652069092, + "rewards/rejected": -1.0475037097930908, + "step": 101 + }, + { + "epoch": 0.13, + "learning_rate": 9.742346461530047e-08, + "logits/chosen": -3.232633590698242, + "logits/rejected": -3.139197826385498, + "logps/chosen": -259.1247863769531, + "logps/rejected": -552.74853515625, + "loss": 0.6148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16763001680374146, + "rewards/margins": 0.892598032951355, + "rewards/rejected": -0.7249679565429688, + "step": 102 + }, + { + "epoch": 0.13, + "learning_rate": 9.73575679552523e-08, + "logits/chosen": -3.187584400177002, + "logits/rejected": -3.14288067817688, + "logps/chosen": -277.8973388671875, + "logps/rejected": -678.540283203125, + "loss": 0.5956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20747071504592896, + "rewards/margins": 1.3852355480194092, + "rewards/rejected": -1.177764892578125, + "step": 103 + }, + { + "epoch": 0.13, + "learning_rate": 9.729086208503173e-08, + "logits/chosen": -3.225581169128418, + "logits/rejected": -3.124305248260498, + "logps/chosen": -282.7128601074219, + "logps/rejected": -1420.5516357421875, + "loss": 0.5322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13647538423538208, + "rewards/margins": 2.0013740062713623, + "rewards/rejected": -1.864898681640625, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 9.722334814445807e-08, + "logits/chosen": -3.249488353729248, + "logits/rejected": -3.155796527862549, + "logps/chosen": -278.12939453125, + "logps/rejected": -656.8532104492188, + "loss": 0.6042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16782532632350922, + "rewards/margins": 1.2654144763946533, + "rewards/rejected": -1.097589135169983, + "step": 105 + }, + { + "epoch": 0.14, + "learning_rate": 9.715502728715826e-08, + "logits/chosen": -3.24147367477417, + "logits/rejected": -3.128276824951172, + "logps/chosen": -261.3128662109375, + "logps/rejected": -465.136962890625, + "loss": 0.5991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18871155381202698, + "rewards/margins": 0.8243271112442017, + "rewards/rejected": -0.6356155872344971, + "step": 106 + }, + { + "epoch": 0.14, + "learning_rate": 9.708590068054727e-08, + "logits/chosen": -3.1955466270446777, + "logits/rejected": -3.168020009994507, + "logps/chosen": -290.58441162109375, + "logps/rejected": -688.3718872070312, + "loss": 0.5265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2682662904262543, + "rewards/margins": 1.430912733078003, + "rewards/rejected": -1.1626465320587158, + "step": 107 + }, + { + "epoch": 0.14, + "learning_rate": 9.701596950580806e-08, + "logits/chosen": -3.2129597663879395, + "logits/rejected": -3.068620204925537, + "logps/chosen": -258.60101318359375, + "logps/rejected": -1063.4639892578125, + "loss": 0.594, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2610282897949219, + "rewards/margins": 1.6513419151306152, + "rewards/rejected": -1.390313744544983, + "step": 108 + }, + { + "epoch": 0.14, + "learning_rate": 9.694523495787148e-08, + "logits/chosen": -3.211796760559082, + "logits/rejected": -3.0631299018859863, + "logps/chosen": -247.74713134765625, + "logps/rejected": -356.0926208496094, + "loss": 0.6009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16823044419288635, + "rewards/margins": 0.6108619570732117, + "rewards/rejected": -0.4426315426826477, + "step": 109 + }, + { + "epoch": 0.14, + "learning_rate": 9.687369824539577e-08, + "logits/chosen": -3.1124353408813477, + "logits/rejected": -3.074751853942871, + "logps/chosen": -266.6031494140625, + "logps/rejected": -1149.2724609375, + "loss": 0.5124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22237014770507812, + "rewards/margins": 1.8717446327209473, + "rewards/rejected": -1.6493744850158691, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 9.680136059074597e-08, + "logits/chosen": -3.253190279006958, + "logits/rejected": -3.0429184436798096, + "logps/chosen": -267.9667663574219, + "logps/rejected": -494.64276123046875, + "loss": 0.5411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2519127130508423, + "rewards/margins": 0.8795021176338196, + "rewards/rejected": -0.6275894045829773, + "step": 111 + }, + { + "epoch": 0.14, + "learning_rate": 9.672822322997304e-08, + "logits/chosen": -3.196317434310913, + "logits/rejected": -3.1306419372558594, + "logps/chosen": -294.1663818359375, + "logps/rejected": -424.9119567871094, + "loss": 0.5646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2398529052734375, + "rewards/margins": 0.6258819699287415, + "rewards/rejected": -0.38602906465530396, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 9.665428741279266e-08, + "logits/chosen": -3.1672780513763428, + "logits/rejected": -3.10898756980896, + "logps/chosen": -272.286865234375, + "logps/rejected": -422.021484375, + "loss": 0.6073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29716187715530396, + "rewards/margins": 0.8059631586074829, + "rewards/rejected": -0.508801281452179, + "step": 113 + }, + { + "epoch": 0.15, + "learning_rate": 9.657955440256394e-08, + "logits/chosen": -3.092973232269287, + "logits/rejected": -3.13098406791687, + "logps/chosen": -308.89300537109375, + "logps/rejected": -479.5576171875, + "loss": 0.6176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26124268770217896, + "rewards/margins": 0.8999404907226562, + "rewards/rejected": -0.6386978626251221, + "step": 114 + }, + { + "epoch": 0.15, + "learning_rate": 9.650402547626786e-08, + "logits/chosen": -3.2419586181640625, + "logits/rejected": -3.1310627460479736, + "logps/chosen": -317.8136291503906, + "logps/rejected": -890.1861572265625, + "loss": 0.585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23588258028030396, + "rewards/margins": 1.3703949451446533, + "rewards/rejected": -1.1345124244689941, + "step": 115 + }, + { + "epoch": 0.15, + "learning_rate": 9.642770192448534e-08, + "logits/chosen": -3.266158103942871, + "logits/rejected": -3.1037349700927734, + "logps/chosen": -293.49896240234375, + "logps/rejected": -1216.82763671875, + "loss": 0.5981, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.151560977101326, + "rewards/margins": 1.8161239624023438, + "rewards/rejected": -1.6645630598068237, + "step": 116 + }, + { + "epoch": 0.15, + "learning_rate": 9.635058505137534e-08, + "logits/chosen": -3.249492645263672, + "logits/rejected": -3.079134464263916, + "logps/chosen": -262.58404541015625, + "logps/rejected": -592.40478515625, + "loss": 0.5561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14934463798999786, + "rewards/margins": 1.0092506408691406, + "rewards/rejected": -0.859906017780304, + "step": 117 + }, + { + "epoch": 0.15, + "learning_rate": 9.627267617465242e-08, + "logits/chosen": -3.213693618774414, + "logits/rejected": -3.0735855102539062, + "logps/chosen": -268.5896301269531, + "logps/rejected": -443.41778564453125, + "loss": 0.6606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22592315077781677, + "rewards/margins": 0.6774322986602783, + "rewards/rejected": -0.45150911808013916, + "step": 118 + }, + { + "epoch": 0.15, + "learning_rate": 9.619397662556434e-08, + "logits/chosen": -3.279954433441162, + "logits/rejected": -3.0756824016571045, + "logps/chosen": -245.59963989257812, + "logps/rejected": -797.6090087890625, + "loss": 0.4392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2844032347202301, + "rewards/margins": 1.3237502574920654, + "rewards/rejected": -1.0393470525741577, + "step": 119 + }, + { + "epoch": 0.15, + "learning_rate": 9.611448774886923e-08, + "logits/chosen": -3.178924083709717, + "logits/rejected": -3.1504597663879395, + "logps/chosen": -312.4584045410156, + "logps/rejected": -685.58251953125, + "loss": 0.5625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27057647705078125, + "rewards/margins": 1.632664442062378, + "rewards/rejected": -1.3620879650115967, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 9.603421090281269e-08, + "logits/chosen": -3.246631145477295, + "logits/rejected": -3.0019302368164062, + "logps/chosen": -222.1398162841797, + "logps/rejected": -1245.0810546875, + "loss": 0.5267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24563217163085938, + "rewards/margins": 1.8258293867111206, + "rewards/rejected": -1.5801972150802612, + "step": 121 + }, + { + "epoch": 0.16, + "learning_rate": 9.595314745910454e-08, + "logits/chosen": -3.248745918273926, + "logits/rejected": -3.199028968811035, + "logps/chosen": -303.1894836425781, + "logps/rejected": -784.0449829101562, + "loss": 0.5879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11307220160961151, + "rewards/margins": 1.2647597789764404, + "rewards/rejected": -1.1516876220703125, + "step": 122 + }, + { + "epoch": 0.16, + "learning_rate": 9.587129880289538e-08, + "logits/chosen": -3.2444088459014893, + "logits/rejected": -3.152775764465332, + "logps/chosen": -265.3672790527344, + "logps/rejected": -1206.6802978515625, + "loss": 0.5134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18402405083179474, + "rewards/margins": 2.289755344390869, + "rewards/rejected": -2.105731248855591, + "step": 123 + }, + { + "epoch": 0.16, + "learning_rate": 9.578866633275286e-08, + "logits/chosen": -3.240121364593506, + "logits/rejected": -3.070645809173584, + "logps/chosen": -256.24468994140625, + "logps/rejected": -402.74420166015625, + "loss": 0.6354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18097686767578125, + "rewards/margins": 0.5445693731307983, + "rewards/rejected": -0.3635925352573395, + "step": 124 + }, + { + "epoch": 0.16, + "learning_rate": 9.570525146063798e-08, + "logits/chosen": -3.2100870609283447, + "logits/rejected": -2.983729362487793, + "logps/chosen": -255.03759765625, + "logps/rejected": -623.83740234375, + "loss": 0.6143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1663559079170227, + "rewards/margins": 0.7890075445175171, + "rewards/rejected": -0.6226516962051392, + "step": 125 + }, + { + "epoch": 0.16, + "learning_rate": 9.562105561188067e-08, + "logits/chosen": -3.229557991027832, + "logits/rejected": -3.160883903503418, + "logps/chosen": -251.61459350585938, + "logps/rejected": -677.67822265625, + "loss": 0.5175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19573363661766052, + "rewards/margins": 1.3656508922576904, + "rewards/rejected": -1.169917345046997, + "step": 126 + }, + { + "epoch": 0.16, + "learning_rate": 9.553608022515576e-08, + "logits/chosen": -3.1917355060577393, + "logits/rejected": -3.058990478515625, + "logps/chosen": -284.66680908203125, + "logps/rejected": -530.2095336914062, + "loss": 0.5384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2681472897529602, + "rewards/margins": 0.9965347051620483, + "rewards/rejected": -0.7283874750137329, + "step": 127 + }, + { + "epoch": 0.16, + "learning_rate": 9.545032675245812e-08, + "logits/chosen": -3.2484378814697266, + "logits/rejected": -3.0409440994262695, + "logps/chosen": -267.27703857421875, + "logps/rejected": -760.8374633789062, + "loss": 0.5347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20673218369483948, + "rewards/margins": 1.2694534063339233, + "rewards/rejected": -1.0627212524414062, + "step": 128 + }, + { + "epoch": 0.16, + "learning_rate": 9.536379665907798e-08, + "logits/chosen": -3.2435507774353027, + "logits/rejected": -3.1459779739379883, + "logps/chosen": -279.9857177734375, + "logps/rejected": -554.2315673828125, + "loss": 0.5424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3401809632778168, + "rewards/margins": 1.5546982288360596, + "rewards/rejected": -1.2145172357559204, + "step": 129 + }, + { + "epoch": 0.17, + "learning_rate": 9.527649142357594e-08, + "logits/chosen": -3.1629245281219482, + "logits/rejected": -3.0240073204040527, + "logps/chosen": -292.37322998046875, + "logps/rejected": -879.453857421875, + "loss": 0.4933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19845733046531677, + "rewards/margins": 1.463252305984497, + "rewards/rejected": -1.264794945716858, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 9.518841253775753e-08, + "logits/chosen": -3.2498745918273926, + "logits/rejected": -3.081306219100952, + "logps/chosen": -250.9097137451172, + "logps/rejected": -772.6010131835938, + "loss": 0.5334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2575836181640625, + "rewards/margins": 1.5795929431915283, + "rewards/rejected": -1.3220093250274658, + "step": 131 + }, + { + "epoch": 0.17, + "learning_rate": 9.509956150664795e-08, + "logits/chosen": -3.2200329303741455, + "logits/rejected": -3.075040817260742, + "logps/chosen": -275.0686340332031, + "logps/rejected": -483.974609375, + "loss": 0.53, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2848869562149048, + "rewards/margins": 1.181664228439331, + "rewards/rejected": -0.896777331829071, + "step": 132 + }, + { + "epoch": 0.17, + "learning_rate": 9.500993984846612e-08, + "logits/chosen": -3.253690719604492, + "logits/rejected": -3.058516263961792, + "logps/chosen": -286.270263671875, + "logps/rejected": -480.7252197265625, + "loss": 0.5589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2626892030239105, + "rewards/margins": 1.1500306129455566, + "rewards/rejected": -0.887341320514679, + "step": 133 + }, + { + "epoch": 0.17, + "learning_rate": 9.491954909459894e-08, + "logits/chosen": -3.109799861907959, + "logits/rejected": -3.04663348197937, + "logps/chosen": -271.45562744140625, + "logps/rejected": -474.9178771972656, + "loss": 0.4957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4252372980117798, + "rewards/margins": 1.1373589038848877, + "rewards/rejected": -0.7121216058731079, + "step": 134 + }, + { + "epoch": 0.17, + "learning_rate": 9.482839078957499e-08, + "logits/chosen": -3.2534868717193604, + "logits/rejected": -3.1077980995178223, + "logps/chosen": -301.7548828125, + "logps/rejected": -720.43359375, + "loss": 0.4813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.244415283203125, + "rewards/margins": 1.6351349353790283, + "rewards/rejected": -1.3907196521759033, + "step": 135 + }, + { + "epoch": 0.17, + "learning_rate": 9.473646649103817e-08, + "logits/chosen": -3.2158236503601074, + "logits/rejected": -3.053718090057373, + "logps/chosen": -302.9681396484375, + "logps/rejected": -1092.42236328125, + "loss": 0.5108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.306600958108902, + "rewards/margins": 1.8672454357147217, + "rewards/rejected": -1.5606446266174316, + "step": 136 + }, + { + "epoch": 0.17, + "learning_rate": 9.464377776972114e-08, + "logits/chosen": -3.195523262023926, + "logits/rejected": -3.124316692352295, + "logps/chosen": -253.77932739257812, + "logps/rejected": -643.30859375, + "loss": 0.4826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21136780083179474, + "rewards/margins": 1.2860534191131592, + "rewards/rejected": -1.0746856927871704, + "step": 137 + }, + { + "epoch": 0.18, + "learning_rate": 9.455032620941839e-08, + "logits/chosen": -3.222108840942383, + "logits/rejected": -3.077343463897705, + "logps/chosen": -247.5036163330078, + "logps/rejected": -670.3700561523438, + "loss": 0.5148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22095641493797302, + "rewards/margins": 1.22845458984375, + "rewards/rejected": -1.0074981451034546, + "step": 138 + }, + { + "epoch": 0.18, + "learning_rate": 9.445611340695925e-08, + "logits/chosen": -3.230902910232544, + "logits/rejected": -3.1110482215881348, + "logps/chosen": -245.5793914794922, + "logps/rejected": -627.0997314453125, + "loss": 0.4997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23573532700538635, + "rewards/margins": 1.4144798517227173, + "rewards/rejected": -1.1787445545196533, + "step": 139 + }, + { + "epoch": 0.18, + "learning_rate": 9.436114097218058e-08, + "logits/chosen": -3.2241721153259277, + "logits/rejected": -3.0438928604125977, + "logps/chosen": -290.4257507324219, + "logps/rejected": -939.662353515625, + "loss": 0.563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24467775225639343, + "rewards/margins": 1.693734884262085, + "rewards/rejected": -1.4490571022033691, + "step": 140 + }, + { + "epoch": 0.18, + "learning_rate": 9.426541052789925e-08, + "logits/chosen": -3.248753547668457, + "logits/rejected": -3.130268096923828, + "logps/chosen": -280.1632080078125, + "logps/rejected": -503.7763671875, + "loss": 0.5356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20016632974147797, + "rewards/margins": 1.1681594848632812, + "rewards/rejected": -0.9679931998252869, + "step": 141 + }, + { + "epoch": 0.18, + "learning_rate": 9.416892370988443e-08, + "logits/chosen": -3.181546926498413, + "logits/rejected": -3.1146583557128906, + "logps/chosen": -267.7093505859375, + "logps/rejected": -384.83380126953125, + "loss": 0.512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2443641722202301, + "rewards/margins": 0.9093895554542542, + "rewards/rejected": -0.6650253534317017, + "step": 142 + }, + { + "epoch": 0.18, + "learning_rate": 9.40716821668296e-08, + "logits/chosen": -3.241330146789551, + "logits/rejected": -3.0811493396759033, + "logps/chosen": -274.85162353515625, + "logps/rejected": -391.7928161621094, + "loss": 0.5756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20169372856616974, + "rewards/margins": 0.7639992237091064, + "rewards/rejected": -0.5623054504394531, + "step": 143 + }, + { + "epoch": 0.18, + "learning_rate": 9.397368756032444e-08, + "logits/chosen": -3.267599105834961, + "logits/rejected": -3.1008493900299072, + "logps/chosen": -269.8920593261719, + "logps/rejected": -1719.7657470703125, + "loss": 0.4242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2795867919921875, + "rewards/margins": 3.1647067070007324, + "rewards/rejected": -2.885119915008545, + "step": 144 + }, + { + "epoch": 0.18, + "learning_rate": 9.387494156482642e-08, + "logits/chosen": -3.2497384548187256, + "logits/rejected": -3.062983989715576, + "logps/chosen": -265.49969482421875, + "logps/rejected": -588.4465942382812, + "loss": 0.4875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2198326140642166, + "rewards/margins": 1.323957085609436, + "rewards/rejected": -1.1041245460510254, + "step": 145 + }, + { + "epoch": 0.19, + "learning_rate": 9.377544586763214e-08, + "logits/chosen": -3.2346057891845703, + "logits/rejected": -3.1124706268310547, + "logps/chosen": -249.7569122314453, + "logps/rejected": -580.2872314453125, + "loss": 0.5441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.276712030172348, + "rewards/margins": 1.4203078746795654, + "rewards/rejected": -1.1435959339141846, + "step": 146 + }, + { + "epoch": 0.19, + "learning_rate": 9.367520216884854e-08, + "logits/chosen": -3.2128641605377197, + "logits/rejected": -3.1469998359680176, + "logps/chosen": -268.954833984375, + "logps/rejected": -779.4927978515625, + "loss": 0.5164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23169861733913422, + "rewards/margins": 1.5494521856307983, + "rewards/rejected": -1.317753553390503, + "step": 147 + }, + { + "epoch": 0.19, + "learning_rate": 9.357421218136385e-08, + "logits/chosen": -3.254892349243164, + "logits/rejected": -3.1013343334198, + "logps/chosen": -277.88055419921875, + "logps/rejected": -1758.9927978515625, + "loss": 0.4477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3600616455078125, + "rewards/margins": 3.629812717437744, + "rewards/rejected": -3.2697510719299316, + "step": 148 + }, + { + "epoch": 0.19, + "learning_rate": 9.347247763081834e-08, + "logits/chosen": -3.183506965637207, + "logits/rejected": -3.0385212898254395, + "logps/chosen": -283.99017333984375, + "logps/rejected": -567.9764404296875, + "loss": 0.5432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2215011715888977, + "rewards/margins": 1.210658311843872, + "rewards/rejected": -0.9891570806503296, + "step": 149 + }, + { + "epoch": 0.19, + "learning_rate": 9.337000025557476e-08, + "logits/chosen": -3.1977880001068115, + "logits/rejected": -3.121166229248047, + "logps/chosen": -264.046142578125, + "logps/rejected": -566.2144165039062, + "loss": 0.4656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34343260526657104, + "rewards/margins": 1.3246994018554688, + "rewards/rejected": -0.9812668561935425, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 9.32667818066887e-08, + "logits/chosen": -3.2719783782958984, + "logits/rejected": -3.075904369354248, + "logps/chosen": -246.17054748535156, + "logps/rejected": -335.04046630859375, + "loss": 0.562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3026107847690582, + "rewards/margins": 0.898735761642456, + "rewards/rejected": -0.5961250066757202, + "step": 151 + }, + { + "epoch": 0.19, + "learning_rate": 9.316282404787869e-08, + "logits/chosen": -3.186253786087036, + "logits/rejected": -3.097630500793457, + "logps/chosen": -276.0863037109375, + "logps/rejected": -843.6341552734375, + "loss": 0.419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3002563714981079, + "rewards/margins": 2.042044162750244, + "rewards/rejected": -1.7417877912521362, + "step": 152 + }, + { + "epoch": 0.2, + "learning_rate": 9.305812875549598e-08, + "logits/chosen": -3.2121293544769287, + "logits/rejected": -3.148219108581543, + "logps/chosen": -260.3309631347656, + "logps/rejected": -840.69921875, + "loss": 0.4628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29757386445999146, + "rewards/margins": 1.9100983142852783, + "rewards/rejected": -1.6125245094299316, + "step": 153 + }, + { + "epoch": 0.2, + "learning_rate": 9.295269771849425e-08, + "logits/chosen": -3.144493818283081, + "logits/rejected": -3.074275016784668, + "logps/chosen": -278.36004638671875, + "logps/rejected": -460.7501220703125, + "loss": 0.516, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37408447265625, + "rewards/margins": 1.1409485340118408, + "rewards/rejected": -0.766864001750946, + "step": 154 + }, + { + "epoch": 0.2, + "learning_rate": 9.284653273839905e-08, + "logits/chosen": -3.227804660797119, + "logits/rejected": -3.1724438667297363, + "logps/chosen": -264.53167724609375, + "logps/rejected": -788.062744140625, + "loss": 0.4373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28542253375053406, + "rewards/margins": 1.768653154373169, + "rewards/rejected": -1.4832305908203125, + "step": 155 + }, + { + "epoch": 0.2, + "learning_rate": 9.273963562927694e-08, + "logits/chosen": -3.1931581497192383, + "logits/rejected": -3.0417888164520264, + "logps/chosen": -276.38323974609375, + "logps/rejected": -946.396484375, + "loss": 0.4422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4333938658237457, + "rewards/margins": 2.1005373001098633, + "rewards/rejected": -1.6671433448791504, + "step": 156 + }, + { + "epoch": 0.2, + "learning_rate": 9.26320082177046e-08, + "logits/chosen": -3.2060441970825195, + "logits/rejected": -3.0414462089538574, + "logps/chosen": -266.4167785644531, + "logps/rejected": -578.912353515625, + "loss": 0.5215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3412727415561676, + "rewards/margins": 1.577024221420288, + "rewards/rejected": -1.2357513904571533, + "step": 157 + }, + { + "epoch": 0.2, + "learning_rate": 9.252365234273753e-08, + "logits/chosen": -3.2144687175750732, + "logits/rejected": -3.0201101303100586, + "logps/chosen": -334.81341552734375, + "logps/rejected": -616.509033203125, + "loss": 0.5391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07585296779870987, + "rewards/margins": 1.1614608764648438, + "rewards/rejected": -1.085607886314392, + "step": 158 + }, + { + "epoch": 0.2, + "learning_rate": 9.241456985587868e-08, + "logits/chosen": -3.269808053970337, + "logits/rejected": -3.0607588291168213, + "logps/chosen": -256.9342956542969, + "logps/rejected": -470.6138610839844, + "loss": 0.5017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2941436767578125, + "rewards/margins": 1.1117491722106934, + "rewards/rejected": -0.8176056146621704, + "step": 159 + }, + { + "epoch": 0.2, + "learning_rate": 9.230476262104676e-08, + "logits/chosen": -3.15559983253479, + "logits/rejected": -3.0462658405303955, + "logps/chosen": -299.77667236328125, + "logps/rejected": -674.4666748046875, + "loss": 0.5743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36475372314453125, + "rewards/margins": 1.4643478393554688, + "rewards/rejected": -1.0995941162109375, + "step": 160 + }, + { + "epoch": 0.21, + "learning_rate": 9.219423251454446e-08, + "logits/chosen": -3.17065167427063, + "logits/rejected": -3.1350765228271484, + "logps/chosen": -286.233642578125, + "logps/rejected": -519.5081787109375, + "loss": 0.4553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36709290742874146, + "rewards/margins": 1.4920287132263184, + "rewards/rejected": -1.1249358654022217, + "step": 161 + }, + { + "epoch": 0.21, + "learning_rate": 9.208298142502635e-08, + "logits/chosen": -3.2574820518493652, + "logits/rejected": -3.122138023376465, + "logps/chosen": -268.22021484375, + "logps/rejected": -507.57147216796875, + "loss": 0.461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4071304202079773, + "rewards/margins": 1.4297516345977783, + "rewards/rejected": -1.0226211547851562, + "step": 162 + }, + { + "epoch": 0.21, + "learning_rate": 9.197101125346657e-08, + "logits/chosen": -3.208670139312744, + "logits/rejected": -3.0601115226745605, + "logps/chosen": -266.68829345703125, + "logps/rejected": -396.4921875, + "loss": 0.5032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3175643980503082, + "rewards/margins": 1.179656982421875, + "rewards/rejected": -0.8620926141738892, + "step": 163 + }, + { + "epoch": 0.21, + "learning_rate": 9.185832391312642e-08, + "logits/chosen": -3.2405972480773926, + "logits/rejected": -3.1467318534851074, + "logps/chosen": -222.26206970214844, + "logps/rejected": -346.9363708496094, + "loss": 0.5111, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26559221744537354, + "rewards/margins": 0.9636039733886719, + "rewards/rejected": -0.6980118155479431, + "step": 164 + }, + { + "epoch": 0.21, + "learning_rate": 9.174492132952165e-08, + "logits/chosen": -3.261976957321167, + "logits/rejected": -3.1106576919555664, + "logps/chosen": -274.165283203125, + "logps/rejected": -265.56915283203125, + "loss": 0.5399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3825027346611023, + "rewards/margins": 0.8911719918251038, + "rewards/rejected": -0.5086692571640015, + "step": 165 + }, + { + "epoch": 0.21, + "learning_rate": 9.163080544038952e-08, + "logits/chosen": -3.225123405456543, + "logits/rejected": -3.083148956298828, + "logps/chosen": -259.9715576171875, + "logps/rejected": -738.6724853515625, + "loss": 0.5017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41467440128326416, + "rewards/margins": 1.9102065563201904, + "rewards/rejected": -1.4955322742462158, + "step": 166 + }, + { + "epoch": 0.21, + "learning_rate": 9.15159781956557e-08, + "logits/chosen": -3.2103118896484375, + "logits/rejected": -3.104519844055176, + "logps/chosen": -287.00860595703125, + "logps/rejected": -492.1153564453125, + "loss": 0.4724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32432252168655396, + "rewards/margins": 1.5203049182891846, + "rewards/rejected": -1.1959824562072754, + "step": 167 + }, + { + "epoch": 0.21, + "learning_rate": 9.1400441557401e-08, + "logits/chosen": -3.235203981399536, + "logits/rejected": -3.096116065979004, + "logps/chosen": -260.292236328125, + "logps/rejected": -3354.768310546875, + "loss": 0.4864, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.309152215719223, + "rewards/margins": 4.3759446144104, + "rewards/rejected": -4.0667924880981445, + "step": 168 + }, + { + "epoch": 0.22, + "learning_rate": 9.128419749982779e-08, + "logits/chosen": -3.1514816284179688, + "logits/rejected": -3.047553539276123, + "logps/chosen": -263.6236572265625, + "logps/rejected": -629.51904296875, + "loss": 0.4293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.429983526468277, + "rewards/margins": 1.56829833984375, + "rewards/rejected": -1.1383148431777954, + "step": 169 + }, + { + "epoch": 0.22, + "learning_rate": 9.116724800922628e-08, + "logits/chosen": -3.1737589836120605, + "logits/rejected": -3.045121669769287, + "logps/chosen": -298.0084228515625, + "logps/rejected": -516.9315185546875, + "loss": 0.4881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2707977294921875, + "rewards/margins": 1.3989791870117188, + "rewards/rejected": -1.1281814575195312, + "step": 170 + }, + { + "epoch": 0.22, + "learning_rate": 9.10495950839406e-08, + "logits/chosen": -3.2455344200134277, + "logits/rejected": -3.086900234222412, + "logps/chosen": -226.0857696533203, + "logps/rejected": -903.7576293945312, + "loss": 0.4903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34157103300094604, + "rewards/margins": 2.1123809814453125, + "rewards/rejected": -1.7708100080490112, + "step": 171 + }, + { + "epoch": 0.22, + "learning_rate": 9.093124073433462e-08, + "logits/chosen": -3.306060552597046, + "logits/rejected": -3.071288824081421, + "logps/chosen": -269.2454833984375, + "logps/rejected": -1025.1622314453125, + "loss": 0.5152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24457016587257385, + "rewards/margins": 2.3119499683380127, + "rewards/rejected": -2.0673797130584717, + "step": 172 + }, + { + "epoch": 0.22, + "learning_rate": 9.081218698275762e-08, + "logits/chosen": -3.2598514556884766, + "logits/rejected": -3.177288770675659, + "logps/chosen": -273.6548156738281, + "logps/rejected": -791.3944091796875, + "loss": 0.4409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40378111600875854, + "rewards/margins": 2.182248115539551, + "rewards/rejected": -1.778466820716858, + "step": 173 + }, + { + "epoch": 0.22, + "learning_rate": 9.069243586350974e-08, + "logits/chosen": -3.140399217605591, + "logits/rejected": -3.0678253173828125, + "logps/chosen": -289.8533935546875, + "logps/rejected": -835.7498779296875, + "loss": 0.5228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20758438110351562, + "rewards/margins": 1.487699031829834, + "rewards/rejected": -1.280114769935608, + "step": 174 + }, + { + "epoch": 0.22, + "learning_rate": 9.057198942280721e-08, + "logits/chosen": -3.2245612144470215, + "logits/rejected": -3.146380662918091, + "logps/chosen": -290.834228515625, + "logps/rejected": -582.288818359375, + "loss": 0.4601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3004959225654602, + "rewards/margins": 1.7849228382110596, + "rewards/rejected": -1.4844269752502441, + "step": 175 + }, + { + "epoch": 0.22, + "learning_rate": 9.045084971874737e-08, + "logits/chosen": -3.205282688140869, + "logits/rejected": -3.147152900695801, + "logps/chosen": -260.7502746582031, + "logps/rejected": -635.558349609375, + "loss": 0.5438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2940788269042969, + "rewards/margins": 1.6630165576934814, + "rewards/rejected": -1.3689377307891846, + "step": 176 + }, + { + "epoch": 0.23, + "learning_rate": 9.032901882127352e-08, + "logits/chosen": -3.2116036415100098, + "logits/rejected": -2.939517021179199, + "logps/chosen": -241.55587768554688, + "logps/rejected": -412.3927001953125, + "loss": 0.4688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4386787414550781, + "rewards/margins": 1.153577446937561, + "rewards/rejected": -0.7148987054824829, + "step": 177 + }, + { + "epoch": 0.23, + "learning_rate": 9.020649881213957e-08, + "logits/chosen": -3.235788345336914, + "logits/rejected": -3.1002590656280518, + "logps/chosen": -273.9399108886719, + "logps/rejected": -1034.345458984375, + "loss": 0.4238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26855164766311646, + "rewards/margins": 2.267709255218506, + "rewards/rejected": -1.9991576671600342, + "step": 178 + }, + { + "epoch": 0.23, + "learning_rate": 9.008329178487441e-08, + "logits/chosen": -3.2471888065338135, + "logits/rejected": -3.1538608074188232, + "logps/chosen": -283.16143798828125, + "logps/rejected": -732.34814453125, + "loss": 0.4996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2505683898925781, + "rewards/margins": 2.270646095275879, + "rewards/rejected": -2.020077705383301, + "step": 179 + }, + { + "epoch": 0.23, + "learning_rate": 8.995939984474623e-08, + "logits/chosen": -3.201460838317871, + "logits/rejected": -3.032376289367676, + "logps/chosen": -296.79534912109375, + "logps/rejected": -1296.814697265625, + "loss": 0.4259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26985475420951843, + "rewards/margins": 2.6705384254455566, + "rewards/rejected": -2.400683641433716, + "step": 180 + }, + { + "epoch": 0.23, + "learning_rate": 8.983482510872644e-08, + "logits/chosen": -3.220674991607666, + "logits/rejected": -3.13254451751709, + "logps/chosen": -275.7559509277344, + "logps/rejected": -698.0775146484375, + "loss": 0.4184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2965072691440582, + "rewards/margins": 1.72185218334198, + "rewards/rejected": -1.4253449440002441, + "step": 181 + }, + { + "epoch": 0.23, + "learning_rate": 8.970956970545355e-08, + "logits/chosen": -3.2560224533081055, + "logits/rejected": -3.0941858291625977, + "logps/chosen": -264.4842834472656, + "logps/rejected": -575.678955078125, + "loss": 0.4977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37636566162109375, + "rewards/margins": 1.524204969406128, + "rewards/rejected": -1.1478393077850342, + "step": 182 + }, + { + "epoch": 0.23, + "learning_rate": 8.958363577519683e-08, + "logits/chosen": -3.1894447803497314, + "logits/rejected": -3.1149868965148926, + "logps/chosen": -268.1027526855469, + "logps/rejected": -451.1796875, + "loss": 0.493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32774126529693604, + "rewards/margins": 1.3023414611816406, + "rewards/rejected": -0.9746002554893494, + "step": 183 + }, + { + "epoch": 0.23, + "learning_rate": 8.945702546981968e-08, + "logits/chosen": -3.217100143432617, + "logits/rejected": -3.067873477935791, + "logps/chosen": -300.1533203125, + "logps/rejected": -668.2071533203125, + "loss": 0.4804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39901429414749146, + "rewards/margins": 1.5416091680526733, + "rewards/rejected": -1.1425949335098267, + "step": 184 + }, + { + "epoch": 0.24, + "learning_rate": 8.932974095274289e-08, + "logits/chosen": -3.176520824432373, + "logits/rejected": -3.187788963317871, + "logps/chosen": -305.5220031738281, + "logps/rejected": -488.2687683105469, + "loss": 0.4959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3745620846748352, + "rewards/margins": 1.5302170515060425, + "rewards/rejected": -1.1556549072265625, + "step": 185 + }, + { + "epoch": 0.24, + "learning_rate": 8.920178439890764e-08, + "logits/chosen": -3.2271547317504883, + "logits/rejected": -3.1234147548675537, + "logps/chosen": -258.425048828125, + "logps/rejected": -434.0025939941406, + "loss": 0.4506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44920504093170166, + "rewards/margins": 1.2140289545059204, + "rewards/rejected": -0.7648239731788635, + "step": 186 + }, + { + "epoch": 0.24, + "learning_rate": 8.907315799473844e-08, + "logits/chosen": -3.2730541229248047, + "logits/rejected": -3.0687122344970703, + "logps/chosen": -287.92633056640625, + "logps/rejected": -401.3916015625, + "loss": 0.4997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34727784991264343, + "rewards/margins": 0.9673828482627869, + "rewards/rejected": -0.6201050281524658, + "step": 187 + }, + { + "epoch": 0.24, + "learning_rate": 8.894386393810562e-08, + "logits/chosen": -3.251582622528076, + "logits/rejected": -3.0113325119018555, + "logps/chosen": -232.48965454101562, + "logps/rejected": -627.26611328125, + "loss": 0.447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46285170316696167, + "rewards/margins": 1.7132759094238281, + "rewards/rejected": -1.2504241466522217, + "step": 188 + }, + { + "epoch": 0.24, + "learning_rate": 8.881390443828787e-08, + "logits/chosen": -3.2655391693115234, + "logits/rejected": -3.1651298999786377, + "logps/chosen": -240.70150756835938, + "logps/rejected": -783.6235961914062, + "loss": 0.4243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.381552129983902, + "rewards/margins": 2.6372923851013184, + "rewards/rejected": -2.2557404041290283, + "step": 189 + }, + { + "epoch": 0.24, + "learning_rate": 8.868328171593447e-08, + "logits/chosen": -3.3162975311279297, + "logits/rejected": -3.2113523483276367, + "logps/chosen": -250.2537384033203, + "logps/rejected": -562.4055786132812, + "loss": 0.4175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3461868166923523, + "rewards/margins": 1.8464523553848267, + "rewards/rejected": -1.5002655982971191, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 8.855199800302735e-08, + "logits/chosen": -3.1891703605651855, + "logits/rejected": -3.0797247886657715, + "logps/chosen": -251.97850036621094, + "logps/rejected": -507.8576354980469, + "loss": 0.4545, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31134873628616333, + "rewards/margins": 1.4388481378555298, + "rewards/rejected": -1.1274994611740112, + "step": 191 + }, + { + "epoch": 0.24, + "learning_rate": 8.842005554284295e-08, + "logits/chosen": -3.1666154861450195, + "logits/rejected": -3.073214054107666, + "logps/chosen": -250.25189208984375, + "logps/rejected": -540.1085205078125, + "loss": 0.4501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.359527587890625, + "rewards/margins": 1.459930419921875, + "rewards/rejected": -1.10040283203125, + "step": 192 + }, + { + "epoch": 0.25, + "learning_rate": 8.828745658991386e-08, + "logits/chosen": -3.244716167449951, + "logits/rejected": -3.1671600341796875, + "logps/chosen": -266.5964660644531, + "logps/rejected": -672.74267578125, + "loss": 0.3961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4358307123184204, + "rewards/margins": 2.199383497238159, + "rewards/rejected": -1.7635529041290283, + "step": 193 + }, + { + "epoch": 0.25, + "learning_rate": 8.815420340999033e-08, + "logits/chosen": -3.1737849712371826, + "logits/rejected": -3.0225114822387695, + "logps/chosen": -270.1991882324219, + "logps/rejected": -722.2974243164062, + "loss": 0.4286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33156126737594604, + "rewards/margins": 2.0399413108825684, + "rewards/rejected": -1.708380103111267, + "step": 194 + }, + { + "epoch": 0.25, + "learning_rate": 8.802029828000155e-08, + "logits/chosen": -3.1871538162231445, + "logits/rejected": -3.0733680725097656, + "logps/chosen": -283.7910461425781, + "logps/rejected": -1445.568359375, + "loss": 0.3791, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43170928955078125, + "rewards/margins": 3.133815050125122, + "rewards/rejected": -2.702105760574341, + "step": 195 + }, + { + "epoch": 0.25, + "learning_rate": 8.788574348801674e-08, + "logits/chosen": -3.1117658615112305, + "logits/rejected": -3.075467109680176, + "logps/chosen": -262.2356872558594, + "logps/rejected": -634.2557983398438, + "loss": 0.385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3848114013671875, + "rewards/margins": 1.8572189807891846, + "rewards/rejected": -1.472407579421997, + "step": 196 + }, + { + "epoch": 0.25, + "learning_rate": 8.775054133320602e-08, + "logits/chosen": -3.2402353286743164, + "logits/rejected": -3.119497299194336, + "logps/chosen": -270.65435791015625, + "logps/rejected": -762.36669921875, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46697157621383667, + "rewards/margins": 2.4525458812713623, + "rewards/rejected": -1.9855743646621704, + "step": 197 + }, + { + "epoch": 0.25, + "learning_rate": 8.761469412580124e-08, + "logits/chosen": -3.213440418243408, + "logits/rejected": -3.0395431518554688, + "logps/chosen": -257.3152770996094, + "logps/rejected": -289.0408935546875, + "loss": 0.4475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43348923325538635, + "rewards/margins": 0.979637861251831, + "rewards/rejected": -0.5461486577987671, + "step": 198 + }, + { + "epoch": 0.25, + "learning_rate": 8.74782041870563e-08, + "logits/chosen": -3.2383265495300293, + "logits/rejected": -3.079974889755249, + "logps/chosen": -272.5057373046875, + "logps/rejected": -558.467041015625, + "loss": 0.4554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43718570470809937, + "rewards/margins": 2.0728697776794434, + "rewards/rejected": -1.6356842517852783, + "step": 199 + }, + { + "epoch": 0.25, + "learning_rate": 8.734107384920769e-08, + "logits/chosen": -3.2364003658294678, + "logits/rejected": -3.0902514457702637, + "logps/chosen": -236.01736450195312, + "logps/rejected": -1753.173583984375, + "loss": 0.4273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41504210233688354, + "rewards/margins": 4.386319160461426, + "rewards/rejected": -3.9712769985198975, + "step": 200 + }, + { + "epoch": 0.26, + "learning_rate": 8.720330545543453e-08, + "logits/chosen": -3.1663050651550293, + "logits/rejected": -3.041562557220459, + "logps/chosen": -284.65557861328125, + "logps/rejected": -516.01904296875, + "loss": 0.4726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3750366270542145, + "rewards/margins": 1.4880051612854004, + "rewards/rejected": -1.1129684448242188, + "step": 201 + }, + { + "epoch": 0.26, + "learning_rate": 8.706490135981854e-08, + "logits/chosen": -3.217196464538574, + "logits/rejected": -3.095245838165283, + "logps/chosen": -250.80685424804688, + "logps/rejected": -727.6781005859375, + "loss": 0.4665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3089554011821747, + "rewards/margins": 2.021226406097412, + "rewards/rejected": -1.7122712135314941, + "step": 202 + }, + { + "epoch": 0.26, + "learning_rate": 8.692586392730385e-08, + "logits/chosen": -3.212827205657959, + "logits/rejected": -3.093693733215332, + "logps/chosen": -263.1221923828125, + "logps/rejected": -1032.5748291015625, + "loss": 0.4335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43004608154296875, + "rewards/margins": 2.685458183288574, + "rewards/rejected": -2.2554123401641846, + "step": 203 + }, + { + "epoch": 0.26, + "learning_rate": 8.678619553365658e-08, + "logits/chosen": -3.2391369342803955, + "logits/rejected": -3.1852688789367676, + "logps/chosen": -291.59503173828125, + "logps/rejected": -726.26513671875, + "loss": 0.4403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4695281982421875, + "rewards/margins": 2.5107269287109375, + "rewards/rejected": -2.04119873046875, + "step": 204 + }, + { + "epoch": 0.26, + "learning_rate": 8.664589856542419e-08, + "logits/chosen": -3.2188267707824707, + "logits/rejected": -3.123276948928833, + "logps/chosen": -267.6202392578125, + "logps/rejected": -953.811279296875, + "loss": 0.4058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46234130859375, + "rewards/margins": 2.5878052711486816, + "rewards/rejected": -2.1254639625549316, + "step": 205 + }, + { + "epoch": 0.26, + "learning_rate": 8.650497541989481e-08, + "logits/chosen": -3.1799983978271484, + "logits/rejected": -3.1198010444641113, + "logps/chosen": -271.68463134765625, + "logps/rejected": -551.0331420898438, + "loss": 0.4564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3651779294013977, + "rewards/margins": 1.7871216535568237, + "rewards/rejected": -1.4219436645507812, + "step": 206 + }, + { + "epoch": 0.26, + "learning_rate": 8.636342850505615e-08, + "logits/chosen": -3.1994800567626953, + "logits/rejected": -3.113060474395752, + "logps/chosen": -286.9228515625, + "logps/rejected": -432.50421142578125, + "loss": 0.4344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5472946166992188, + "rewards/margins": 1.5160691738128662, + "rewards/rejected": -0.9687744379043579, + "step": 207 + }, + { + "epoch": 0.27, + "learning_rate": 8.622126023955445e-08, + "logits/chosen": -3.2195074558258057, + "logits/rejected": -3.070190906524658, + "logps/chosen": -251.24679565429688, + "logps/rejected": -1213.88671875, + "loss": 0.4229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3506935238838196, + "rewards/margins": 3.4456095695495605, + "rewards/rejected": -3.0949158668518066, + "step": 208 + }, + { + "epoch": 0.27, + "learning_rate": 8.60784730526531e-08, + "logits/chosen": -3.1645803451538086, + "logits/rejected": -3.0542211532592773, + "logps/chosen": -280.79248046875, + "logps/rejected": -1239.57470703125, + "loss": 0.4434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5068451166152954, + "rewards/margins": 2.941030979156494, + "rewards/rejected": -2.4341859817504883, + "step": 209 + }, + { + "epoch": 0.27, + "learning_rate": 8.593506938419119e-08, + "logits/chosen": -3.236959457397461, + "logits/rejected": -3.088212013244629, + "logps/chosen": -299.781005859375, + "logps/rejected": -1074.75048828125, + "loss": 0.4631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4485672116279602, + "rewards/margins": 2.7564163208007812, + "rewards/rejected": -2.307849168777466, + "step": 210 + }, + { + "epoch": 0.27, + "learning_rate": 8.579105168454172e-08, + "logits/chosen": -3.1503610610961914, + "logits/rejected": -3.062401294708252, + "logps/chosen": -286.3128662109375, + "logps/rejected": -638.28466796875, + "loss": 0.4491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.440896600484848, + "rewards/margins": 1.890367031097412, + "rewards/rejected": -1.4494705200195312, + "step": 211 + }, + { + "epoch": 0.27, + "learning_rate": 8.564642241456985e-08, + "logits/chosen": -3.2222392559051514, + "logits/rejected": -3.1613030433654785, + "logps/chosen": -249.0835418701172, + "logps/rejected": -722.8142700195312, + "loss": 0.4341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4626304805278778, + "rewards/margins": 2.5527031421661377, + "rewards/rejected": -2.0900726318359375, + "step": 212 + }, + { + "epoch": 0.27, + "learning_rate": 8.550118404559074e-08, + "logits/chosen": -3.2989959716796875, + "logits/rejected": -3.0971078872680664, + "logps/chosen": -270.9194641113281, + "logps/rejected": -514.4583740234375, + "loss": 0.4264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3833664059638977, + "rewards/margins": 1.788600206375122, + "rewards/rejected": -1.4052337408065796, + "step": 213 + }, + { + "epoch": 0.27, + "learning_rate": 8.535533905932736e-08, + "logits/chosen": -3.1801748275756836, + "logits/rejected": -3.0921761989593506, + "logps/chosen": -285.05413818359375, + "logps/rejected": -1264.244140625, + "loss": 0.3859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41749268770217896, + "rewards/margins": 3.3511719703674316, + "rewards/rejected": -2.9336791038513184, + "step": 214 + }, + { + "epoch": 0.27, + "learning_rate": 8.52088899478682e-08, + "logits/chosen": -3.2841594219207764, + "logits/rejected": -3.156682014465332, + "logps/chosen": -288.9742736816406, + "logps/rejected": -389.3135986328125, + "loss": 0.4505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37594300508499146, + "rewards/margins": 1.468637228012085, + "rewards/rejected": -1.0926941633224487, + "step": 215 + }, + { + "epoch": 0.28, + "learning_rate": 8.506183921362442e-08, + "logits/chosen": -3.2098922729492188, + "logits/rejected": -3.1375999450683594, + "logps/chosen": -261.0645446777344, + "logps/rejected": -643.0162353515625, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3807945251464844, + "rewards/margins": 2.284291982650757, + "rewards/rejected": -1.9034974575042725, + "step": 216 + }, + { + "epoch": 0.28, + "learning_rate": 8.491418936928741e-08, + "logits/chosen": -3.152174949645996, + "logits/rejected": -3.0282115936279297, + "logps/chosen": -273.86859130859375, + "logps/rejected": -1079.341064453125, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4598068594932556, + "rewards/margins": 2.9791367053985596, + "rewards/rejected": -2.519329786300659, + "step": 217 + }, + { + "epoch": 0.28, + "learning_rate": 8.47659429377856e-08, + "logits/chosen": -3.1262006759643555, + "logits/rejected": -3.0685906410217285, + "logps/chosen": -241.6959228515625, + "logps/rejected": -1047.167724609375, + "loss": 0.3667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4353576898574829, + "rewards/margins": 3.2536346912384033, + "rewards/rejected": -2.818276882171631, + "step": 218 + }, + { + "epoch": 0.28, + "learning_rate": 8.461710245224147e-08, + "logits/chosen": -3.2663803100585938, + "logits/rejected": -3.1009159088134766, + "logps/chosen": -248.75216674804688, + "logps/rejected": -618.4751586914062, + "loss": 0.4047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45787736773490906, + "rewards/margins": 2.2409539222717285, + "rewards/rejected": -1.783076524734497, + "step": 219 + }, + { + "epoch": 0.28, + "learning_rate": 8.446767045592829e-08, + "logits/chosen": -3.2158169746398926, + "logits/rejected": -3.178985118865967, + "logps/chosen": -253.3416748046875, + "logps/rejected": -589.3444213867188, + "loss": 0.3894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4993324279785156, + "rewards/margins": 2.1800456047058105, + "rewards/rejected": -1.6807129383087158, + "step": 220 + }, + { + "epoch": 0.28, + "learning_rate": 8.431764950222655e-08, + "logits/chosen": -3.243056058883667, + "logits/rejected": -2.9614901542663574, + "logps/chosen": -290.27301025390625, + "logps/rejected": -955.3764038085938, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44229888916015625, + "rewards/margins": 2.7643327713012695, + "rewards/rejected": -2.3220338821411133, + "step": 221 + }, + { + "epoch": 0.28, + "learning_rate": 8.416704215458041e-08, + "logits/chosen": -3.2664713859558105, + "logits/rejected": -3.114051342010498, + "logps/chosen": -276.4022216796875, + "logps/rejected": -632.9278564453125, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41187822818756104, + "rewards/margins": 2.0347023010253906, + "rewards/rejected": -1.6228241920471191, + "step": 222 + }, + { + "epoch": 0.28, + "learning_rate": 8.401585098645395e-08, + "logits/chosen": -3.1889567375183105, + "logits/rejected": -2.9790892601013184, + "logps/chosen": -264.1165771484375, + "logps/rejected": -1413.15869140625, + "loss": 0.4171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5154106616973877, + "rewards/margins": 3.547112464904785, + "rewards/rejected": -3.0317018032073975, + "step": 223 + }, + { + "epoch": 0.29, + "learning_rate": 8.386407858128706e-08, + "logits/chosen": -3.225870370864868, + "logits/rejected": -3.139531135559082, + "logps/chosen": -236.1001739501953, + "logps/rejected": -782.833251953125, + "loss": 0.4106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34958115220069885, + "rewards/margins": 2.447462320327759, + "rewards/rejected": -2.097881317138672, + "step": 224 + }, + { + "epoch": 0.29, + "learning_rate": 8.371172753245137e-08, + "logits/chosen": -3.228869915008545, + "logits/rejected": -2.9268431663513184, + "logps/chosen": -234.92633056640625, + "logps/rejected": -302.86846923828125, + "loss": 0.4339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5125175714492798, + "rewards/margins": 1.1622428894042969, + "rewards/rejected": -0.6497253179550171, + "step": 225 + }, + { + "epoch": 0.29, + "learning_rate": 8.355880044320598e-08, + "logits/chosen": -3.226562261581421, + "logits/rejected": -3.139944076538086, + "logps/chosen": -240.04660034179688, + "logps/rejected": -389.6215515136719, + "loss": 0.3751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3449699282646179, + "rewards/margins": 1.3948631286621094, + "rewards/rejected": -1.0498931407928467, + "step": 226 + }, + { + "epoch": 0.29, + "learning_rate": 8.340529992665288e-08, + "logits/chosen": -3.1921496391296387, + "logits/rejected": -2.987018585205078, + "logps/chosen": -276.63690185546875, + "logps/rejected": -1116.6636962890625, + "loss": 0.4492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3388107419013977, + "rewards/margins": 2.6831085681915283, + "rewards/rejected": -2.3442978858947754, + "step": 227 + }, + { + "epoch": 0.29, + "learning_rate": 8.32512286056924e-08, + "logits/chosen": -3.2544548511505127, + "logits/rejected": -3.086526393890381, + "logps/chosen": -236.00869750976562, + "logps/rejected": -1172.2431640625, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4150596857070923, + "rewards/margins": 2.9807076454162598, + "rewards/rejected": -2.565648078918457, + "step": 228 + }, + { + "epoch": 0.29, + "learning_rate": 8.309658911297832e-08, + "logits/chosen": -3.2350025177001953, + "logits/rejected": -3.068281412124634, + "logps/chosen": -238.57827758789062, + "logps/rejected": -269.26593017578125, + "loss": 0.4771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.520977795124054, + "rewards/margins": 1.1165939569473267, + "rewards/rejected": -0.5956161618232727, + "step": 229 + }, + { + "epoch": 0.29, + "learning_rate": 8.294138409087289e-08, + "logits/chosen": -3.1615474224090576, + "logits/rejected": -3.105302333831787, + "logps/chosen": -301.74688720703125, + "logps/rejected": -477.19976806640625, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43353271484375, + "rewards/margins": 1.5629913806915283, + "rewards/rejected": -1.1294586658477783, + "step": 230 + }, + { + "epoch": 0.29, + "learning_rate": 8.278561619140171e-08, + "logits/chosen": -3.200448513031006, + "logits/rejected": -3.155416488647461, + "logps/chosen": -300.092041015625, + "logps/rejected": -482.43896484375, + "loss": 0.4253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4299728572368622, + "rewards/margins": 1.993682861328125, + "rewards/rejected": -1.56371009349823, + "step": 231 + }, + { + "epoch": 0.3, + "learning_rate": 8.262928807620843e-08, + "logits/chosen": -3.263545036315918, + "logits/rejected": -3.1364526748657227, + "logps/chosen": -242.46231079101562, + "logps/rejected": -522.7157592773438, + "loss": 0.4057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4356193542480469, + "rewards/margins": 1.6452003717422485, + "rewards/rejected": -1.2095810174942017, + "step": 232 + }, + { + "epoch": 0.3, + "learning_rate": 8.247240241650917e-08, + "logits/chosen": -3.1748578548431396, + "logits/rejected": -2.9965338706970215, + "logps/chosen": -264.83917236328125, + "logps/rejected": -587.790771484375, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4969291687011719, + "rewards/margins": 1.6661689281463623, + "rewards/rejected": -1.1692397594451904, + "step": 233 + }, + { + "epoch": 0.3, + "learning_rate": 8.231496189304703e-08, + "logits/chosen": -3.2112247943878174, + "logits/rejected": -3.1143202781677246, + "logps/chosen": -281.2066345214844, + "logps/rejected": -917.684326171875, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4497924745082855, + "rewards/margins": 2.9550933837890625, + "rewards/rejected": -2.505300998687744, + "step": 234 + }, + { + "epoch": 0.3, + "learning_rate": 8.215696919604617e-08, + "logits/chosen": -3.2292237281799316, + "logits/rejected": -3.1017048358917236, + "logps/chosen": -260.67156982421875, + "logps/rejected": -895.0767211914062, + "loss": 0.3891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3821975588798523, + "rewards/margins": 2.5798964500427246, + "rewards/rejected": -2.1976990699768066, + "step": 235 + }, + { + "epoch": 0.3, + "learning_rate": 8.199842702516583e-08, + "logits/chosen": -3.2289226055145264, + "logits/rejected": -3.239138603210449, + "logps/chosen": -267.64849853515625, + "logps/rejected": -3648.28271484375, + "loss": 0.3935, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.374542236328125, + "rewards/margins": 6.801305770874023, + "rewards/rejected": -6.426764011383057, + "step": 236 + }, + { + "epoch": 0.3, + "learning_rate": 8.18393380894543e-08, + "logits/chosen": -3.2413153648376465, + "logits/rejected": -3.0680313110351562, + "logps/chosen": -310.81561279296875, + "logps/rejected": -513.294921875, + "loss": 0.4663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5198928713798523, + "rewards/margins": 2.100545644760132, + "rewards/rejected": -1.5806527137756348, + "step": 237 + }, + { + "epoch": 0.3, + "learning_rate": 8.167970510730252e-08, + "logits/chosen": -3.149392604827881, + "logits/rejected": -3.020718574523926, + "logps/chosen": -257.0147399902344, + "logps/rejected": -422.6219482421875, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4742469787597656, + "rewards/margins": 1.358912706375122, + "rewards/rejected": -0.8846657276153564, + "step": 238 + }, + { + "epoch": 0.3, + "learning_rate": 8.151953080639775e-08, + "logits/chosen": -3.2266783714294434, + "logits/rejected": -3.043273448944092, + "logps/chosen": -255.8751678466797, + "logps/rejected": -860.739501953125, + "loss": 0.403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4962249994277954, + "rewards/margins": 2.510974168777466, + "rewards/rejected": -2.014749050140381, + "step": 239 + }, + { + "epoch": 0.31, + "learning_rate": 8.135881792367685e-08, + "logits/chosen": -3.2324414253234863, + "logits/rejected": -3.128674030303955, + "logps/chosen": -265.20819091796875, + "logps/rejected": -412.2508544921875, + "loss": 0.4151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3726768493652344, + "rewards/margins": 1.3902335166931152, + "rewards/rejected": -1.0175567865371704, + "step": 240 + }, + { + "epoch": 0.31, + "learning_rate": 8.119756920527954e-08, + "logits/chosen": -3.2702019214630127, + "logits/rejected": -3.1178605556488037, + "logps/chosen": -264.7179870605469, + "logps/rejected": -456.40399169921875, + "loss": 0.4491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5458267331123352, + "rewards/margins": 1.7709853649139404, + "rewards/rejected": -1.22515869140625, + "step": 241 + }, + { + "epoch": 0.31, + "learning_rate": 8.103578740650156e-08, + "logits/chosen": -3.253571033477783, + "logits/rejected": -3.1311511993408203, + "logps/chosen": -271.547119140625, + "logps/rejected": -620.11181640625, + "loss": 0.469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.417135626077652, + "rewards/margins": 2.153817653656006, + "rewards/rejected": -1.7366821765899658, + "step": 242 + }, + { + "epoch": 0.31, + "learning_rate": 8.087347529174742e-08, + "logits/chosen": -3.2294020652770996, + "logits/rejected": -3.0349693298339844, + "logps/chosen": -272.89483642578125, + "logps/rejected": -726.4990234375, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6158050298690796, + "rewards/margins": 2.1597321033477783, + "rewards/rejected": -1.5439269542694092, + "step": 243 + }, + { + "epoch": 0.31, + "learning_rate": 8.07106356344834e-08, + "logits/chosen": -3.144735336303711, + "logits/rejected": -3.011761426925659, + "logps/chosen": -297.918212890625, + "logps/rejected": -461.4812316894531, + "loss": 0.4196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3582397401332855, + "rewards/margins": 1.2872345447540283, + "rewards/rejected": -0.9289947748184204, + "step": 244 + }, + { + "epoch": 0.31, + "learning_rate": 8.054727121718987e-08, + "logits/chosen": -3.183558940887451, + "logits/rejected": -2.9791369438171387, + "logps/chosen": -220.34095764160156, + "logps/rejected": -1359.12060546875, + "loss": 0.433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.491455078125, + "rewards/margins": 3.7282958030700684, + "rewards/rejected": -3.2368407249450684, + "step": 245 + }, + { + "epoch": 0.31, + "learning_rate": 8.038338483131406e-08, + "logits/chosen": -3.2804007530212402, + "logits/rejected": -3.0939855575561523, + "logps/chosen": -311.7437744140625, + "logps/rejected": -788.90185546875, + "loss": 0.4272, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6149078607559204, + "rewards/margins": 2.716198682785034, + "rewards/rejected": -2.1012909412384033, + "step": 246 + }, + { + "epoch": 0.31, + "learning_rate": 8.021897927722208e-08, + "logits/chosen": -3.2026214599609375, + "logits/rejected": -3.1306920051574707, + "logps/chosen": -256.1947021484375, + "logps/rejected": -840.6460571289062, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37209853529930115, + "rewards/margins": 2.6872901916503906, + "rewards/rejected": -2.3151917457580566, + "step": 247 + }, + { + "epoch": 0.32, + "learning_rate": 8.005405736415125e-08, + "logits/chosen": -3.234829902648926, + "logits/rejected": -3.1613669395446777, + "logps/chosen": -259.9140930175781, + "logps/rejected": -660.4373779296875, + "loss": 0.3601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3060058653354645, + "rewards/margins": 2.141195774078369, + "rewards/rejected": -1.8351898193359375, + "step": 248 + }, + { + "epoch": 0.32, + "learning_rate": 7.988862191016203e-08, + "logits/chosen": -3.180788040161133, + "logits/rejected": -3.149352550506592, + "logps/chosen": -249.9974365234375, + "logps/rejected": -677.5968017578125, + "loss": 0.4194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5361915826797485, + "rewards/margins": 2.423320770263672, + "rewards/rejected": -1.887129306793213, + "step": 249 + }, + { + "epoch": 0.32, + "learning_rate": 7.97226757420899e-08, + "logits/chosen": -3.268799304962158, + "logits/rejected": -3.075873613357544, + "logps/chosen": -276.3642883300781, + "logps/rejected": -3694.078369140625, + "loss": 0.4451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5415847897529602, + "rewards/margins": 7.72932243347168, + "rewards/rejected": -7.187737941741943, + "step": 250 + }, + { + "epoch": 0.32, + "learning_rate": 7.955622169549696e-08, + "logits/chosen": -3.304189682006836, + "logits/rejected": -3.1278882026672363, + "logps/chosen": -237.94700622558594, + "logps/rejected": -549.65869140625, + "loss": 0.4237, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5615615844726562, + "rewards/margins": 2.2059097290039062, + "rewards/rejected": -1.64434814453125, + "step": 251 + }, + { + "epoch": 0.32, + "learning_rate": 7.938926261462366e-08, + "logits/chosen": -3.2966294288635254, + "logits/rejected": -3.0933916568756104, + "logps/chosen": -214.28036499023438, + "logps/rejected": -319.1855773925781, + "loss": 0.3838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5406745672225952, + "rewards/margins": 1.2265777587890625, + "rewards/rejected": -0.6859031915664673, + "step": 252 + }, + { + "epoch": 0.32, + "learning_rate": 7.922180135233999e-08, + "logits/chosen": -3.238814353942871, + "logits/rejected": -3.06154203414917, + "logps/chosen": -306.0054931640625, + "logps/rejected": -627.9767456054688, + "loss": 0.4208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47254180908203125, + "rewards/margins": 2.0864243507385254, + "rewards/rejected": -1.6138825416564941, + "step": 253 + }, + { + "epoch": 0.32, + "learning_rate": 7.905384077009691e-08, + "logits/chosen": -3.217670440673828, + "logits/rejected": -3.0215904712677, + "logps/chosen": -240.8350372314453, + "logps/rejected": -645.37255859375, + "loss": 0.4191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5614990592002869, + "rewards/margins": 1.99437415599823, + "rewards/rejected": -1.4328750371932983, + "step": 254 + }, + { + "epoch": 0.33, + "learning_rate": 7.888538373787734e-08, + "logits/chosen": -3.20556640625, + "logits/rejected": -3.0657615661621094, + "logps/chosen": -255.41940307617188, + "logps/rejected": -833.5130615234375, + "loss": 0.3827, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39604949951171875, + "rewards/margins": 2.320723056793213, + "rewards/rejected": -1.9246735572814941, + "step": 255 + }, + { + "epoch": 0.33, + "learning_rate": 7.871643313414717e-08, + "logits/chosen": -3.289523124694824, + "logits/rejected": -3.2216854095458984, + "logps/chosen": -240.60009765625, + "logps/rejected": -762.2745361328125, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34896546602249146, + "rewards/margins": 2.6757264137268066, + "rewards/rejected": -2.326760768890381, + "step": 256 + }, + { + "epoch": 0.33, + "learning_rate": 7.854699184580609e-08, + "logits/chosen": -3.253596782684326, + "logits/rejected": -3.152712345123291, + "logps/chosen": -297.0100402832031, + "logps/rejected": -887.7996826171875, + "loss": 0.3913, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6015273928642273, + "rewards/margins": 2.9607956409454346, + "rewards/rejected": -2.3592681884765625, + "step": 257 + }, + { + "epoch": 0.33, + "learning_rate": 7.837706276813818e-08, + "logits/chosen": -3.16787052154541, + "logits/rejected": -3.1107959747314453, + "logps/chosen": -278.51165771484375, + "logps/rejected": -586.007568359375, + "loss": 0.4685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5923843383789062, + "rewards/margins": 2.2788102626800537, + "rewards/rejected": -1.686425805091858, + "step": 258 + }, + { + "epoch": 0.33, + "learning_rate": 7.820664880476255e-08, + "logits/chosen": -3.2387566566467285, + "logits/rejected": -3.051863670349121, + "logps/chosen": -270.1986999511719, + "logps/rejected": -339.0693054199219, + "loss": 0.398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47383880615234375, + "rewards/margins": 1.294661045074463, + "rewards/rejected": -0.8208221197128296, + "step": 259 + }, + { + "epoch": 0.33, + "learning_rate": 7.803575286758363e-08, + "logits/chosen": -3.193565845489502, + "logits/rejected": -3.140021324157715, + "logps/chosen": -296.5667724609375, + "logps/rejected": -717.3156127929688, + "loss": 0.3893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5195114016532898, + "rewards/margins": 2.8922271728515625, + "rewards/rejected": -2.372715711593628, + "step": 260 + }, + { + "epoch": 0.33, + "learning_rate": 7.786437787674148e-08, + "logits/chosen": -3.1390693187713623, + "logits/rejected": -2.9967291355133057, + "logps/chosen": -297.9837646484375, + "logps/rejected": -243.63525390625, + "loss": 0.4842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.487335205078125, + "rewards/margins": 0.8863235712051392, + "rewards/rejected": -0.39898836612701416, + "step": 261 + }, + { + "epoch": 0.33, + "learning_rate": 7.769252676056186e-08, + "logits/chosen": -3.2387983798980713, + "logits/rejected": -3.1138341426849365, + "logps/chosen": -261.7395935058594, + "logps/rejected": -842.0194091796875, + "loss": 0.4121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5328712463378906, + "rewards/margins": 3.1974129676818848, + "rewards/rejected": -2.664541721343994, + "step": 262 + }, + { + "epoch": 0.34, + "learning_rate": 7.752020245550617e-08, + "logits/chosen": -3.175065040588379, + "logits/rejected": -3.096149206161499, + "logps/chosen": -259.11883544921875, + "logps/rejected": -655.9813232421875, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6042991876602173, + "rewards/margins": 2.5828423500061035, + "rewards/rejected": -1.9785430431365967, + "step": 263 + }, + { + "epoch": 0.34, + "learning_rate": 7.734740790612135e-08, + "logits/chosen": -3.2636404037475586, + "logits/rejected": -3.0959300994873047, + "logps/chosen": -271.8035888671875, + "logps/rejected": -742.9571533203125, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5755554437637329, + "rewards/margins": 2.7662370204925537, + "rewards/rejected": -2.1906814575195312, + "step": 264 + }, + { + "epoch": 0.34, + "learning_rate": 7.717414606498946e-08, + "logits/chosen": -3.2182254791259766, + "logits/rejected": -3.078953266143799, + "logps/chosen": -271.88067626953125, + "logps/rejected": -1048.7379150390625, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6018631458282471, + "rewards/margins": 3.178898811340332, + "rewards/rejected": -2.577035427093506, + "step": 265 + }, + { + "epoch": 0.34, + "learning_rate": 7.700041989267735e-08, + "logits/chosen": -3.1988158226013184, + "logits/rejected": -3.118926525115967, + "logps/chosen": -286.6795654296875, + "logps/rejected": -705.2054443359375, + "loss": 0.4149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5601127743721008, + "rewards/margins": 2.7871623039245605, + "rewards/rejected": -2.2270493507385254, + "step": 266 + }, + { + "epoch": 0.34, + "learning_rate": 7.682623235768597e-08, + "logits/chosen": -3.248302698135376, + "logits/rejected": -3.0962562561035156, + "logps/chosen": -269.82012939453125, + "logps/rejected": -665.2428588867188, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5203773975372314, + "rewards/margins": 2.246265411376953, + "rewards/rejected": -1.7258880138397217, + "step": 267 + }, + { + "epoch": 0.34, + "learning_rate": 7.665158643639968e-08, + "logits/chosen": -3.208375930786133, + "logits/rejected": -3.0864205360412598, + "logps/chosen": -283.03790283203125, + "logps/rejected": -613.3585205078125, + "loss": 0.4082, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5686309933662415, + "rewards/margins": 2.9894533157348633, + "rewards/rejected": -2.4208221435546875, + "step": 268 + }, + { + "epoch": 0.34, + "learning_rate": 7.647648511303544e-08, + "logits/chosen": -3.1807467937469482, + "logits/rejected": -3.103084087371826, + "logps/chosen": -254.58316040039062, + "logps/rejected": -528.7763061523438, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5679565668106079, + "rewards/margins": 2.100759983062744, + "rewards/rejected": -1.5328032970428467, + "step": 269 + }, + { + "epoch": 0.34, + "learning_rate": 7.63009313795917e-08, + "logits/chosen": -3.240121364593506, + "logits/rejected": -3.208927631378174, + "logps/chosen": -315.9365234375, + "logps/rejected": -785.983154296875, + "loss": 0.4004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5709365606307983, + "rewards/margins": 2.788832187652588, + "rewards/rejected": -2.2178955078125, + "step": 270 + }, + { + "epoch": 0.35, + "learning_rate": 7.612492823579743e-08, + "logits/chosen": -3.184896469116211, + "logits/rejected": -3.224226951599121, + "logps/chosen": -277.3204040527344, + "logps/rejected": -1171.753173828125, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44649049639701843, + "rewards/margins": 3.63787841796875, + "rewards/rejected": -3.1913881301879883, + "step": 271 + }, + { + "epoch": 0.35, + "learning_rate": 7.594847868906076e-08, + "logits/chosen": -3.2182235717773438, + "logits/rejected": -3.1188485622406006, + "logps/chosen": -214.88726806640625, + "logps/rejected": -744.3470458984375, + "loss": 0.3971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4846298098564148, + "rewards/margins": 3.0894577503204346, + "rewards/rejected": -2.604827880859375, + "step": 272 + }, + { + "epoch": 0.35, + "learning_rate": 7.577158575441756e-08, + "logits/chosen": -3.192800521850586, + "logits/rejected": -3.0903844833374023, + "logps/chosen": -264.591552734375, + "logps/rejected": -342.2158203125, + "loss": 0.4226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5591148734092712, + "rewards/margins": 1.3854835033416748, + "rewards/rejected": -0.8263687491416931, + "step": 273 + }, + { + "epoch": 0.35, + "learning_rate": 7.559425245448005e-08, + "logits/chosen": -3.228640556335449, + "logits/rejected": -3.0594985485076904, + "logps/chosen": -274.08709716796875, + "logps/rejected": -844.3082275390625, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46406400203704834, + "rewards/margins": 2.9154021739959717, + "rewards/rejected": -2.451338291168213, + "step": 274 + }, + { + "epoch": 0.35, + "learning_rate": 7.541648181938503e-08, + "logits/chosen": -3.2219924926757812, + "logits/rejected": -3.171656847000122, + "logps/chosen": -259.4461975097656, + "logps/rejected": -974.8018798828125, + "loss": 0.381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5492790341377258, + "rewards/margins": 3.434014320373535, + "rewards/rejected": -2.884735107421875, + "step": 275 + }, + { + "epoch": 0.35, + "learning_rate": 7.523827688674219e-08, + "logits/chosen": -3.20353102684021, + "logits/rejected": -3.1142048835754395, + "logps/chosen": -283.5196533203125, + "logps/rejected": -498.5640869140625, + "loss": 0.4335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41024935245513916, + "rewards/margins": 1.5509231090545654, + "rewards/rejected": -1.1406738758087158, + "step": 276 + }, + { + "epoch": 0.35, + "learning_rate": 7.505964070158213e-08, + "logits/chosen": -3.2294347286224365, + "logits/rejected": -3.0829505920410156, + "logps/chosen": -240.194580078125, + "logps/rejected": -1063.827392578125, + "loss": 0.4031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4574996829032898, + "rewards/margins": 3.5526046752929688, + "rewards/rejected": -3.0951051712036133, + "step": 277 + }, + { + "epoch": 0.35, + "learning_rate": 7.488057631630437e-08, + "logits/chosen": -3.140026807785034, + "logits/rejected": -3.0180411338806152, + "logps/chosen": -257.1299743652344, + "logps/rejected": -733.8638916015625, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5944817066192627, + "rewards/margins": 2.1903083324432373, + "rewards/rejected": -1.5958267450332642, + "step": 278 + }, + { + "epoch": 0.36, + "learning_rate": 7.47010867906252e-08, + "logits/chosen": -3.2095346450805664, + "logits/rejected": -3.038356304168701, + "logps/chosen": -265.0348205566406, + "logps/rejected": -869.27880859375, + "loss": 0.3982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4929046630859375, + "rewards/margins": 2.645394802093506, + "rewards/rejected": -2.1524901390075684, + "step": 279 + }, + { + "epoch": 0.36, + "learning_rate": 7.452117519152541e-08, + "logits/chosen": -3.222959041595459, + "logits/rejected": -2.979867458343506, + "logps/chosen": -254.10330200195312, + "logps/rejected": -532.6549072265625, + "loss": 0.4653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49472808837890625, + "rewards/margins": 1.6931121349334717, + "rewards/rejected": -1.1983840465545654, + "step": 280 + }, + { + "epoch": 0.36, + "learning_rate": 7.434084459319781e-08, + "logits/chosen": -3.2464680671691895, + "logits/rejected": -3.1162543296813965, + "logps/chosen": -256.7456359863281, + "logps/rejected": -450.536376953125, + "loss": 0.4602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7038200497627258, + "rewards/margins": 1.897396206855774, + "rewards/rejected": -1.1935760974884033, + "step": 281 + }, + { + "epoch": 0.36, + "learning_rate": 7.41600980769948e-08, + "logits/chosen": -3.229959726333618, + "logits/rejected": -3.078371524810791, + "logps/chosen": -236.96139526367188, + "logps/rejected": -536.6329956054688, + "loss": 0.3444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4042305052280426, + "rewards/margins": 2.1066079139709473, + "rewards/rejected": -1.7023773193359375, + "step": 282 + }, + { + "epoch": 0.36, + "learning_rate": 7.397893873137563e-08, + "logits/chosen": -3.240194082260132, + "logits/rejected": -3.198420286178589, + "logps/chosen": -257.788818359375, + "logps/rejected": -583.4436645507812, + "loss": 0.3859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43458861112594604, + "rewards/margins": 2.4514756202697754, + "rewards/rejected": -2.0168869495391846, + "step": 283 + }, + { + "epoch": 0.36, + "learning_rate": 7.379736965185368e-08, + "logits/chosen": -3.243300676345825, + "logits/rejected": -2.9895477294921875, + "logps/chosen": -263.516845703125, + "logps/rejected": -303.3957824707031, + "loss": 0.4182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46576082706451416, + "rewards/margins": 1.2235665321350098, + "rewards/rejected": -0.7578056454658508, + "step": 284 + }, + { + "epoch": 0.36, + "learning_rate": 7.361539394094355e-08, + "logits/chosen": -3.2440900802612305, + "logits/rejected": -3.073721408843994, + "logps/chosen": -293.4841003417969, + "logps/rejected": -1122.183837890625, + "loss": 0.403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4585663080215454, + "rewards/margins": 3.495889186859131, + "rewards/rejected": -3.037322998046875, + "step": 285 + }, + { + "epoch": 0.36, + "learning_rate": 7.343301470810807e-08, + "logits/chosen": -3.2181406021118164, + "logits/rejected": -3.179230213165283, + "logps/chosen": -290.5972900390625, + "logps/rejected": -848.8682861328125, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4979751706123352, + "rewards/margins": 3.079066514968872, + "rewards/rejected": -2.5810914039611816, + "step": 286 + }, + { + "epoch": 0.37, + "learning_rate": 7.325023506970511e-08, + "logits/chosen": -3.204409122467041, + "logits/rejected": -3.1336073875427246, + "logps/chosen": -252.88279724121094, + "logps/rejected": -387.59332275390625, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3826996088027954, + "rewards/margins": 1.594232201576233, + "rewards/rejected": -1.2115325927734375, + "step": 287 + }, + { + "epoch": 0.37, + "learning_rate": 7.306705814893439e-08, + "logits/chosen": -3.210505485534668, + "logits/rejected": -3.115476131439209, + "logps/chosen": -296.14129638671875, + "logps/rejected": -693.3168334960938, + "loss": 0.4363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.513409435749054, + "rewards/margins": 2.697744846343994, + "rewards/rejected": -2.184335231781006, + "step": 288 + }, + { + "epoch": 0.37, + "learning_rate": 7.288348707578408e-08, + "logits/chosen": -3.230729579925537, + "logits/rejected": -3.1251206398010254, + "logps/chosen": -261.218505859375, + "logps/rejected": -528.5595703125, + "loss": 0.3924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5241485834121704, + "rewards/margins": 1.895655870437622, + "rewards/rejected": -1.3715072870254517, + "step": 289 + }, + { + "epoch": 0.37, + "learning_rate": 7.269952498697734e-08, + "logits/chosen": -3.2783865928649902, + "logits/rejected": -3.1243176460266113, + "logps/chosen": -271.44677734375, + "logps/rejected": -521.9924926757812, + "loss": 0.3977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5952751636505127, + "rewards/margins": 2.278769016265869, + "rewards/rejected": -1.6834938526153564, + "step": 290 + }, + { + "epoch": 0.37, + "learning_rate": 7.251517502591869e-08, + "logits/chosen": -3.3082847595214844, + "logits/rejected": -3.0972461700439453, + "logps/chosen": -277.62799072265625, + "logps/rejected": -345.0617370605469, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5599274039268494, + "rewards/margins": 1.615304708480835, + "rewards/rejected": -1.0553772449493408, + "step": 291 + }, + { + "epoch": 0.37, + "learning_rate": 7.233044034264033e-08, + "logits/chosen": -3.2090978622436523, + "logits/rejected": -3.1384072303771973, + "logps/chosen": -290.23968505859375, + "logps/rejected": -570.1703491210938, + "loss": 0.3591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5316711664199829, + "rewards/margins": 2.5397660732269287, + "rewards/rejected": -2.0080947875976562, + "step": 292 + }, + { + "epoch": 0.37, + "learning_rate": 7.214532409374828e-08, + "logits/chosen": -3.247340440750122, + "logits/rejected": -3.0995893478393555, + "logps/chosen": -273.72119140625, + "logps/rejected": -867.5750732421875, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6365112066268921, + "rewards/margins": 3.076974391937256, + "rewards/rejected": -2.4404633045196533, + "step": 293 + }, + { + "epoch": 0.37, + "learning_rate": 7.195982944236851e-08, + "logits/chosen": -3.281822681427002, + "logits/rejected": -3.102816104888916, + "logps/chosen": -276.9607849121094, + "logps/rejected": -644.716552734375, + "loss": 0.4024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6453521847724915, + "rewards/margins": 2.6207642555236816, + "rewards/rejected": -1.975412130355835, + "step": 294 + }, + { + "epoch": 0.38, + "learning_rate": 7.17739595580928e-08, + "logits/chosen": -3.2222704887390137, + "logits/rejected": -3.093686819076538, + "logps/chosen": -283.27667236328125, + "logps/rejected": -686.3480224609375, + "loss": 0.3608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5495529174804688, + "rewards/margins": 2.2330825328826904, + "rewards/rejected": -1.6835296154022217, + "step": 295 + }, + { + "epoch": 0.38, + "learning_rate": 7.158771761692464e-08, + "logits/chosen": -3.2096261978149414, + "logits/rejected": -3.100811719894409, + "logps/chosen": -254.68214416503906, + "logps/rejected": -794.8727416992188, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5345344543457031, + "rewards/margins": 3.3512978553771973, + "rewards/rejected": -2.816763401031494, + "step": 296 + }, + { + "epoch": 0.38, + "learning_rate": 7.140110680122495e-08, + "logits/chosen": -3.2453744411468506, + "logits/rejected": -3.081746816635132, + "logps/chosen": -302.7302551269531, + "logps/rejected": -1248.84521484375, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35496824979782104, + "rewards/margins": 3.693295478820801, + "rewards/rejected": -3.338327169418335, + "step": 297 + }, + { + "epoch": 0.38, + "learning_rate": 7.121413029965768e-08, + "logits/chosen": -3.2108309268951416, + "logits/rejected": -3.1256837844848633, + "logps/chosen": -271.0490417480469, + "logps/rejected": -597.1795654296875, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3772674798965454, + "rewards/margins": 2.251095771789551, + "rewards/rejected": -1.8738281726837158, + "step": 298 + }, + { + "epoch": 0.38, + "learning_rate": 7.102679130713537e-08, + "logits/chosen": -3.2329440116882324, + "logits/rejected": -3.1604623794555664, + "logps/chosen": -259.5572814941406, + "logps/rejected": -484.19891357421875, + "loss": 0.4061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5421661138534546, + "rewards/margins": 2.2732315063476562, + "rewards/rejected": -1.7310655117034912, + "step": 299 + }, + { + "epoch": 0.38, + "learning_rate": 7.083909302476451e-08, + "logits/chosen": -3.247115135192871, + "logits/rejected": -3.0345325469970703, + "logps/chosen": -249.624755859375, + "logps/rejected": -846.7308349609375, + "loss": 0.4098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5345001220703125, + "rewards/margins": 2.7676515579223633, + "rewards/rejected": -2.233151435852051, + "step": 300 + }, + { + "epoch": 0.38, + "learning_rate": 7.065103865979087e-08, + "logits/chosen": -3.2484192848205566, + "logits/rejected": -3.215458869934082, + "logps/chosen": -259.69561767578125, + "logps/rejected": -983.6759643554688, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5386749505996704, + "rewards/margins": 3.7970428466796875, + "rewards/rejected": -3.2583680152893066, + "step": 301 + }, + { + "epoch": 0.38, + "learning_rate": 7.046263142554469e-08, + "logits/chosen": -3.233896255493164, + "logits/rejected": -3.1946659088134766, + "logps/chosen": -320.2520751953125, + "logps/rejected": -517.7954711914062, + "loss": 0.4084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6836471557617188, + "rewards/margins": 2.0971786975860596, + "rewards/rejected": -1.4135315418243408, + "step": 302 + }, + { + "epoch": 0.39, + "learning_rate": 7.027387454138578e-08, + "logits/chosen": -3.1886394023895264, + "logits/rejected": -3.0552711486816406, + "logps/chosen": -263.4596862792969, + "logps/rejected": -648.997314453125, + "loss": 0.3498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.458578497171402, + "rewards/margins": 2.4366180896759033, + "rewards/rejected": -1.9780395030975342, + "step": 303 + }, + { + "epoch": 0.39, + "learning_rate": 7.008477123264848e-08, + "logits/chosen": -3.228318929672241, + "logits/rejected": -3.0960707664489746, + "logps/chosen": -261.45489501953125, + "logps/rejected": -989.8316650390625, + "loss": 0.3675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4187919497489929, + "rewards/margins": 3.7722129821777344, + "rewards/rejected": -3.353421211242676, + "step": 304 + }, + { + "epoch": 0.39, + "learning_rate": 6.989532473058657e-08, + "logits/chosen": -3.266249179840088, + "logits/rejected": -3.1163387298583984, + "logps/chosen": -249.91102600097656, + "logps/rejected": -324.58734130859375, + "loss": 0.4483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7975090146064758, + "rewards/margins": 1.9247093200683594, + "rewards/rejected": -1.1272003650665283, + "step": 305 + }, + { + "epoch": 0.39, + "learning_rate": 6.970553827231808e-08, + "logits/chosen": -3.1861443519592285, + "logits/rejected": -3.1610541343688965, + "logps/chosen": -277.1021728515625, + "logps/rejected": -642.0130004882812, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5335479974746704, + "rewards/margins": 2.81256103515625, + "rewards/rejected": -2.279013156890869, + "step": 306 + }, + { + "epoch": 0.39, + "learning_rate": 6.951541510076994e-08, + "logits/chosen": -3.2544562816619873, + "logits/rejected": -3.1522045135498047, + "logps/chosen": -247.2447052001953, + "logps/rejected": -372.7816467285156, + "loss": 0.3647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6268280148506165, + "rewards/margins": 1.7041763067245483, + "rewards/rejected": -1.0773483514785767, + "step": 307 + }, + { + "epoch": 0.39, + "learning_rate": 6.932495846462261e-08, + "logits/chosen": -3.246141195297241, + "logits/rejected": -3.079935073852539, + "logps/chosen": -285.4745788574219, + "logps/rejected": -746.250732421875, + "loss": 0.3503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5319603085517883, + "rewards/margins": 2.778078556060791, + "rewards/rejected": -2.2461183071136475, + "step": 308 + }, + { + "epoch": 0.39, + "learning_rate": 6.913417161825448e-08, + "logits/chosen": -3.3026835918426514, + "logits/rejected": -3.012725830078125, + "logps/chosen": -254.237548828125, + "logps/rejected": -1236.461669921875, + "loss": 0.4105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5202964544296265, + "rewards/margins": 4.067836761474609, + "rewards/rejected": -3.5475401878356934, + "step": 309 + }, + { + "epoch": 0.4, + "learning_rate": 6.894305782168638e-08, + "logits/chosen": -3.251133680343628, + "logits/rejected": -3.0996253490448, + "logps/chosen": -248.3268585205078, + "logps/rejected": -542.343994140625, + "loss": 0.4563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6715729236602783, + "rewards/margins": 2.4726624488830566, + "rewards/rejected": -1.8010895252227783, + "step": 310 + }, + { + "epoch": 0.4, + "learning_rate": 6.875162034052578e-08, + "logits/chosen": -3.2203235626220703, + "logits/rejected": -3.1321287155151367, + "logps/chosen": -269.0015563964844, + "logps/rejected": -473.5055847167969, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4804641604423523, + "rewards/margins": 2.1036758422851562, + "rewards/rejected": -1.6232116222381592, + "step": 311 + }, + { + "epoch": 0.4, + "learning_rate": 6.855986244591103e-08, + "logits/chosen": -3.205334186553955, + "logits/rejected": -3.0513219833374023, + "logps/chosen": -274.95355224609375, + "logps/rejected": -557.198486328125, + "loss": 0.3691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6072219610214233, + "rewards/margins": 2.1531996726989746, + "rewards/rejected": -1.5459778308868408, + "step": 312 + }, + { + "epoch": 0.4, + "learning_rate": 6.836778741445549e-08, + "logits/chosen": -3.2034618854522705, + "logits/rejected": -3.0333762168884277, + "logps/chosen": -296.662353515625, + "logps/rejected": -550.0028076171875, + "loss": 0.4009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7087448239326477, + "rewards/margins": 2.058114767074585, + "rewards/rejected": -1.3493698835372925, + "step": 313 + }, + { + "epoch": 0.4, + "learning_rate": 6.817539852819148e-08, + "logits/chosen": -3.2702741622924805, + "logits/rejected": -3.166503667831421, + "logps/chosen": -268.00634765625, + "logps/rejected": -806.49267578125, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6116684079170227, + "rewards/margins": 2.740109443664551, + "rewards/rejected": -2.1284408569335938, + "step": 314 + }, + { + "epoch": 0.4, + "learning_rate": 6.798269907451427e-08, + "logits/chosen": -3.251681089401245, + "logits/rejected": -3.1892709732055664, + "logps/chosen": -242.12469482421875, + "logps/rejected": -1401.67236328125, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6342422962188721, + "rewards/margins": 5.14932107925415, + "rewards/rejected": -4.515078544616699, + "step": 315 + }, + { + "epoch": 0.4, + "learning_rate": 6.778969234612582e-08, + "logits/chosen": -3.2430434226989746, + "logits/rejected": -3.0947301387786865, + "logps/chosen": -316.50787353515625, + "logps/rejected": -592.93798828125, + "loss": 0.3774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4797409176826477, + "rewards/margins": 2.1264052391052246, + "rewards/rejected": -1.6466645002365112, + "step": 316 + }, + { + "epoch": 0.4, + "learning_rate": 6.759638164097861e-08, + "logits/chosen": -3.1544594764709473, + "logits/rejected": -2.9919309616088867, + "logps/chosen": -307.0419006347656, + "logps/rejected": -588.1734619140625, + "loss": 0.3897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6117477416992188, + "rewards/margins": 2.3817856311798096, + "rewards/rejected": -1.7700378894805908, + "step": 317 + }, + { + "epoch": 0.41, + "learning_rate": 6.740277026221922e-08, + "logits/chosen": -3.2646870613098145, + "logits/rejected": -3.1445789337158203, + "logps/chosen": -274.25250244140625, + "logps/rejected": -773.155029296875, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6305465698242188, + "rewards/margins": 3.0039260387420654, + "rewards/rejected": -2.3733794689178467, + "step": 318 + }, + { + "epoch": 0.41, + "learning_rate": 6.720886151813194e-08, + "logits/chosen": -3.248100757598877, + "logits/rejected": -3.1480469703674316, + "logps/chosen": -272.2279052734375, + "logps/rejected": -441.34710693359375, + "loss": 0.3743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6003387570381165, + "rewards/margins": 1.9771698713302612, + "rewards/rejected": -1.3768310546875, + "step": 319 + }, + { + "epoch": 0.41, + "learning_rate": 6.701465872208215e-08, + "logits/chosen": -3.240837574005127, + "logits/rejected": -3.053413152694702, + "logps/chosen": -281.51458740234375, + "logps/rejected": -592.050048828125, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6643539667129517, + "rewards/margins": 2.586766242980957, + "rewards/rejected": -1.9224121570587158, + "step": 320 + }, + { + "epoch": 0.41, + "learning_rate": 6.682016519245985e-08, + "logits/chosen": -3.1449599266052246, + "logits/rejected": -3.1680216789245605, + "logps/chosen": -312.53570556640625, + "logps/rejected": -722.756591796875, + "loss": 0.4282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.66400146484375, + "rewards/margins": 3.2279160022735596, + "rewards/rejected": -2.5639145374298096, + "step": 321 + }, + { + "epoch": 0.41, + "learning_rate": 6.662538425262284e-08, + "logits/chosen": -3.254441499710083, + "logits/rejected": -3.07482647895813, + "logps/chosen": -254.2643585205078, + "logps/rejected": -235.13528442382812, + "loss": 0.4158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5962028503417969, + "rewards/margins": 1.2562141418457031, + "rewards/rejected": -0.6600112915039062, + "step": 322 + }, + { + "epoch": 0.41, + "learning_rate": 6.643031923083994e-08, + "logits/chosen": -3.221853256225586, + "logits/rejected": -3.088529586791992, + "logps/chosen": -234.59613037109375, + "logps/rejected": -506.0549011230469, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.611010730266571, + "rewards/margins": 2.4870362281799316, + "rewards/rejected": -1.8760254383087158, + "step": 323 + }, + { + "epoch": 0.41, + "learning_rate": 6.623497346023418e-08, + "logits/chosen": -3.194857120513916, + "logits/rejected": -3.101712703704834, + "logps/chosen": -273.81219482421875, + "logps/rejected": -846.3603515625, + "loss": 0.3804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5715477466583252, + "rewards/margins": 2.6912665367126465, + "rewards/rejected": -2.1197190284729004, + "step": 324 + }, + { + "epoch": 0.41, + "learning_rate": 6.603935027872579e-08, + "logits/chosen": -3.2199366092681885, + "logits/rejected": -3.0288286209106445, + "logps/chosen": -273.66424560546875, + "logps/rejected": -1251.7664794921875, + "loss": 0.323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5847564935684204, + "rewards/margins": 3.7708969116210938, + "rewards/rejected": -3.186140537261963, + "step": 325 + }, + { + "epoch": 0.42, + "learning_rate": 6.584345302897522e-08, + "logits/chosen": -3.3136940002441406, + "logits/rejected": -3.128885269165039, + "logps/chosen": -242.98146057128906, + "logps/rejected": -493.832763671875, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6105988025665283, + "rewards/margins": 2.2380967140197754, + "rewards/rejected": -1.627497911453247, + "step": 326 + }, + { + "epoch": 0.42, + "learning_rate": 6.564728505832595e-08, + "logits/chosen": -3.205026626586914, + "logits/rejected": -3.003901243209839, + "logps/chosen": -295.2000732421875, + "logps/rejected": -475.047119140625, + "loss": 0.3689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6362717151641846, + "rewards/margins": 2.0667266845703125, + "rewards/rejected": -1.430454969406128, + "step": 327 + }, + { + "epoch": 0.42, + "learning_rate": 6.545084971874738e-08, + "logits/chosen": -3.2904701232910156, + "logits/rejected": -3.125066041946411, + "logps/chosen": -295.12890625, + "logps/rejected": -808.356689453125, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6361954212188721, + "rewards/margins": 3.285417318344116, + "rewards/rejected": -2.649221897125244, + "step": 328 + }, + { + "epoch": 0.42, + "learning_rate": 6.525415036677744e-08, + "logits/chosen": -3.1865785121917725, + "logits/rejected": -3.0958073139190674, + "logps/chosen": -258.227783203125, + "logps/rejected": -988.89697265625, + "loss": 0.3668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6571159362792969, + "rewards/margins": 3.20180606842041, + "rewards/rejected": -2.544689893722534, + "step": 329 + }, + { + "epoch": 0.42, + "learning_rate": 6.505719036346537e-08, + "logits/chosen": -3.2919552326202393, + "logits/rejected": -3.15748929977417, + "logps/chosen": -236.75393676757812, + "logps/rejected": -641.1058349609375, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7321456670761108, + "rewards/margins": 2.933835029602051, + "rewards/rejected": -2.2016892433166504, + "step": 330 + }, + { + "epoch": 0.42, + "learning_rate": 6.485997307431419e-08, + "logits/chosen": -3.2484865188598633, + "logits/rejected": -3.168623447418213, + "logps/chosen": -251.8829803466797, + "logps/rejected": -556.1759033203125, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5945968627929688, + "rewards/margins": 2.2411394119262695, + "rewards/rejected": -1.6465423107147217, + "step": 331 + }, + { + "epoch": 0.42, + "learning_rate": 6.466250186922324e-08, + "logits/chosen": -3.219688892364502, + "logits/rejected": -2.8870105743408203, + "logps/chosen": -286.19012451171875, + "logps/rejected": -1555.9449462890625, + "loss": 0.331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6913238763809204, + "rewards/margins": 4.932772636413574, + "rewards/rejected": -4.241449356079102, + "step": 332 + }, + { + "epoch": 0.42, + "learning_rate": 6.446478012243055e-08, + "logits/chosen": -3.2645931243896484, + "logits/rejected": -3.1281628608703613, + "logps/chosen": -265.1976318359375, + "logps/rejected": -485.3858947753906, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6832634210586548, + "rewards/margins": 2.1312217712402344, + "rewards/rejected": -1.4479584693908691, + "step": 333 + }, + { + "epoch": 0.43, + "learning_rate": 6.426681121245526e-08, + "logits/chosen": -3.170590400695801, + "logits/rejected": -3.0790657997131348, + "logps/chosen": -233.20701599121094, + "logps/rejected": -952.615234375, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6155837774276733, + "rewards/margins": 3.6876845359802246, + "rewards/rejected": -3.072100877761841, + "step": 334 + }, + { + "epoch": 0.43, + "learning_rate": 6.406859852203981e-08, + "logits/chosen": -3.2602858543395996, + "logits/rejected": -3.0824761390686035, + "logps/chosen": -268.1780090332031, + "logps/rejected": -509.92822265625, + "loss": 0.3622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5962623357772827, + "rewards/margins": 2.12960147857666, + "rewards/rejected": -1.533339023590088, + "step": 335 + }, + { + "epoch": 0.43, + "learning_rate": 6.387014543809223e-08, + "logits/chosen": -3.2815356254577637, + "logits/rejected": -3.2010536193847656, + "logps/chosen": -252.0404052734375, + "logps/rejected": -610.1356201171875, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.595990002155304, + "rewards/margins": 3.009368896484375, + "rewards/rejected": -2.413378953933716, + "step": 336 + }, + { + "epoch": 0.43, + "learning_rate": 6.367145535162812e-08, + "logits/chosen": -3.2270679473876953, + "logits/rejected": -3.1296894550323486, + "logps/chosen": -268.2633056640625, + "logps/rejected": -564.955810546875, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43243637681007385, + "rewards/margins": 2.3305931091308594, + "rewards/rejected": -1.898156762123108, + "step": 337 + }, + { + "epoch": 0.43, + "learning_rate": 6.347253165771289e-08, + "logits/chosen": -3.2361292839050293, + "logits/rejected": -3.1543476581573486, + "logps/chosen": -226.90863037109375, + "logps/rejected": -908.2044677734375, + "loss": 0.3515, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6019127368927002, + "rewards/margins": 3.4777750968933105, + "rewards/rejected": -2.8758621215820312, + "step": 338 + }, + { + "epoch": 0.43, + "learning_rate": 6.327337775540361e-08, + "logits/chosen": -3.207679271697998, + "logits/rejected": -3.162569999694824, + "logps/chosen": -284.4725341796875, + "logps/rejected": -425.4876708984375, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4988159239292145, + "rewards/margins": 1.9111084938049316, + "rewards/rejected": -1.41229248046875, + "step": 339 + }, + { + "epoch": 0.43, + "learning_rate": 6.307399704769099e-08, + "logits/chosen": -3.2492856979370117, + "logits/rejected": -3.1336238384246826, + "logps/chosen": -251.49838256835938, + "logps/rejected": -479.62188720703125, + "loss": 0.3416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5979385375976562, + "rewards/margins": 2.0856752395629883, + "rewards/rejected": -1.4877365827560425, + "step": 340 + }, + { + "epoch": 0.43, + "learning_rate": 6.287439294144119e-08, + "logits/chosen": -3.221390724182129, + "logits/rejected": -3.145526885986328, + "logps/chosen": -243.05438232421875, + "logps/rejected": -383.7883605957031, + "loss": 0.3554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6908218860626221, + "rewards/margins": 1.9158387184143066, + "rewards/rejected": -1.2250168323516846, + "step": 341 + }, + { + "epoch": 0.44, + "learning_rate": 6.26745688473377e-08, + "logits/chosen": -3.2117133140563965, + "logits/rejected": -3.1700732707977295, + "logps/chosen": -256.1409606933594, + "logps/rejected": -577.3831787109375, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6632011532783508, + "rewards/margins": 2.8258659839630127, + "rewards/rejected": -2.1626648902893066, + "step": 342 + }, + { + "epoch": 0.44, + "learning_rate": 6.247452817982293e-08, + "logits/chosen": -3.1609408855438232, + "logits/rejected": -3.0378055572509766, + "logps/chosen": -239.4635772705078, + "logps/rejected": -427.191162109375, + "loss": 0.3771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5603836178779602, + "rewards/margins": 2.277688503265381, + "rewards/rejected": -1.717305064201355, + "step": 343 + }, + { + "epoch": 0.44, + "learning_rate": 6.227427435703996e-08, + "logits/chosen": -3.242983341217041, + "logits/rejected": -3.1390364170074463, + "logps/chosen": -275.0694885253906, + "logps/rejected": -434.9574279785156, + "loss": 0.3568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6592445373535156, + "rewards/margins": 2.1421852111816406, + "rewards/rejected": -1.482940673828125, + "step": 344 + }, + { + "epoch": 0.44, + "learning_rate": 6.20738108007741e-08, + "logits/chosen": -3.2276453971862793, + "logits/rejected": -3.1273531913757324, + "logps/chosen": -298.12750244140625, + "logps/rejected": -1288.9134521484375, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5695907473564148, + "rewards/margins": 5.263096809387207, + "rewards/rejected": -4.693505764007568, + "step": 345 + }, + { + "epoch": 0.44, + "learning_rate": 6.187314093639443e-08, + "logits/chosen": -3.274176597595215, + "logits/rejected": -3.138808012008667, + "logps/chosen": -272.44281005859375, + "logps/rejected": -589.2888793945312, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7795883417129517, + "rewards/margins": 2.812774658203125, + "rewards/rejected": -2.033186435699463, + "step": 346 + }, + { + "epoch": 0.44, + "learning_rate": 6.167226819279528e-08, + "logits/chosen": -3.187497615814209, + "logits/rejected": -3.0817153453826904, + "logps/chosen": -274.72747802734375, + "logps/rejected": -885.1285400390625, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7421677112579346, + "rewards/margins": 3.5787675380706787, + "rewards/rejected": -2.836599826812744, + "step": 347 + }, + { + "epoch": 0.44, + "learning_rate": 6.147119600233758e-08, + "logits/chosen": -3.2322440147399902, + "logits/rejected": -3.14560604095459, + "logps/chosen": -269.521484375, + "logps/rejected": -1056.919921875, + "loss": 0.3832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7829925417900085, + "rewards/margins": 4.206484794616699, + "rewards/rejected": -3.423492670059204, + "step": 348 + }, + { + "epoch": 0.44, + "learning_rate": 6.126992780079031e-08, + "logits/chosen": -3.2008843421936035, + "logits/rejected": -3.050225257873535, + "logps/chosen": -280.77691650390625, + "logps/rejected": -1183.6429443359375, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.722363293170929, + "rewards/margins": 4.013253688812256, + "rewards/rejected": -3.2908904552459717, + "step": 349 + }, + { + "epoch": 0.45, + "learning_rate": 6.106846702727172e-08, + "logits/chosen": -3.194495677947998, + "logits/rejected": -3.099644660949707, + "logps/chosen": -280.3257141113281, + "logps/rejected": -624.2388916015625, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7366104125976562, + "rewards/margins": 2.8682847023010254, + "rewards/rejected": -2.131674289703369, + "step": 350 + }, + { + "epoch": 0.45, + "learning_rate": 6.086681712419058e-08, + "logits/chosen": -3.3061792850494385, + "logits/rejected": -3.2045531272888184, + "logps/chosen": -250.75119018554688, + "logps/rejected": -486.31573486328125, + "loss": 0.3672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7099769711494446, + "rewards/margins": 2.6018898487091064, + "rewards/rejected": -1.8919129371643066, + "step": 351 + }, + { + "epoch": 0.45, + "learning_rate": 6.066498153718735e-08, + "logits/chosen": -3.2533071041107178, + "logits/rejected": -3.1714413166046143, + "logps/chosen": -274.798095703125, + "logps/rejected": -560.4915771484375, + "loss": 0.3562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6938705444335938, + "rewards/margins": 2.434007406234741, + "rewards/rejected": -1.7401368618011475, + "step": 352 + }, + { + "epoch": 0.45, + "learning_rate": 6.046296371507533e-08, + "logits/chosen": -3.2481892108917236, + "logits/rejected": -3.1616287231445312, + "logps/chosen": -252.42189025878906, + "logps/rejected": -583.50244140625, + "loss": 0.3314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.753063976764679, + "rewards/margins": 2.6959567070007324, + "rewards/rejected": -1.9428925514221191, + "step": 353 + }, + { + "epoch": 0.45, + "learning_rate": 6.02607671097817e-08, + "logits/chosen": -3.2809243202209473, + "logits/rejected": -3.088115692138672, + "logps/chosen": -273.10943603515625, + "logps/rejected": -570.7112426757812, + "loss": 0.3632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6636184453964233, + "rewards/margins": 2.4443235397338867, + "rewards/rejected": -1.780705213546753, + "step": 354 + }, + { + "epoch": 0.45, + "learning_rate": 6.005839517628861e-08, + "logits/chosen": -3.281942367553711, + "logits/rejected": -3.1486411094665527, + "logps/chosen": -275.8187561035156, + "logps/rejected": -678.0150146484375, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6312279105186462, + "rewards/margins": 3.268981456756592, + "rewards/rejected": -2.637753486633301, + "step": 355 + }, + { + "epoch": 0.45, + "learning_rate": 5.985585137257401e-08, + "logits/chosen": -3.2016468048095703, + "logits/rejected": -3.062408924102783, + "logps/chosen": -265.64404296875, + "logps/rejected": -1168.824462890625, + "loss": 0.3569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.559539794921875, + "rewards/margins": 3.6006226539611816, + "rewards/rejected": -3.0410828590393066, + "step": 356 + }, + { + "epoch": 0.46, + "learning_rate": 5.965313915955268e-08, + "logits/chosen": -3.2528367042541504, + "logits/rejected": -3.090085029602051, + "logps/chosen": -228.8092041015625, + "logps/rejected": -785.1793823242188, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5413597226142883, + "rewards/margins": 2.804135799407959, + "rewards/rejected": -2.2627761363983154, + "step": 357 + }, + { + "epoch": 0.46, + "learning_rate": 5.945026200101701e-08, + "logits/chosen": -3.240324020385742, + "logits/rejected": -3.1672616004943848, + "logps/chosen": -242.69630432128906, + "logps/rejected": -835.1952514648438, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6618102788925171, + "rewards/margins": 3.266876220703125, + "rewards/rejected": -2.6050658226013184, + "step": 358 + }, + { + "epoch": 0.46, + "learning_rate": 5.9247223363577924e-08, + "logits/chosen": -3.25748872756958, + "logits/rejected": -3.125854969024658, + "logps/chosen": -275.693359375, + "logps/rejected": -672.1638793945312, + "loss": 0.3939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5371513366699219, + "rewards/margins": 2.8809943199157715, + "rewards/rejected": -2.3438429832458496, + "step": 359 + }, + { + "epoch": 0.46, + "learning_rate": 5.90440267166055e-08, + "logits/chosen": -3.168233871459961, + "logits/rejected": -3.1218905448913574, + "logps/chosen": -271.54888916015625, + "logps/rejected": -543.5419921875, + "loss": 0.3616, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7504852414131165, + "rewards/margins": 2.4019012451171875, + "rewards/rejected": -1.6514160633087158, + "step": 360 + }, + { + "epoch": 0.46, + "learning_rate": 5.8840675532169806e-08, + "logits/chosen": -3.2551724910736084, + "logits/rejected": -3.1538891792297363, + "logps/chosen": -217.16146850585938, + "logps/rejected": -965.95849609375, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5860252380371094, + "rewards/margins": 4.38204288482666, + "rewards/rejected": -3.796017646789551, + "step": 361 + }, + { + "epoch": 0.46, + "learning_rate": 5.8637173284981525e-08, + "logits/chosen": -3.3110907077789307, + "logits/rejected": -3.158294916152954, + "logps/chosen": -258.9945068359375, + "logps/rejected": -383.0972595214844, + "loss": 0.415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5501960515975952, + "rewards/margins": 2.1075005531311035, + "rewards/rejected": -1.5573043823242188, + "step": 362 + }, + { + "epoch": 0.46, + "learning_rate": 5.843352345233257e-08, + "logits/chosen": -3.291278839111328, + "logits/rejected": -3.1626269817352295, + "logps/chosen": -259.82586669921875, + "logps/rejected": -700.020263671875, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6913177967071533, + "rewards/margins": 3.58563232421875, + "rewards/rejected": -2.894314765930176, + "step": 363 + }, + { + "epoch": 0.46, + "learning_rate": 5.8229729514036697e-08, + "logits/chosen": -3.3151886463165283, + "logits/rejected": -3.1235532760620117, + "logps/chosen": -258.6416931152344, + "logps/rejected": -692.3502197265625, + "loss": 0.346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7566788196563721, + "rewards/margins": 2.712899684906006, + "rewards/rejected": -1.956221103668213, + "step": 364 + }, + { + "epoch": 0.47, + "learning_rate": 5.802579495237003e-08, + "logits/chosen": -3.2962489128112793, + "logits/rejected": -3.1533169746398926, + "logps/chosen": -284.6039123535156, + "logps/rejected": -713.4740600585938, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5759079456329346, + "rewards/margins": 3.4766130447387695, + "rewards/rejected": -2.900705099105835, + "step": 365 + }, + { + "epoch": 0.47, + "learning_rate": 5.7821723252011546e-08, + "logits/chosen": -3.174954891204834, + "logits/rejected": -3.121610164642334, + "logps/chosen": -242.47970581054688, + "logps/rejected": -365.2344970703125, + "loss": 0.3513, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6035217642784119, + "rewards/margins": 1.5816636085510254, + "rewards/rejected": -0.9781417846679688, + "step": 366 + }, + { + "epoch": 0.47, + "learning_rate": 5.7617517899983546e-08, + "logits/chosen": -3.3058998584747314, + "logits/rejected": -3.1842684745788574, + "logps/chosen": -260.03704833984375, + "logps/rejected": -428.03338623046875, + "loss": 0.3961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7125045657157898, + "rewards/margins": 2.0963821411132812, + "rewards/rejected": -1.3838776350021362, + "step": 367 + }, + { + "epoch": 0.47, + "learning_rate": 5.741318238559209e-08, + "logits/chosen": -3.2687954902648926, + "logits/rejected": -3.0456557273864746, + "logps/chosen": -262.13226318359375, + "logps/rejected": -474.8670654296875, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6357818841934204, + "rewards/margins": 2.0317063331604004, + "rewards/rejected": -1.3959243297576904, + "step": 368 + }, + { + "epoch": 0.47, + "learning_rate": 5.7208720200367334e-08, + "logits/chosen": -3.2461304664611816, + "logits/rejected": -3.190211772918701, + "logps/chosen": -253.00009155273438, + "logps/rejected": -800.0416259765625, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5682159662246704, + "rewards/margins": 3.2842650413513184, + "rewards/rejected": -2.7160491943359375, + "step": 369 + }, + { + "epoch": 0.47, + "learning_rate": 5.7004134838003895e-08, + "logits/chosen": -3.2626233100891113, + "logits/rejected": -3.078598976135254, + "logps/chosen": -249.43045043945312, + "logps/rejected": -256.66802978515625, + "loss": 0.362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8222931623458862, + "rewards/margins": 1.5509445667266846, + "rewards/rejected": -0.7286514639854431, + "step": 370 + }, + { + "epoch": 0.47, + "learning_rate": 5.6799429794301135e-08, + "logits/chosen": -3.234226703643799, + "logits/rejected": -3.138455390930176, + "logps/chosen": -286.97900390625, + "logps/rejected": -388.5616760253906, + "loss": 0.3727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.528820812702179, + "rewards/margins": 1.664312720298767, + "rewards/rejected": -1.135491967201233, + "step": 371 + }, + { + "epoch": 0.47, + "learning_rate": 5.659460856710345e-08, + "logits/chosen": -3.225703239440918, + "logits/rejected": -2.971621513366699, + "logps/chosen": -300.6927490234375, + "logps/rejected": -312.5350036621094, + "loss": 0.4108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6131362915039062, + "rewards/margins": 1.484674096107483, + "rewards/rejected": -0.8715378046035767, + "step": 372 + }, + { + "epoch": 0.48, + "learning_rate": 5.63896746562405e-08, + "logits/chosen": -3.2545218467712402, + "logits/rejected": -2.995742082595825, + "logps/chosen": -247.6783447265625, + "logps/rejected": -1203.9464111328125, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.714447021484375, + "rewards/margins": 3.7844910621643066, + "rewards/rejected": -3.0700440406799316, + "step": 373 + }, + { + "epoch": 0.48, + "learning_rate": 5.618463156346739e-08, + "logits/chosen": -3.1887240409851074, + "logits/rejected": -3.0677151679992676, + "logps/chosen": -259.09039306640625, + "logps/rejected": -363.21453857421875, + "loss": 0.3666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5899513363838196, + "rewards/margins": 1.627265214920044, + "rewards/rejected": -1.0373139381408691, + "step": 374 + }, + { + "epoch": 0.48, + "learning_rate": 5.597948279240483e-08, + "logits/chosen": -3.2314553260803223, + "logits/rejected": -3.139500379562378, + "logps/chosen": -291.56298828125, + "logps/rejected": -538.0628051757812, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.377340704202652, + "rewards/margins": 2.4386703968048096, + "rewards/rejected": -2.0613298416137695, + "step": 375 + }, + { + "epoch": 0.48, + "learning_rate": 5.5774231848479313e-08, + "logits/chosen": -3.232259750366211, + "logits/rejected": -3.1359286308288574, + "logps/chosen": -303.42413330078125, + "logps/rejected": -761.020263671875, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.564312756061554, + "rewards/margins": 3.2086517810821533, + "rewards/rejected": -2.644339084625244, + "step": 376 + }, + { + "epoch": 0.48, + "learning_rate": 5.556888223886315e-08, + "logits/chosen": -3.2582051753997803, + "logits/rejected": -3.0738768577575684, + "logps/chosen": -283.0958251953125, + "logps/rejected": -897.5354614257812, + "loss": 0.4313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.855511486530304, + "rewards/margins": 3.3147521018981934, + "rewards/rejected": -2.459240674972534, + "step": 377 + }, + { + "epoch": 0.48, + "learning_rate": 5.536343747241459e-08, + "logits/chosen": -3.2350549697875977, + "logits/rejected": -3.115853786468506, + "logps/chosen": -290.452880859375, + "logps/rejected": -694.1759643554688, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8939942121505737, + "rewards/margins": 3.4985427856445312, + "rewards/rejected": -2.604548692703247, + "step": 378 + }, + { + "epoch": 0.48, + "learning_rate": 5.515790105961785e-08, + "logits/chosen": -3.193549156188965, + "logits/rejected": -3.1489572525024414, + "logps/chosen": -258.0522766113281, + "logps/rejected": -1003.9136352539062, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6988494992256165, + "rewards/margins": 4.639700412750244, + "rewards/rejected": -3.9408507347106934, + "step": 379 + }, + { + "epoch": 0.48, + "learning_rate": 5.495227651252314e-08, + "logits/chosen": -3.2681355476379395, + "logits/rejected": -3.0875484943389893, + "logps/chosen": -282.91876220703125, + "logps/rejected": -311.5776672363281, + "loss": 0.4324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4677986204624176, + "rewards/margins": 1.0875869989395142, + "rewards/rejected": -0.6197883486747742, + "step": 380 + }, + { + "epoch": 0.49, + "learning_rate": 5.474656734468662e-08, + "logits/chosen": -3.2195510864257812, + "logits/rejected": -3.056401252746582, + "logps/chosen": -288.5096130371094, + "logps/rejected": -893.37890625, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5493927001953125, + "rewards/margins": 3.28324294090271, + "rewards/rejected": -2.7338502407073975, + "step": 381 + }, + { + "epoch": 0.49, + "learning_rate": 5.454077707111041e-08, + "logits/chosen": -3.1930856704711914, + "logits/rejected": -3.1450796127319336, + "logps/chosen": -277.99688720703125, + "logps/rejected": -670.109130859375, + "loss": 0.3863, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5771988034248352, + "rewards/margins": 3.220013380050659, + "rewards/rejected": -2.6428146362304688, + "step": 382 + }, + { + "epoch": 0.49, + "learning_rate": 5.433490920818249e-08, + "logits/chosen": -3.291879653930664, + "logits/rejected": -3.1073851585388184, + "logps/chosen": -263.20562744140625, + "logps/rejected": -647.5684814453125, + "loss": 0.3551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6925079822540283, + "rewards/margins": 2.79036283493042, + "rewards/rejected": -2.0978546142578125, + "step": 383 + }, + { + "epoch": 0.49, + "learning_rate": 5.4128967273616623e-08, + "logits/chosen": -3.2620201110839844, + "logits/rejected": -3.123199224472046, + "logps/chosen": -278.3626403808594, + "logps/rejected": -450.60546875, + "loss": 0.3933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7361694574356079, + "rewards/margins": 2.3862709999084473, + "rewards/rejected": -1.6501014232635498, + "step": 384 + }, + { + "epoch": 0.49, + "learning_rate": 5.392295478639225e-08, + "logits/chosen": -3.1709251403808594, + "logits/rejected": -3.147690773010254, + "logps/chosen": -238.37399291992188, + "logps/rejected": -1148.6685791015625, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7170120477676392, + "rewards/margins": 4.566041946411133, + "rewards/rejected": -3.849029541015625, + "step": 385 + }, + { + "epoch": 0.49, + "learning_rate": 5.3716875266694385e-08, + "logits/chosen": -3.214611053466797, + "logits/rejected": -2.9337806701660156, + "logps/chosen": -272.2792053222656, + "logps/rejected": -1333.37353515625, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47888487577438354, + "rewards/margins": 4.146847724914551, + "rewards/rejected": -3.6679625511169434, + "step": 386 + }, + { + "epoch": 0.49, + "learning_rate": 5.351073223585341e-08, + "logits/chosen": -3.2021703720092773, + "logits/rejected": -3.0993125438690186, + "logps/chosen": -278.6165771484375, + "logps/rejected": -670.2313842773438, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7105255126953125, + "rewards/margins": 2.9980592727661133, + "rewards/rejected": -2.287533760070801, + "step": 387 + }, + { + "epoch": 0.49, + "learning_rate": 5.3304529216284965e-08, + "logits/chosen": -3.257398843765259, + "logits/rejected": -3.243666887283325, + "logps/chosen": -235.33602905273438, + "logps/rejected": -633.2448120117188, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6437225341796875, + "rewards/margins": 2.908870220184326, + "rewards/rejected": -2.2651474475860596, + "step": 388 + }, + { + "epoch": 0.5, + "learning_rate": 5.309826973142973e-08, + "logits/chosen": -3.2367496490478516, + "logits/rejected": -3.142157554626465, + "logps/chosen": -262.024658203125, + "logps/rejected": -595.34326171875, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7299743890762329, + "rewards/margins": 2.856306552886963, + "rewards/rejected": -2.1263320446014404, + "step": 389 + }, + { + "epoch": 0.5, + "learning_rate": 5.28919573056932e-08, + "logits/chosen": -3.2669944763183594, + "logits/rejected": -3.0375332832336426, + "logps/chosen": -256.5841979980469, + "logps/rejected": -430.42413330078125, + "loss": 0.3504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6868728399276733, + "rewards/margins": 2.189936876296997, + "rewards/rejected": -1.5030640363693237, + "step": 390 + }, + { + "epoch": 0.5, + "learning_rate": 5.268559546438549e-08, + "logits/chosen": -3.193903923034668, + "logits/rejected": -3.156768321990967, + "logps/chosen": -257.35943603515625, + "logps/rejected": -4094.84716796875, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6293380856513977, + "rewards/margins": 6.335026741027832, + "rewards/rejected": -5.7056884765625, + "step": 391 + }, + { + "epoch": 0.5, + "learning_rate": 5.2479187733661114e-08, + "logits/chosen": -3.2003674507141113, + "logits/rejected": -3.1094279289245605, + "logps/chosen": -271.3429260253906, + "logps/rejected": -660.85888671875, + "loss": 0.368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6033614873886108, + "rewards/margins": 3.0682997703552246, + "rewards/rejected": -2.4649384021759033, + "step": 392 + }, + { + "epoch": 0.5, + "learning_rate": 5.227273764045868e-08, + "logits/chosen": -3.22098970413208, + "logits/rejected": -3.1591603755950928, + "logps/chosen": -251.87167358398438, + "logps/rejected": -807.4339599609375, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6497894525527954, + "rewards/margins": 3.499886989593506, + "rewards/rejected": -2.85009765625, + "step": 393 + }, + { + "epoch": 0.5, + "learning_rate": 5.2066248712440654e-08, + "logits/chosen": -3.2592506408691406, + "logits/rejected": -3.098179578781128, + "logps/chosen": -296.99603271484375, + "logps/rejected": -305.697998046875, + "loss": 0.398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6578583121299744, + "rewards/margins": 1.5365281105041504, + "rewards/rejected": -0.8786697387695312, + "step": 394 + }, + { + "epoch": 0.5, + "learning_rate": 5.185972447793312e-08, + "logits/chosen": -3.266895294189453, + "logits/rejected": -3.1549127101898193, + "logps/chosen": -278.557373046875, + "logps/rejected": -615.2781982421875, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7011260986328125, + "rewards/margins": 2.930926561355591, + "rewards/rejected": -2.2298004627227783, + "step": 395 + }, + { + "epoch": 0.5, + "learning_rate": 5.16531684658654e-08, + "logits/chosen": -3.202725887298584, + "logits/rejected": -3.098365306854248, + "logps/chosen": -269.12384033203125, + "logps/rejected": -527.2980346679688, + "loss": 0.3978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7062347531318665, + "rewards/margins": 2.6206421852111816, + "rewards/rejected": -1.91440749168396, + "step": 396 + }, + { + "epoch": 0.51, + "learning_rate": 5.1446584205709856e-08, + "logits/chosen": -3.226125478744507, + "logits/rejected": -3.1359670162200928, + "logps/chosen": -271.2480163574219, + "logps/rejected": -547.7611083984375, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6336692571640015, + "rewards/margins": 2.5085716247558594, + "rewards/rejected": -1.874902367591858, + "step": 397 + }, + { + "epoch": 0.51, + "learning_rate": 5.123997522742151e-08, + "logits/chosen": -3.232983112335205, + "logits/rejected": -3.1766357421875, + "logps/chosen": -267.85760498046875, + "logps/rejected": -515.4994506835938, + "loss": 0.3572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6137664914131165, + "rewards/margins": 2.454615831375122, + "rewards/rejected": -1.8408493995666504, + "step": 398 + }, + { + "epoch": 0.51, + "learning_rate": 5.103334506137772e-08, + "logits/chosen": -3.2396626472473145, + "logits/rejected": -3.089831590652466, + "logps/chosen": -291.0970458984375, + "logps/rejected": -418.21319580078125, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6219833493232727, + "rewards/margins": 2.292022705078125, + "rewards/rejected": -1.670039415359497, + "step": 399 + }, + { + "epoch": 0.51, + "learning_rate": 5.082669723831793e-08, + "logits/chosen": -3.2121148109436035, + "logits/rejected": -3.071161985397339, + "logps/chosen": -270.32879638671875, + "logps/rejected": -855.6022338867188, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8033287525177002, + "rewards/margins": 3.652825355529785, + "rewards/rejected": -2.849496603012085, + "step": 400 + }, + { + "epoch": 0.51, + "learning_rate": 5.062003528928327e-08, + "logits/chosen": -3.272751808166504, + "logits/rejected": -3.120957851409912, + "logps/chosen": -268.3267822265625, + "logps/rejected": -415.9049072265625, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7698593139648438, + "rewards/margins": 2.1111068725585938, + "rewards/rejected": -1.34124755859375, + "step": 401 + }, + { + "epoch": 0.51, + "learning_rate": 5.041336274555624e-08, + "logits/chosen": -3.235424518585205, + "logits/rejected": -3.1855249404907227, + "logps/chosen": -281.3585510253906, + "logps/rejected": -714.8544921875, + "loss": 0.4077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5944458246231079, + "rewards/margins": 3.1180527210235596, + "rewards/rejected": -2.523606777191162, + "step": 402 + }, + { + "epoch": 0.51, + "learning_rate": 5.0206683138600414e-08, + "logits/chosen": -3.1641526222229004, + "logits/rejected": -3.0834569931030273, + "logps/chosen": -297.9799499511719, + "logps/rejected": -467.9543151855469, + "loss": 0.3872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9889075756072998, + "rewards/margins": 2.5918800830841064, + "rewards/rejected": -1.6029725074768066, + "step": 403 + }, + { + "epoch": 0.51, + "learning_rate": 5e-08, + "logits/chosen": -3.19346284866333, + "logits/rejected": -3.045135259628296, + "logps/chosen": -270.6624755859375, + "logps/rejected": -196.39324951171875, + "loss": 0.4605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6862381100654602, + "rewards/margins": 1.1038376092910767, + "rewards/rejected": -0.41759949922561646, + "step": 404 + }, + { + "epoch": 0.52, + "learning_rate": 4.9793316861399595e-08, + "logits/chosen": -3.2054030895233154, + "logits/rejected": -3.1834540367126465, + "logps/chosen": -264.8404541015625, + "logps/rejected": -407.8164367675781, + "loss": 0.3751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6203773617744446, + "rewards/margins": 2.251713752746582, + "rewards/rejected": -1.6313363313674927, + "step": 405 + }, + { + "epoch": 0.52, + "learning_rate": 4.9586637254443753e-08, + "logits/chosen": -3.2647628784179688, + "logits/rejected": -3.159982204437256, + "logps/chosen": -266.88671875, + "logps/rejected": -400.50628662109375, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7301743030548096, + "rewards/margins": 2.1139824390411377, + "rewards/rejected": -1.3838082551956177, + "step": 406 + }, + { + "epoch": 0.52, + "learning_rate": 4.937996471071675e-08, + "logits/chosen": -3.219193458557129, + "logits/rejected": -3.1289570331573486, + "logps/chosen": -286.0439453125, + "logps/rejected": -852.7981567382812, + "loss": 0.3341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.66912841796875, + "rewards/margins": 3.5454773902893066, + "rewards/rejected": -2.8763489723205566, + "step": 407 + }, + { + "epoch": 0.52, + "learning_rate": 4.917330276168207e-08, + "logits/chosen": -3.2529137134552, + "logits/rejected": -3.078090190887451, + "logps/chosen": -271.8634338378906, + "logps/rejected": -409.9962463378906, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7092811465263367, + "rewards/margins": 2.0802574157714844, + "rewards/rejected": -1.3709763288497925, + "step": 408 + }, + { + "epoch": 0.52, + "learning_rate": 4.8966654938622295e-08, + "logits/chosen": -3.2948451042175293, + "logits/rejected": -3.099519968032837, + "logps/chosen": -265.73614501953125, + "logps/rejected": -494.2835388183594, + "loss": 0.3749, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.841498613357544, + "rewards/margins": 2.4667441844940186, + "rewards/rejected": -1.6252456903457642, + "step": 409 + }, + { + "epoch": 0.52, + "learning_rate": 4.8760024772578495e-08, + "logits/chosen": -3.190556049346924, + "logits/rejected": -3.0772438049316406, + "logps/chosen": -302.77386474609375, + "logps/rejected": -861.2744140625, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6746490597724915, + "rewards/margins": 3.694161891937256, + "rewards/rejected": -3.0195131301879883, + "step": 410 + }, + { + "epoch": 0.52, + "learning_rate": 4.855341579429014e-08, + "logits/chosen": -3.2459816932678223, + "logits/rejected": -3.11415958404541, + "logps/chosen": -303.3800964355469, + "logps/rejected": -1196.7296142578125, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7044403553009033, + "rewards/margins": 4.592559814453125, + "rewards/rejected": -3.8881194591522217, + "step": 411 + }, + { + "epoch": 0.53, + "learning_rate": 4.834683153413459e-08, + "logits/chosen": -3.260183811187744, + "logits/rejected": -3.1303348541259766, + "logps/chosen": -288.8937072753906, + "logps/rejected": -389.3914794921875, + "loss": 0.4332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6449249386787415, + "rewards/margins": 1.9180526733398438, + "rewards/rejected": -1.273127794265747, + "step": 412 + }, + { + "epoch": 0.53, + "learning_rate": 4.814027552206689e-08, + "logits/chosen": -3.2042031288146973, + "logits/rejected": -3.045440912246704, + "logps/chosen": -257.78424072265625, + "logps/rejected": -362.7256164550781, + "loss": 0.3341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6212974786758423, + "rewards/margins": 1.7025666236877441, + "rewards/rejected": -1.0812691450119019, + "step": 413 + }, + { + "epoch": 0.53, + "learning_rate": 4.793375128755933e-08, + "logits/chosen": -3.247021198272705, + "logits/rejected": -3.0820937156677246, + "logps/chosen": -256.81243896484375, + "logps/rejected": -996.5455322265625, + "loss": 0.3821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7801445126533508, + "rewards/margins": 3.276447296142578, + "rewards/rejected": -2.496302843093872, + "step": 414 + }, + { + "epoch": 0.53, + "learning_rate": 4.7727262359541324e-08, + "logits/chosen": -3.231053352355957, + "logits/rejected": -3.084796905517578, + "logps/chosen": -244.62307739257812, + "logps/rejected": -552.5958862304688, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7488166689872742, + "rewards/margins": 2.7352380752563477, + "rewards/rejected": -1.9864213466644287, + "step": 415 + }, + { + "epoch": 0.53, + "learning_rate": 4.7520812266338875e-08, + "logits/chosen": -3.237215518951416, + "logits/rejected": -3.0889718532562256, + "logps/chosen": -237.52401733398438, + "logps/rejected": -466.5859069824219, + "loss": 0.3346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6620849370956421, + "rewards/margins": 2.092996120452881, + "rewards/rejected": -1.4309113025665283, + "step": 416 + }, + { + "epoch": 0.53, + "learning_rate": 4.7314404535614514e-08, + "logits/chosen": -3.2038450241088867, + "logits/rejected": -3.0921292304992676, + "logps/chosen": -306.38250732421875, + "logps/rejected": -900.840087890625, + "loss": 0.3407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7103103399276733, + "rewards/margins": 3.5033981800079346, + "rewards/rejected": -2.7930877208709717, + "step": 417 + }, + { + "epoch": 0.53, + "learning_rate": 4.7108042694306806e-08, + "logits/chosen": -3.237574338912964, + "logits/rejected": -3.1662588119506836, + "logps/chosen": -284.76153564453125, + "logps/rejected": -603.6781005859375, + "loss": 0.375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6472122669219971, + "rewards/margins": 2.937222480773926, + "rewards/rejected": -2.2900099754333496, + "step": 418 + }, + { + "epoch": 0.53, + "learning_rate": 4.690173026857027e-08, + "logits/chosen": -3.220792293548584, + "logits/rejected": -3.044682741165161, + "logps/chosen": -248.48287963867188, + "logps/rejected": -1463.016357421875, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7183700799942017, + "rewards/margins": 4.566953659057617, + "rewards/rejected": -3.848583936691284, + "step": 419 + }, + { + "epoch": 0.54, + "learning_rate": 4.669547078371503e-08, + "logits/chosen": -3.1580467224121094, + "logits/rejected": -3.0713706016540527, + "logps/chosen": -250.0205078125, + "logps/rejected": -571.72021484375, + "loss": 0.3832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7873848080635071, + "rewards/margins": 2.756594181060791, + "rewards/rejected": -1.9692093133926392, + "step": 420 + }, + { + "epoch": 0.54, + "learning_rate": 4.648926776414659e-08, + "logits/chosen": -3.2513275146484375, + "logits/rejected": -3.145200729370117, + "logps/chosen": -260.5591735839844, + "logps/rejected": -658.4990844726562, + "loss": 0.3341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7738265991210938, + "rewards/margins": 3.523080348968506, + "rewards/rejected": -2.749253749847412, + "step": 421 + }, + { + "epoch": 0.54, + "learning_rate": 4.6283124733305623e-08, + "logits/chosen": -3.2937278747558594, + "logits/rejected": -3.1257691383361816, + "logps/chosen": -287.8133544921875, + "logps/rejected": -653.8632202148438, + "loss": 0.3743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4819580316543579, + "rewards/margins": 2.6612548828125, + "rewards/rejected": -2.1792969703674316, + "step": 422 + }, + { + "epoch": 0.54, + "learning_rate": 4.6077045213607754e-08, + "logits/chosen": -3.225762367248535, + "logits/rejected": -3.1520848274230957, + "logps/chosen": -233.0726318359375, + "logps/rejected": -723.4998779296875, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5466454029083252, + "rewards/margins": 2.9948129653930664, + "rewards/rejected": -2.448167324066162, + "step": 423 + }, + { + "epoch": 0.54, + "learning_rate": 4.5871032726383385e-08, + "logits/chosen": -3.253185272216797, + "logits/rejected": -3.0663228034973145, + "logps/chosen": -270.78472900390625, + "logps/rejected": -512.999755859375, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8124725818634033, + "rewards/margins": 2.327597141265869, + "rewards/rejected": -1.5151245594024658, + "step": 424 + }, + { + "epoch": 0.54, + "learning_rate": 4.566509079181751e-08, + "logits/chosen": -3.1947922706604004, + "logits/rejected": -3.103447437286377, + "logps/chosen": -264.1605224609375, + "logps/rejected": -867.6554565429688, + "loss": 0.3424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6631393432617188, + "rewards/margins": 3.429543972015381, + "rewards/rejected": -2.766404628753662, + "step": 425 + }, + { + "epoch": 0.54, + "learning_rate": 4.5459222928889584e-08, + "logits/chosen": -3.151660442352295, + "logits/rejected": -3.09566593170166, + "logps/chosen": -281.15673828125, + "logps/rejected": -570.8453369140625, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7283729314804077, + "rewards/margins": 2.745903730392456, + "rewards/rejected": -2.017530918121338, + "step": 426 + }, + { + "epoch": 0.54, + "learning_rate": 4.525343265531338e-08, + "logits/chosen": -3.2846732139587402, + "logits/rejected": -3.104860544204712, + "logps/chosen": -275.74371337890625, + "logps/rejected": -895.8060913085938, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.709155261516571, + "rewards/margins": 3.5651612281799316, + "rewards/rejected": -2.856005907058716, + "step": 427 + }, + { + "epoch": 0.55, + "learning_rate": 4.504772348747686e-08, + "logits/chosen": -3.2042946815490723, + "logits/rejected": -3.151468515396118, + "logps/chosen": -297.0256652832031, + "logps/rejected": -630.2733154296875, + "loss": 0.414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.806409478187561, + "rewards/margins": 2.9867470264434814, + "rewards/rejected": -2.18033766746521, + "step": 428 + }, + { + "epoch": 0.55, + "learning_rate": 4.484209894038215e-08, + "logits/chosen": -3.1762266159057617, + "logits/rejected": -3.126922845840454, + "logps/chosen": -246.36080932617188, + "logps/rejected": -704.9800415039062, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.59075927734375, + "rewards/margins": 3.4554123878479004, + "rewards/rejected": -2.8646531105041504, + "step": 429 + }, + { + "epoch": 0.55, + "learning_rate": 4.463656252758542e-08, + "logits/chosen": -3.2462430000305176, + "logits/rejected": -3.106200933456421, + "logps/chosen": -260.8867492675781, + "logps/rejected": -603.6046752929688, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8314880728721619, + "rewards/margins": 2.7150299549102783, + "rewards/rejected": -1.8835418224334717, + "step": 430 + }, + { + "epoch": 0.55, + "learning_rate": 4.443111776113686e-08, + "logits/chosen": -3.194406509399414, + "logits/rejected": -3.0992751121520996, + "logps/chosen": -285.0775146484375, + "logps/rejected": -1099.71142578125, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7467300295829773, + "rewards/margins": 3.777468681335449, + "rewards/rejected": -3.0307388305664062, + "step": 431 + }, + { + "epoch": 0.55, + "learning_rate": 4.4225768151520695e-08, + "logits/chosen": -3.1957156658172607, + "logits/rejected": -2.999868392944336, + "logps/chosen": -253.60519409179688, + "logps/rejected": -321.24066162109375, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7572822570800781, + "rewards/margins": 1.7122246026992798, + "rewards/rejected": -0.9549423456192017, + "step": 432 + }, + { + "epoch": 0.55, + "learning_rate": 4.402051720759518e-08, + "logits/chosen": -3.225048542022705, + "logits/rejected": -3.1194241046905518, + "logps/chosen": -256.1736145019531, + "logps/rejected": -510.2173767089844, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7332115173339844, + "rewards/margins": 2.5731682777404785, + "rewards/rejected": -1.8399567604064941, + "step": 433 + }, + { + "epoch": 0.55, + "learning_rate": 4.3815368436532614e-08, + "logits/chosen": -3.2270846366882324, + "logits/rejected": -2.917844772338867, + "logps/chosen": -261.96820068359375, + "logps/rejected": -946.9097900390625, + "loss": 0.3819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7499435544013977, + "rewards/margins": 3.2955703735351562, + "rewards/rejected": -2.5456268787384033, + "step": 434 + }, + { + "epoch": 0.55, + "learning_rate": 4.361032534375951e-08, + "logits/chosen": -3.2820935249328613, + "logits/rejected": -3.0980591773986816, + "logps/chosen": -262.088134765625, + "logps/rejected": -1633.665771484375, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5708038210868835, + "rewards/margins": 6.085992336273193, + "rewards/rejected": -5.515188694000244, + "step": 435 + }, + { + "epoch": 0.56, + "learning_rate": 4.340539143289655e-08, + "logits/chosen": -3.2117371559143066, + "logits/rejected": -3.06785249710083, + "logps/chosen": -270.47943115234375, + "logps/rejected": -479.47235107421875, + "loss": 0.3775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8820419311523438, + "rewards/margins": 2.5535812377929688, + "rewards/rejected": -1.671539306640625, + "step": 436 + }, + { + "epoch": 0.56, + "learning_rate": 4.320057020569888e-08, + "logits/chosen": -3.1879920959472656, + "logits/rejected": -3.058281898498535, + "logps/chosen": -313.2799072265625, + "logps/rejected": -1539.025390625, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6944717168807983, + "rewards/margins": 5.516853332519531, + "rewards/rejected": -4.822381496429443, + "step": 437 + }, + { + "epoch": 0.56, + "learning_rate": 4.29958651619961e-08, + "logits/chosen": -3.212714672088623, + "logits/rejected": -3.098940372467041, + "logps/chosen": -283.2081298828125, + "logps/rejected": -561.805908203125, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7631180286407471, + "rewards/margins": 2.8341445922851562, + "rewards/rejected": -2.071026563644409, + "step": 438 + }, + { + "epoch": 0.56, + "learning_rate": 4.279127979963266e-08, + "logits/chosen": -3.2376577854156494, + "logits/rejected": -3.123075485229492, + "logps/chosen": -298.7832946777344, + "logps/rejected": -811.5333251953125, + "loss": 0.3613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7564346790313721, + "rewards/margins": 3.733412265777588, + "rewards/rejected": -2.976977586746216, + "step": 439 + }, + { + "epoch": 0.56, + "learning_rate": 4.2586817614407896e-08, + "logits/chosen": -3.2173376083374023, + "logits/rejected": -3.1637823581695557, + "logps/chosen": -294.6906433105469, + "logps/rejected": -580.4907836914062, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.631456732749939, + "rewards/margins": 2.463242530822754, + "rewards/rejected": -1.8317856788635254, + "step": 440 + }, + { + "epoch": 0.56, + "learning_rate": 4.238248210001645e-08, + "logits/chosen": -3.2967004776000977, + "logits/rejected": -3.030978202819824, + "logps/chosen": -259.40386962890625, + "logps/rejected": -1285.2333984375, + "loss": 0.3448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7011665105819702, + "rewards/margins": 4.889743804931641, + "rewards/rejected": -4.188577175140381, + "step": 441 + }, + { + "epoch": 0.56, + "learning_rate": 4.217827674798844e-08, + "logits/chosen": -3.2176666259765625, + "logits/rejected": -3.130898952484131, + "logps/chosen": -256.3939514160156, + "logps/rejected": -822.9447021484375, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.691022515296936, + "rewards/margins": 3.406500816345215, + "rewards/rejected": -2.7154784202575684, + "step": 442 + }, + { + "epoch": 0.56, + "learning_rate": 4.197420504762997e-08, + "logits/chosen": -3.262028217315674, + "logits/rejected": -3.0831332206726074, + "logps/chosen": -250.53897094726562, + "logps/rejected": -639.8985595703125, + "loss": 0.3544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5934165716171265, + "rewards/margins": 3.2033944129943848, + "rewards/rejected": -2.6099777221679688, + "step": 443 + }, + { + "epoch": 0.57, + "learning_rate": 4.177027048596329e-08, + "logits/chosen": -3.2071895599365234, + "logits/rejected": -3.0790011882781982, + "logps/chosen": -304.27325439453125, + "logps/rejected": -407.51361083984375, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7512786984443665, + "rewards/margins": 2.2349839210510254, + "rewards/rejected": -1.4837051630020142, + "step": 444 + }, + { + "epoch": 0.57, + "learning_rate": 4.156647654766743e-08, + "logits/chosen": -3.2018308639526367, + "logits/rejected": -3.0356431007385254, + "logps/chosen": -249.93441772460938, + "logps/rejected": -1255.794189453125, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7910232543945312, + "rewards/margins": 4.540635108947754, + "rewards/rejected": -3.7496116161346436, + "step": 445 + }, + { + "epoch": 0.57, + "learning_rate": 4.13628267150185e-08, + "logits/chosen": -3.238731861114502, + "logits/rejected": -3.1137595176696777, + "logps/chosen": -264.45379638671875, + "logps/rejected": -547.028564453125, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7895996570587158, + "rewards/margins": 3.2415194511413574, + "rewards/rejected": -2.4519195556640625, + "step": 446 + }, + { + "epoch": 0.57, + "learning_rate": 4.1159324467830196e-08, + "logits/chosen": -3.2905993461608887, + "logits/rejected": -3.134761333465576, + "logps/chosen": -277.0553283691406, + "logps/rejected": -687.280517578125, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7109329104423523, + "rewards/margins": 3.275691270828247, + "rewards/rejected": -2.56475830078125, + "step": 447 + }, + { + "epoch": 0.57, + "learning_rate": 4.095597328339452e-08, + "logits/chosen": -3.200399875640869, + "logits/rejected": -3.173875093460083, + "logps/chosen": -272.79425048828125, + "logps/rejected": -575.504150390625, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.016291856765747, + "rewards/margins": 2.9850869178771973, + "rewards/rejected": -1.9687950611114502, + "step": 448 + }, + { + "epoch": 0.57, + "learning_rate": 4.075277663642208e-08, + "logits/chosen": -3.262465238571167, + "logits/rejected": -3.1337575912475586, + "logps/chosen": -244.30911254882812, + "logps/rejected": -601.1356201171875, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6208541989326477, + "rewards/margins": 2.6280791759490967, + "rewards/rejected": -2.0072250366210938, + "step": 449 + }, + { + "epoch": 0.57, + "learning_rate": 4.054973799898299e-08, + "logits/chosen": -3.220750093460083, + "logits/rejected": -2.9675822257995605, + "logps/chosen": -253.8392333984375, + "logps/rejected": -1203.0849609375, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5285003781318665, + "rewards/margins": 4.062222480773926, + "rewards/rejected": -3.533721923828125, + "step": 450 + }, + { + "epoch": 0.57, + "learning_rate": 4.0346860840447325e-08, + "logits/chosen": -3.1660056114196777, + "logits/rejected": -3.0158963203430176, + "logps/chosen": -286.7347412109375, + "logps/rejected": -1375.703125, + "loss": 0.3314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6960357427597046, + "rewards/margins": 4.485115051269531, + "rewards/rejected": -3.789079189300537, + "step": 451 + }, + { + "epoch": 0.58, + "learning_rate": 4.014414862742599e-08, + "logits/chosen": -3.1942756175994873, + "logits/rejected": -3.1355481147766113, + "logps/chosen": -288.4247131347656, + "logps/rejected": -767.6219482421875, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7492950558662415, + "rewards/margins": 3.830880880355835, + "rewards/rejected": -3.0815858840942383, + "step": 452 + }, + { + "epoch": 0.58, + "learning_rate": 3.994160482371138e-08, + "logits/chosen": -3.1908321380615234, + "logits/rejected": -3.116558313369751, + "logps/chosen": -287.98773193359375, + "logps/rejected": -436.7784729003906, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6932601928710938, + "rewards/margins": 2.502986192703247, + "rewards/rejected": -1.8097259998321533, + "step": 453 + }, + { + "epoch": 0.58, + "learning_rate": 3.973923289021829e-08, + "logits/chosen": -3.217844009399414, + "logits/rejected": -3.0702407360076904, + "logps/chosen": -270.8744812011719, + "logps/rejected": -1440.763916015625, + "loss": 0.3202, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7444419860839844, + "rewards/margins": 5.135763168334961, + "rewards/rejected": -4.391320705413818, + "step": 454 + }, + { + "epoch": 0.58, + "learning_rate": 3.953703628492467e-08, + "logits/chosen": -3.2023582458496094, + "logits/rejected": -3.0243639945983887, + "logps/chosen": -244.8698272705078, + "logps/rejected": -634.566162109375, + "loss": 0.3788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7098579406738281, + "rewards/margins": 2.9888954162597656, + "rewards/rejected": -2.2790374755859375, + "step": 455 + }, + { + "epoch": 0.58, + "learning_rate": 3.933501846281266e-08, + "logits/chosen": -3.256348133087158, + "logits/rejected": -3.0844082832336426, + "logps/chosen": -256.80902099609375, + "logps/rejected": -334.99493408203125, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9002899527549744, + "rewards/margins": 2.0732498168945312, + "rewards/rejected": -1.1729599237442017, + "step": 456 + }, + { + "epoch": 0.58, + "learning_rate": 3.913318287580942e-08, + "logits/chosen": -3.1528282165527344, + "logits/rejected": -3.0535130500793457, + "logps/chosen": -269.269287109375, + "logps/rejected": -4545.48876953125, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.640545666217804, + "rewards/margins": 7.391668796539307, + "rewards/rejected": -6.751123428344727, + "step": 457 + }, + { + "epoch": 0.58, + "learning_rate": 3.893153297272828e-08, + "logits/chosen": -3.22245192527771, + "logits/rejected": -3.0255014896392822, + "logps/chosen": -291.67889404296875, + "logps/rejected": -950.5803833007812, + "loss": 0.3534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7662750482559204, + "rewards/margins": 3.592176914215088, + "rewards/rejected": -2.825901985168457, + "step": 458 + }, + { + "epoch": 0.59, + "learning_rate": 3.87300721992097e-08, + "logits/chosen": -3.248379707336426, + "logits/rejected": -3.1543474197387695, + "logps/chosen": -257.5096740722656, + "logps/rejected": -596.19384765625, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6612319946289062, + "rewards/margins": 2.685725688934326, + "rewards/rejected": -2.024493455886841, + "step": 459 + }, + { + "epoch": 0.59, + "learning_rate": 3.8528803997662425e-08, + "logits/chosen": -3.162740707397461, + "logits/rejected": -3.10451340675354, + "logps/chosen": -274.7113342285156, + "logps/rejected": -410.90069580078125, + "loss": 0.3668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5792472958564758, + "rewards/margins": 2.1629478931427, + "rewards/rejected": -1.5837006568908691, + "step": 460 + }, + { + "epoch": 0.59, + "learning_rate": 3.8327731807204744e-08, + "logits/chosen": -3.237062454223633, + "logits/rejected": -3.119654893875122, + "logps/chosen": -296.1859130859375, + "logps/rejected": -568.8157958984375, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7161239385604858, + "rewards/margins": 2.8507399559020996, + "rewards/rejected": -2.1346161365509033, + "step": 461 + }, + { + "epoch": 0.59, + "learning_rate": 3.812685906360557e-08, + "logits/chosen": -3.3023810386657715, + "logits/rejected": -3.2150511741638184, + "logps/chosen": -274.69879150390625, + "logps/rejected": -1106.1761474609375, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7989685535430908, + "rewards/margins": 5.494024753570557, + "rewards/rejected": -4.695055961608887, + "step": 462 + }, + { + "epoch": 0.59, + "learning_rate": 3.792618919922591e-08, + "logits/chosen": -3.2298502922058105, + "logits/rejected": -3.1052489280700684, + "logps/chosen": -279.3238220214844, + "logps/rejected": -729.8321533203125, + "loss": 0.3483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8115097284317017, + "rewards/margins": 4.286451816558838, + "rewards/rejected": -3.4749419689178467, + "step": 463 + }, + { + "epoch": 0.59, + "learning_rate": 3.7725725642960044e-08, + "logits/chosen": -3.190415143966675, + "logits/rejected": -3.0740017890930176, + "logps/chosen": -313.71331787109375, + "logps/rejected": -807.7523193359375, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8451553583145142, + "rewards/margins": 3.3208954334259033, + "rewards/rejected": -2.4757401943206787, + "step": 464 + }, + { + "epoch": 0.59, + "learning_rate": 3.752547182017708e-08, + "logits/chosen": -3.2075164318084717, + "logits/rejected": -3.125697374343872, + "logps/chosen": -275.1867370605469, + "logps/rejected": -1577.8466796875, + "loss": 0.3169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9897063970565796, + "rewards/margins": 5.621176242828369, + "rewards/rejected": -4.6314697265625, + "step": 465 + }, + { + "epoch": 0.59, + "learning_rate": 3.7325431152662297e-08, + "logits/chosen": -3.2827696800231934, + "logits/rejected": -3.1438956260681152, + "logps/chosen": -248.64447021484375, + "logps/rejected": -472.8500671386719, + "loss": 0.3417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6899780035018921, + "rewards/margins": 2.3661041259765625, + "rewards/rejected": -1.6761261224746704, + "step": 466 + }, + { + "epoch": 0.6, + "learning_rate": 3.7125607058558804e-08, + "logits/chosen": -3.224782943725586, + "logits/rejected": -3.0824363231658936, + "logps/chosen": -261.8575439453125, + "logps/rejected": -671.4842529296875, + "loss": 0.3276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7145729064941406, + "rewards/margins": 3.210987091064453, + "rewards/rejected": -2.4964141845703125, + "step": 467 + }, + { + "epoch": 0.6, + "learning_rate": 3.692600295230901e-08, + "logits/chosen": -3.28701114654541, + "logits/rejected": -3.2086753845214844, + "logps/chosen": -262.57989501953125, + "logps/rejected": -1080.6092529296875, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8113380670547485, + "rewards/margins": 4.42339563369751, + "rewards/rejected": -3.612057685852051, + "step": 468 + }, + { + "epoch": 0.6, + "learning_rate": 3.6726622244596394e-08, + "logits/chosen": -3.2494122982025146, + "logits/rejected": -3.188048839569092, + "logps/chosen": -294.75311279296875, + "logps/rejected": -698.8826904296875, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7637954950332642, + "rewards/margins": 3.5681686401367188, + "rewards/rejected": -2.804373264312744, + "step": 469 + }, + { + "epoch": 0.6, + "learning_rate": 3.6527468342287096e-08, + "logits/chosen": -3.239769697189331, + "logits/rejected": -3.134171724319458, + "logps/chosen": -274.1748046875, + "logps/rejected": -518.4232177734375, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8571258783340454, + "rewards/margins": 2.902047634124756, + "rewards/rejected": -2.044921875, + "step": 470 + }, + { + "epoch": 0.6, + "learning_rate": 3.632854464837188e-08, + "logits/chosen": -3.2138497829437256, + "logits/rejected": -3.186882972717285, + "logps/chosen": -296.0618591308594, + "logps/rejected": -760.6025390625, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8647918701171875, + "rewards/margins": 3.81931471824646, + "rewards/rejected": -2.9545228481292725, + "step": 471 + }, + { + "epoch": 0.6, + "learning_rate": 3.612985456190778e-08, + "logits/chosen": -3.2462005615234375, + "logits/rejected": -3.038898468017578, + "logps/chosen": -306.57122802734375, + "logps/rejected": -871.08251953125, + "loss": 0.3675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7027908563613892, + "rewards/margins": 3.4158430099487305, + "rewards/rejected": -2.713052272796631, + "step": 472 + }, + { + "epoch": 0.6, + "learning_rate": 3.5931401477960176e-08, + "logits/chosen": -3.279299736022949, + "logits/rejected": -3.06704044342041, + "logps/chosen": -237.44427490234375, + "logps/rejected": -368.42120361328125, + "loss": 0.3407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7325027585029602, + "rewards/margins": 1.8345856666564941, + "rewards/rejected": -1.1020828485488892, + "step": 473 + }, + { + "epoch": 0.6, + "learning_rate": 3.5733188787544745e-08, + "logits/chosen": -3.2575974464416504, + "logits/rejected": -3.125190258026123, + "logps/chosen": -283.5243225097656, + "logps/rejected": -1142.085693359375, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8307464718818665, + "rewards/margins": 4.5931243896484375, + "rewards/rejected": -3.762377977371216, + "step": 474 + }, + { + "epoch": 0.61, + "learning_rate": 3.553521987756945e-08, + "logits/chosen": -3.207159996032715, + "logits/rejected": -3.055850028991699, + "logps/chosen": -248.35189819335938, + "logps/rejected": -582.5394897460938, + "loss": 0.3772, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7321716547012329, + "rewards/margins": 2.437269687652588, + "rewards/rejected": -1.7050979137420654, + "step": 475 + }, + { + "epoch": 0.61, + "learning_rate": 3.5337498130776766e-08, + "logits/chosen": -3.174166202545166, + "logits/rejected": -3.1251349449157715, + "logps/chosen": -267.4666748046875, + "logps/rejected": -310.28900146484375, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0081459283828735, + "rewards/margins": 1.975653886795044, + "rewards/rejected": -0.9675079584121704, + "step": 476 + }, + { + "epoch": 0.61, + "learning_rate": 3.5140026925685804e-08, + "logits/chosen": -3.2043304443359375, + "logits/rejected": -3.1000399589538574, + "logps/chosen": -289.518310546875, + "logps/rejected": -555.5352172851562, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7766876220703125, + "rewards/margins": 3.2060272693634033, + "rewards/rejected": -2.429339647293091, + "step": 477 + }, + { + "epoch": 0.61, + "learning_rate": 3.494280963653463e-08, + "logits/chosen": -3.265476703643799, + "logits/rejected": -3.1485447883605957, + "logps/chosen": -232.38360595703125, + "logps/rejected": -490.606201171875, + "loss": 0.351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7450370788574219, + "rewards/margins": 2.6910927295684814, + "rewards/rejected": -1.9460556507110596, + "step": 478 + }, + { + "epoch": 0.61, + "learning_rate": 3.474584963322256e-08, + "logits/chosen": -3.267702102661133, + "logits/rejected": -3.1006391048431396, + "logps/chosen": -260.8533630371094, + "logps/rejected": -245.72360229492188, + "loss": 0.3826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8773964047431946, + "rewards/margins": 1.6634422540664673, + "rewards/rejected": -0.7860458493232727, + "step": 479 + }, + { + "epoch": 0.61, + "learning_rate": 3.4549150281252633e-08, + "logits/chosen": -3.2444987297058105, + "logits/rejected": -3.154402017593384, + "logps/chosen": -295.2914733886719, + "logps/rejected": -489.8719482421875, + "loss": 0.3522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6926208734512329, + "rewards/margins": 2.279946804046631, + "rewards/rejected": -1.5873260498046875, + "step": 480 + }, + { + "epoch": 0.61, + "learning_rate": 3.435271494167404e-08, + "logits/chosen": -3.194520950317383, + "logits/rejected": -3.0401835441589355, + "logps/chosen": -275.7926025390625, + "logps/rejected": -865.6224975585938, + "loss": 0.3373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5752716064453125, + "rewards/margins": 3.4095916748046875, + "rewards/rejected": -2.834320068359375, + "step": 481 + }, + { + "epoch": 0.61, + "learning_rate": 3.415654697102478e-08, + "logits/chosen": -3.200443744659424, + "logits/rejected": -3.0772781372070312, + "logps/chosen": -291.543212890625, + "logps/rejected": -798.7470092773438, + "loss": 0.3438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7563049793243408, + "rewards/margins": 3.5396180152893066, + "rewards/rejected": -2.783313035964966, + "step": 482 + }, + { + "epoch": 0.62, + "learning_rate": 3.396064972127421e-08, + "logits/chosen": -3.2226643562316895, + "logits/rejected": -3.077092170715332, + "logps/chosen": -249.03880310058594, + "logps/rejected": -842.3153076171875, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8276451230049133, + "rewards/margins": 4.273329734802246, + "rewards/rejected": -3.4456849098205566, + "step": 483 + }, + { + "epoch": 0.62, + "learning_rate": 3.376502653976583e-08, + "logits/chosen": -3.3033783435821533, + "logits/rejected": -3.1620521545410156, + "logps/chosen": -248.54515075683594, + "logps/rejected": -457.3966064453125, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.850292980670929, + "rewards/margins": 2.3508729934692383, + "rewards/rejected": -1.500579833984375, + "step": 484 + }, + { + "epoch": 0.62, + "learning_rate": 3.356968076916006e-08, + "logits/chosen": -3.2271409034729004, + "logits/rejected": -3.1354751586914062, + "logps/chosen": -260.30157470703125, + "logps/rejected": -398.0699157714844, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6594513058662415, + "rewards/margins": 1.9784469604492188, + "rewards/rejected": -1.318995714187622, + "step": 485 + }, + { + "epoch": 0.62, + "learning_rate": 3.337461574737716e-08, + "logits/chosen": -3.0992648601531982, + "logits/rejected": -3.040761947631836, + "logps/chosen": -279.94012451171875, + "logps/rejected": -460.386962890625, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7294899225234985, + "rewards/margins": 2.0969841480255127, + "rewards/rejected": -1.3674942255020142, + "step": 486 + }, + { + "epoch": 0.62, + "learning_rate": 3.317983480754015e-08, + "logits/chosen": -3.1670608520507812, + "logits/rejected": -3.0640010833740234, + "logps/chosen": -276.773193359375, + "logps/rejected": -788.0111083984375, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6691627502441406, + "rewards/margins": 3.592566728591919, + "rewards/rejected": -2.9234039783477783, + "step": 487 + }, + { + "epoch": 0.62, + "learning_rate": 3.298534127791784e-08, + "logits/chosen": -3.229215621948242, + "logits/rejected": -3.183879852294922, + "logps/chosen": -289.4977722167969, + "logps/rejected": -612.8384399414062, + "loss": 0.344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7828354239463806, + "rewards/margins": 2.9046311378479004, + "rewards/rejected": -2.121795654296875, + "step": 488 + }, + { + "epoch": 0.62, + "learning_rate": 3.279113848186808e-08, + "logits/chosen": -3.2616829872131348, + "logits/rejected": -3.1825454235076904, + "logps/chosen": -269.2352294921875, + "logps/rejected": -1043.022216796875, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.946881115436554, + "rewards/margins": 4.980612277984619, + "rewards/rejected": -4.033730983734131, + "step": 489 + }, + { + "epoch": 0.62, + "learning_rate": 3.259722973778077e-08, + "logits/chosen": -3.2055540084838867, + "logits/rejected": -2.931516170501709, + "logps/chosen": -300.71710205078125, + "logps/rejected": -1510.306884765625, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8505660891532898, + "rewards/margins": 5.467710494995117, + "rewards/rejected": -4.617144584655762, + "step": 490 + }, + { + "epoch": 0.63, + "learning_rate": 3.24036183590214e-08, + "logits/chosen": -3.250499725341797, + "logits/rejected": -3.1638875007629395, + "logps/chosen": -252.1806640625, + "logps/rejected": -456.29595947265625, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8973366022109985, + "rewards/margins": 2.5213751792907715, + "rewards/rejected": -1.6240386962890625, + "step": 491 + }, + { + "epoch": 0.63, + "learning_rate": 3.221030765387417e-08, + "logits/chosen": -3.267425060272217, + "logits/rejected": -3.0729968547821045, + "logps/chosen": -272.65802001953125, + "logps/rejected": -714.3707885742188, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7564239501953125, + "rewards/margins": 4.0118560791015625, + "rewards/rejected": -3.25543212890625, + "step": 492 + }, + { + "epoch": 0.63, + "learning_rate": 3.201730092548573e-08, + "logits/chosen": -3.2279443740844727, + "logits/rejected": -3.1305999755859375, + "logps/chosen": -257.40997314453125, + "logps/rejected": -744.6495361328125, + "loss": 0.3434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8255035877227783, + "rewards/margins": 3.7099945545196533, + "rewards/rejected": -2.884490966796875, + "step": 493 + }, + { + "epoch": 0.63, + "learning_rate": 3.18246014718085e-08, + "logits/chosen": -3.2469334602355957, + "logits/rejected": -3.1008386611938477, + "logps/chosen": -243.52781677246094, + "logps/rejected": -413.0036926269531, + "loss": 0.3394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.718505859375, + "rewards/margins": 2.145765781402588, + "rewards/rejected": -1.427259922027588, + "step": 494 + }, + { + "epoch": 0.63, + "learning_rate": 3.16322125855445e-08, + "logits/chosen": -3.2830655574798584, + "logits/rejected": -3.0997061729431152, + "logps/chosen": -253.1057891845703, + "logps/rejected": -466.40057373046875, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7962547540664673, + "rewards/margins": 2.3641960620880127, + "rewards/rejected": -1.5679413080215454, + "step": 495 + }, + { + "epoch": 0.63, + "learning_rate": 3.1440137554088955e-08, + "logits/chosen": -3.1753182411193848, + "logits/rejected": -3.0896730422973633, + "logps/chosen": -313.00250244140625, + "logps/rejected": -455.2221984863281, + "loss": 0.3372, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8439132571220398, + "rewards/margins": 2.1578750610351562, + "rewards/rejected": -1.3139618635177612, + "step": 496 + }, + { + "epoch": 0.63, + "learning_rate": 3.1248379659474225e-08, + "logits/chosen": -3.2216484546661377, + "logits/rejected": -3.0465822219848633, + "logps/chosen": -247.40216064453125, + "logps/rejected": -981.2264404296875, + "loss": 0.3212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8490959405899048, + "rewards/margins": 4.398131370544434, + "rewards/rejected": -3.5490355491638184, + "step": 497 + }, + { + "epoch": 0.63, + "learning_rate": 3.1056942178313604e-08, + "logits/chosen": -3.1844589710235596, + "logits/rejected": -3.044269561767578, + "logps/chosen": -287.8991394042969, + "logps/rejected": -1160.2342529296875, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8123718500137329, + "rewards/margins": 4.614738464355469, + "rewards/rejected": -3.8023667335510254, + "step": 498 + }, + { + "epoch": 0.64, + "learning_rate": 3.086582838174551e-08, + "logits/chosen": -3.2012252807617188, + "logits/rejected": -3.0592498779296875, + "logps/chosen": -296.3619384765625, + "logps/rejected": -346.0898132324219, + "loss": 0.339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8498016595840454, + "rewards/margins": 2.178593635559082, + "rewards/rejected": -1.328791856765747, + "step": 499 + }, + { + "epoch": 0.64, + "learning_rate": 3.0675041535377396e-08, + "logits/chosen": -3.2096686363220215, + "logits/rejected": -3.0638818740844727, + "logps/chosen": -257.333251953125, + "logps/rejected": -873.3787841796875, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8456581234931946, + "rewards/margins": 3.701253652572632, + "rewards/rejected": -2.855595588684082, + "step": 500 + }, + { + "epoch": 0.64, + "learning_rate": 3.048458489923005e-08, + "logits/chosen": -3.1706371307373047, + "logits/rejected": -3.029289484024048, + "logps/chosen": -291.7265930175781, + "logps/rejected": -706.798828125, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6848862171173096, + "rewards/margins": 2.6795029640197754, + "rewards/rejected": -1.9946167469024658, + "step": 501 + }, + { + "epoch": 0.64, + "learning_rate": 3.029446172768193e-08, + "logits/chosen": -3.2661898136138916, + "logits/rejected": -3.122596502304077, + "logps/chosen": -289.329345703125, + "logps/rejected": -626.8970336914062, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7962433099746704, + "rewards/margins": 2.959942579269409, + "rewards/rejected": -2.1636993885040283, + "step": 502 + }, + { + "epoch": 0.64, + "learning_rate": 3.0104675269413436e-08, + "logits/chosen": -3.284250497817993, + "logits/rejected": -3.1087212562561035, + "logps/chosen": -259.484130859375, + "logps/rejected": -562.6728515625, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7226676940917969, + "rewards/margins": 3.0032525062561035, + "rewards/rejected": -2.2805848121643066, + "step": 503 + }, + { + "epoch": 0.64, + "learning_rate": 2.991522876735154e-08, + "logits/chosen": -3.282092332839966, + "logits/rejected": -3.1270055770874023, + "logps/chosen": -285.57391357421875, + "logps/rejected": -519.0621948242188, + "loss": 0.3767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6895920038223267, + "rewards/margins": 2.532522678375244, + "rewards/rejected": -1.8429306745529175, + "step": 504 + }, + { + "epoch": 0.64, + "learning_rate": 2.9726125458614215e-08, + "logits/chosen": -3.1571173667907715, + "logits/rejected": -3.144773006439209, + "logps/chosen": -340.250732421875, + "logps/rejected": -804.8697509765625, + "loss": 0.3629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7886428833007812, + "rewards/margins": 4.160179138183594, + "rewards/rejected": -3.3715362548828125, + "step": 505 + }, + { + "epoch": 0.64, + "learning_rate": 2.9537368574455303e-08, + "logits/chosen": -3.200361490249634, + "logits/rejected": -3.1416163444519043, + "logps/chosen": -266.86431884765625, + "logps/rejected": -415.9558410644531, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.776441216468811, + "rewards/margins": 1.8457863330841064, + "rewards/rejected": -1.0693451166152954, + "step": 506 + }, + { + "epoch": 0.65, + "learning_rate": 2.9348961340209117e-08, + "logits/chosen": -3.2498087882995605, + "logits/rejected": -3.1324074268341064, + "logps/chosen": -268.2132568359375, + "logps/rejected": -835.5245971679688, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6560776233673096, + "rewards/margins": 3.5208053588867188, + "rewards/rejected": -2.8647279739379883, + "step": 507 + }, + { + "epoch": 0.65, + "learning_rate": 2.916090697523549e-08, + "logits/chosen": -3.1927647590637207, + "logits/rejected": -2.923441171646118, + "logps/chosen": -293.7490539550781, + "logps/rejected": -1649.60693359375, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6809318661689758, + "rewards/margins": 5.661260604858398, + "rewards/rejected": -4.980328559875488, + "step": 508 + }, + { + "epoch": 0.65, + "learning_rate": 2.897320869286462e-08, + "logits/chosen": -3.2116494178771973, + "logits/rejected": -3.061539649963379, + "logps/chosen": -265.17315673828125, + "logps/rejected": -392.7770690917969, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7900558710098267, + "rewards/margins": 1.9126038551330566, + "rewards/rejected": -1.1225478649139404, + "step": 509 + }, + { + "epoch": 0.65, + "learning_rate": 2.8785869700342317e-08, + "logits/chosen": -3.2594780921936035, + "logits/rejected": -3.1436190605163574, + "logps/chosen": -254.48577880859375, + "logps/rejected": -505.0855407714844, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7725013494491577, + "rewards/margins": 2.616222381591797, + "rewards/rejected": -1.8437211513519287, + "step": 510 + }, + { + "epoch": 0.65, + "learning_rate": 2.8598893198775044e-08, + "logits/chosen": -3.260098695755005, + "logits/rejected": -3.0652217864990234, + "logps/chosen": -269.98248291015625, + "logps/rejected": -1697.991455078125, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9945420026779175, + "rewards/margins": 6.666765213012695, + "rewards/rejected": -5.672223091125488, + "step": 511 + }, + { + "epoch": 0.65, + "learning_rate": 2.841228238307536e-08, + "logits/chosen": -3.22619891166687, + "logits/rejected": -3.028517723083496, + "logps/chosen": -255.2684783935547, + "logps/rejected": -347.3194580078125, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7465088367462158, + "rewards/margins": 1.8274078369140625, + "rewards/rejected": -1.0808990001678467, + "step": 512 + }, + { + "epoch": 0.65, + "learning_rate": 2.8226040441907207e-08, + "logits/chosen": -3.284773349761963, + "logits/rejected": -3.1044492721557617, + "logps/chosen": -251.33314514160156, + "logps/rejected": -1136.5645751953125, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8923729062080383, + "rewards/margins": 4.182623863220215, + "rewards/rejected": -3.2902512550354004, + "step": 513 + }, + { + "epoch": 0.66, + "learning_rate": 2.8040170557631488e-08, + "logits/chosen": -3.205354928970337, + "logits/rejected": -3.071455955505371, + "logps/chosen": -277.3111267089844, + "logps/rejected": -612.5389404296875, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6704696416854858, + "rewards/margins": 3.0538177490234375, + "rewards/rejected": -2.383347988128662, + "step": 514 + }, + { + "epoch": 0.66, + "learning_rate": 2.7854675906251723e-08, + "logits/chosen": -3.243133544921875, + "logits/rejected": -3.1228184700012207, + "logps/chosen": -306.99658203125, + "logps/rejected": -1548.204345703125, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7322174310684204, + "rewards/margins": 6.1508026123046875, + "rewards/rejected": -5.418585300445557, + "step": 515 + }, + { + "epoch": 0.66, + "learning_rate": 2.7669559657359676e-08, + "logits/chosen": -3.232929229736328, + "logits/rejected": -3.1358025074005127, + "logps/chosen": -277.9944763183594, + "logps/rejected": -743.4637451171875, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6300727725028992, + "rewards/margins": 3.426499366760254, + "rewards/rejected": -2.79642653465271, + "step": 516 + }, + { + "epoch": 0.66, + "learning_rate": 2.7484824974081323e-08, + "logits/chosen": -3.237060546875, + "logits/rejected": -3.0703210830688477, + "logps/chosen": -276.933837890625, + "logps/rejected": -900.265869140625, + "loss": 0.3843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7478073239326477, + "rewards/margins": 3.701364040374756, + "rewards/rejected": -2.953556776046753, + "step": 517 + }, + { + "epoch": 0.66, + "learning_rate": 2.730047501302266e-08, + "logits/chosen": -3.2819442749023438, + "logits/rejected": -3.1865782737731934, + "logps/chosen": -245.09686279296875, + "logps/rejected": -831.725830078125, + "loss": 0.27, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8092750310897827, + "rewards/margins": 4.180191993713379, + "rewards/rejected": -3.3709168434143066, + "step": 518 + }, + { + "epoch": 0.66, + "learning_rate": 2.711651292421593e-08, + "logits/chosen": -3.243709087371826, + "logits/rejected": -3.1272714138031006, + "logps/chosen": -283.77880859375, + "logps/rejected": -803.2974853515625, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7700408697128296, + "rewards/margins": 3.8824188709259033, + "rewards/rejected": -3.1123781204223633, + "step": 519 + }, + { + "epoch": 0.66, + "learning_rate": 2.6932941851065616e-08, + "logits/chosen": -3.211003065109253, + "logits/rejected": -3.034639835357666, + "logps/chosen": -288.78753662109375, + "logps/rejected": -580.77978515625, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6921890377998352, + "rewards/margins": 2.537184238433838, + "rewards/rejected": -1.8449952602386475, + "step": 520 + }, + { + "epoch": 0.66, + "learning_rate": 2.6749764930294905e-08, + "logits/chosen": -3.254178524017334, + "logits/rejected": -3.0093841552734375, + "logps/chosen": -308.0101318359375, + "logps/rejected": -1147.224853515625, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5641037225723267, + "rewards/margins": 4.1731276512146, + "rewards/rejected": -3.6090240478515625, + "step": 521 + }, + { + "epoch": 0.67, + "learning_rate": 2.656698529189193e-08, + "logits/chosen": -3.2701053619384766, + "logits/rejected": -3.116480827331543, + "logps/chosen": -240.92303466796875, + "logps/rejected": -432.70465087890625, + "loss": 0.3299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0231246948242188, + "rewards/margins": 2.6888976097106934, + "rewards/rejected": -1.6657730340957642, + "step": 522 + }, + { + "epoch": 0.67, + "learning_rate": 2.638460605905646e-08, + "logits/chosen": -3.259185791015625, + "logits/rejected": -3.125178337097168, + "logps/chosen": -268.7012023925781, + "logps/rejected": -725.4331665039062, + "loss": 0.3424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7501808404922485, + "rewards/margins": 3.2113561630249023, + "rewards/rejected": -2.4611754417419434, + "step": 523 + }, + { + "epoch": 0.67, + "learning_rate": 2.620263034814632e-08, + "logits/chosen": -3.231407880783081, + "logits/rejected": -3.1769704818725586, + "logps/chosen": -261.00274658203125, + "logps/rejected": -620.9232788085938, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.785656750202179, + "rewards/margins": 3.282357692718506, + "rewards/rejected": -2.496701240539551, + "step": 524 + }, + { + "epoch": 0.67, + "learning_rate": 2.6021061268624378e-08, + "logits/chosen": -3.3199143409729004, + "logits/rejected": -3.200247049331665, + "logps/chosen": -266.4005126953125, + "logps/rejected": -660.0227661132812, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8903152942657471, + "rewards/margins": 3.7053420543670654, + "rewards/rejected": -2.8150267601013184, + "step": 525 + }, + { + "epoch": 0.67, + "learning_rate": 2.5839901923005202e-08, + "logits/chosen": -3.2210211753845215, + "logits/rejected": -3.136049270629883, + "logps/chosen": -280.83380126953125, + "logps/rejected": -676.7186279296875, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7081558704376221, + "rewards/margins": 3.3637893199920654, + "rewards/rejected": -2.6556334495544434, + "step": 526 + }, + { + "epoch": 0.67, + "learning_rate": 2.5659155406802192e-08, + "logits/chosen": -3.2269108295440674, + "logits/rejected": -3.1119041442871094, + "logps/chosen": -254.64706420898438, + "logps/rejected": -791.0535888671875, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8280891180038452, + "rewards/margins": 3.7028114795684814, + "rewards/rejected": -2.874722480773926, + "step": 527 + }, + { + "epoch": 0.67, + "learning_rate": 2.5478824808474607e-08, + "logits/chosen": -3.2212681770324707, + "logits/rejected": -2.95735502243042, + "logps/chosen": -250.558349609375, + "logps/rejected": -588.414794921875, + "loss": 0.3277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8706893920898438, + "rewards/margins": 2.766771078109741, + "rewards/rejected": -1.896081566810608, + "step": 528 + }, + { + "epoch": 0.67, + "learning_rate": 2.5298913209374804e-08, + "logits/chosen": -3.216362953186035, + "logits/rejected": -3.104799509048462, + "logps/chosen": -291.16510009765625, + "logps/rejected": -369.62408447265625, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7290573120117188, + "rewards/margins": 1.9648346900939941, + "rewards/rejected": -1.2357773780822754, + "step": 529 + }, + { + "epoch": 0.68, + "learning_rate": 2.5119423683695657e-08, + "logits/chosen": -3.2180850505828857, + "logits/rejected": -3.0878849029541016, + "logps/chosen": -241.8767852783203, + "logps/rejected": -349.07977294921875, + "loss": 0.3417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7009941339492798, + "rewards/margins": 2.1412835121154785, + "rewards/rejected": -1.4402892589569092, + "step": 530 + }, + { + "epoch": 0.68, + "learning_rate": 2.494035929841789e-08, + "logits/chosen": -3.1730904579162598, + "logits/rejected": -3.0138604640960693, + "logps/chosen": -282.81976318359375, + "logps/rejected": -359.59149169921875, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7607322931289673, + "rewards/margins": 1.761634111404419, + "rewards/rejected": -1.0009018182754517, + "step": 531 + }, + { + "epoch": 0.68, + "learning_rate": 2.4761723113257826e-08, + "logits/chosen": -3.2102138996124268, + "logits/rejected": -3.135263442993164, + "logps/chosen": -280.7384338378906, + "logps/rejected": -654.184326171875, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8563896417617798, + "rewards/margins": 3.4530997276306152, + "rewards/rejected": -2.596710205078125, + "step": 532 + }, + { + "epoch": 0.68, + "learning_rate": 2.458351818061497e-08, + "logits/chosen": -3.2461233139038086, + "logits/rejected": -3.1350111961364746, + "logps/chosen": -285.25634765625, + "logps/rejected": -950.964111328125, + "loss": 0.3396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8655045032501221, + "rewards/margins": 3.9639816284179688, + "rewards/rejected": -3.098477363586426, + "step": 533 + }, + { + "epoch": 0.68, + "learning_rate": 2.4405747545519962e-08, + "logits/chosen": -3.2358059883117676, + "logits/rejected": -3.110546588897705, + "logps/chosen": -277.2227783203125, + "logps/rejected": -742.1814575195312, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7114288806915283, + "rewards/margins": 3.7411561012268066, + "rewards/rejected": -3.0297272205352783, + "step": 534 + }, + { + "epoch": 0.68, + "learning_rate": 2.422841424558244e-08, + "logits/chosen": -3.222923755645752, + "logits/rejected": -3.134859561920166, + "logps/chosen": -256.1596984863281, + "logps/rejected": -754.4998779296875, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7973670959472656, + "rewards/margins": 3.925626277923584, + "rewards/rejected": -3.1282591819763184, + "step": 535 + }, + { + "epoch": 0.68, + "learning_rate": 2.4051521310939256e-08, + "logits/chosen": -3.241771697998047, + "logits/rejected": -3.211617946624756, + "logps/chosen": -233.52093505859375, + "logps/rejected": -512.2822265625, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.715766191482544, + "rewards/margins": 3.0254340171813965, + "rewards/rejected": -2.3096680641174316, + "step": 536 + }, + { + "epoch": 0.68, + "learning_rate": 2.3875071764202558e-08, + "logits/chosen": -3.2804274559020996, + "logits/rejected": -3.1656951904296875, + "logps/chosen": -246.5098876953125, + "logps/rejected": -1061.2021484375, + "loss": 0.3238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6952117681503296, + "rewards/margins": 4.988329887390137, + "rewards/rejected": -4.293118476867676, + "step": 537 + }, + { + "epoch": 0.69, + "learning_rate": 2.3699068620408304e-08, + "logits/chosen": -3.202989101409912, + "logits/rejected": -3.0889713764190674, + "logps/chosen": -257.684326171875, + "logps/rejected": -307.6672058105469, + "loss": 0.3584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7886108160018921, + "rewards/margins": 1.756038784980774, + "rewards/rejected": -0.9674278497695923, + "step": 538 + }, + { + "epoch": 0.69, + "learning_rate": 2.352351488696457e-08, + "logits/chosen": -3.227226972579956, + "logits/rejected": -3.129992723464966, + "logps/chosen": -266.2195129394531, + "logps/rejected": -635.551025390625, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.68621826171875, + "rewards/margins": 3.1690611839294434, + "rewards/rejected": -2.4828429222106934, + "step": 539 + }, + { + "epoch": 0.69, + "learning_rate": 2.3348413563600322e-08, + "logits/chosen": -3.2839789390563965, + "logits/rejected": -3.212800979614258, + "logps/chosen": -261.53369140625, + "logps/rejected": -626.46826171875, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.596265435218811, + "rewards/margins": 3.207556962966919, + "rewards/rejected": -2.6112916469573975, + "step": 540 + }, + { + "epoch": 0.69, + "learning_rate": 2.317376764231403e-08, + "logits/chosen": -3.284614086151123, + "logits/rejected": -3.1552085876464844, + "logps/chosen": -262.8857421875, + "logps/rejected": -482.6531677246094, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8574379086494446, + "rewards/margins": 2.540015459060669, + "rewards/rejected": -1.6825776100158691, + "step": 541 + }, + { + "epoch": 0.69, + "learning_rate": 2.2999580107322654e-08, + "logits/chosen": -3.2353124618530273, + "logits/rejected": -3.034973621368408, + "logps/chosen": -251.29417419433594, + "logps/rejected": -445.44232177734375, + "loss": 0.3446, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8257835507392883, + "rewards/margins": 2.4416825771331787, + "rewards/rejected": -1.6158989667892456, + "step": 542 + }, + { + "epoch": 0.69, + "learning_rate": 2.2825853935010535e-08, + "logits/chosen": -3.2750420570373535, + "logits/rejected": -3.0518383979797363, + "logps/chosen": -241.6350555419922, + "logps/rejected": -476.3302001953125, + "loss": 0.346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8880584836006165, + "rewards/margins": 2.4360198974609375, + "rewards/rejected": -1.5479614734649658, + "step": 543 + }, + { + "epoch": 0.69, + "learning_rate": 2.2652592093878663e-08, + "logits/chosen": -3.2640767097473145, + "logits/rejected": -3.157710552215576, + "logps/chosen": -252.5020294189453, + "logps/rejected": -591.795654296875, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8369400501251221, + "rewards/margins": 3.3360307216644287, + "rewards/rejected": -2.4990906715393066, + "step": 544 + }, + { + "epoch": 0.69, + "learning_rate": 2.2479797544493827e-08, + "logits/chosen": -3.192866802215576, + "logits/rejected": -3.1116418838500977, + "logps/chosen": -279.76019287109375, + "logps/rejected": -754.4430541992188, + "loss": 0.339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7363144159317017, + "rewards/margins": 3.5791168212890625, + "rewards/rejected": -2.8428025245666504, + "step": 545 + }, + { + "epoch": 0.7, + "learning_rate": 2.2307473239438153e-08, + "logits/chosen": -3.2561635971069336, + "logits/rejected": -3.0595803260803223, + "logps/chosen": -264.88531494140625, + "logps/rejected": -583.7433471679688, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.863250732421875, + "rewards/margins": 3.018193244934082, + "rewards/rejected": -2.154942512512207, + "step": 546 + }, + { + "epoch": 0.7, + "learning_rate": 2.2135622123258513e-08, + "logits/chosen": -3.256286144256592, + "logits/rejected": -3.1843183040618896, + "logps/chosen": -282.18719482421875, + "logps/rejected": -4316.24365234375, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8060486316680908, + "rewards/margins": 4.675967216491699, + "rewards/rejected": -3.8699188232421875, + "step": 547 + }, + { + "epoch": 0.7, + "learning_rate": 2.196424713241637e-08, + "logits/chosen": -3.1739516258239746, + "logits/rejected": -3.055060625076294, + "logps/chosen": -256.6613464355469, + "logps/rejected": -452.2792053222656, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9305694699287415, + "rewards/margins": 2.6505632400512695, + "rewards/rejected": -1.7199935913085938, + "step": 548 + }, + { + "epoch": 0.7, + "learning_rate": 2.1793351195237446e-08, + "logits/chosen": -3.250770092010498, + "logits/rejected": -3.074728488922119, + "logps/chosen": -273.2873229980469, + "logps/rejected": -1108.5347900390625, + "loss": 0.3423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6904770135879517, + "rewards/margins": 5.026118278503418, + "rewards/rejected": -4.335641860961914, + "step": 549 + }, + { + "epoch": 0.7, + "learning_rate": 2.162293723186182e-08, + "logits/chosen": -3.2359490394592285, + "logits/rejected": -3.174556016921997, + "logps/chosen": -268.327880859375, + "logps/rejected": -768.9863891601562, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8610702753067017, + "rewards/margins": 4.060896396636963, + "rewards/rejected": -3.199826240539551, + "step": 550 + }, + { + "epoch": 0.7, + "learning_rate": 2.1453008154193904e-08, + "logits/chosen": -3.231884479522705, + "logits/rejected": -3.175266742706299, + "logps/chosen": -263.5126953125, + "logps/rejected": -1434.0140380859375, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6874923706054688, + "rewards/margins": 5.842198371887207, + "rewards/rejected": -5.154706001281738, + "step": 551 + }, + { + "epoch": 0.7, + "learning_rate": 2.128356686585282e-08, + "logits/chosen": -3.2089362144470215, + "logits/rejected": -3.1318202018737793, + "logps/chosen": -247.08738708496094, + "logps/rejected": -326.57659912109375, + "loss": 0.4003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7649879455566406, + "rewards/margins": 2.283602237701416, + "rewards/rejected": -1.5186141729354858, + "step": 552 + }, + { + "epoch": 0.7, + "learning_rate": 2.1114616262122648e-08, + "logits/chosen": -3.226365089416504, + "logits/rejected": -3.1156368255615234, + "logps/chosen": -256.169677734375, + "logps/rejected": -581.084228515625, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8134483098983765, + "rewards/margins": 3.2864890098571777, + "rewards/rejected": -2.473040819168091, + "step": 553 + }, + { + "epoch": 0.71, + "learning_rate": 2.0946159229903088e-08, + "logits/chosen": -3.2844085693359375, + "logits/rejected": -3.0901215076446533, + "logps/chosen": -272.3114929199219, + "logps/rejected": -1306.08984375, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0379623174667358, + "rewards/margins": 5.806361198425293, + "rewards/rejected": -4.768399238586426, + "step": 554 + }, + { + "epoch": 0.71, + "learning_rate": 2.077819864766e-08, + "logits/chosen": -3.2365732192993164, + "logits/rejected": -3.1484107971191406, + "logps/chosen": -300.99090576171875, + "logps/rejected": -500.6796875, + "loss": 0.3671, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4185028076171875, + "rewards/margins": 2.444009304046631, + "rewards/rejected": -2.0255067348480225, + "step": 555 + }, + { + "epoch": 0.71, + "learning_rate": 2.0610737385376347e-08, + "logits/chosen": -3.239521026611328, + "logits/rejected": -3.0534563064575195, + "logps/chosen": -319.57867431640625, + "logps/rejected": -547.0084228515625, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7032821774482727, + "rewards/margins": 2.589573860168457, + "rewards/rejected": -1.88629150390625, + "step": 556 + }, + { + "epoch": 0.71, + "learning_rate": 2.0443778304503024e-08, + "logits/chosen": -3.262899875640869, + "logits/rejected": -3.1323235034942627, + "logps/chosen": -260.12847900390625, + "logps/rejected": -549.6962890625, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8047699332237244, + "rewards/margins": 2.8558425903320312, + "rewards/rejected": -2.051072835922241, + "step": 557 + }, + { + "epoch": 0.71, + "learning_rate": 2.0277324257910105e-08, + "logits/chosen": -3.2341151237487793, + "logits/rejected": -3.10391902923584, + "logps/chosen": -293.564453125, + "logps/rejected": -653.33251953125, + "loss": 0.3417, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5919937491416931, + "rewards/margins": 3.3384780883789062, + "rewards/rejected": -2.7464842796325684, + "step": 558 + }, + { + "epoch": 0.71, + "learning_rate": 2.0111378089837954e-08, + "logits/chosen": -3.2408924102783203, + "logits/rejected": -3.1311264038085938, + "logps/chosen": -246.45106506347656, + "logps/rejected": -932.4437866210938, + "loss": 0.3145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6670722961425781, + "rewards/margins": 3.7794137001037598, + "rewards/rejected": -3.1123414039611816, + "step": 559 + }, + { + "epoch": 0.71, + "learning_rate": 1.9945942635848744e-08, + "logits/chosen": -3.2542991638183594, + "logits/rejected": -3.070842742919922, + "logps/chosen": -273.74053955078125, + "logps/rejected": -380.3423156738281, + "loss": 0.3518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7355339527130127, + "rewards/margins": 2.2156548500061035, + "rewards/rejected": -1.4801208972930908, + "step": 560 + }, + { + "epoch": 0.72, + "learning_rate": 1.978102072277791e-08, + "logits/chosen": -3.243678569793701, + "logits/rejected": -3.162158727645874, + "logps/chosen": -268.815185546875, + "logps/rejected": -650.6029663085938, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6376816034317017, + "rewards/margins": 3.8293137550354004, + "rewards/rejected": -3.1916322708129883, + "step": 561 + }, + { + "epoch": 0.72, + "learning_rate": 1.961661516868594e-08, + "logits/chosen": -3.2407901287078857, + "logits/rejected": -3.1372008323669434, + "logps/chosen": -274.2633056640625, + "logps/rejected": -682.1362915039062, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.672736406326294, + "rewards/margins": 3.5003442764282227, + "rewards/rejected": -2.8276078701019287, + "step": 562 + }, + { + "epoch": 0.72, + "learning_rate": 1.9452728782810107e-08, + "logits/chosen": -3.2568745613098145, + "logits/rejected": -3.1358208656311035, + "logps/chosen": -287.94061279296875, + "logps/rejected": -845.9992065429688, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6672897338867188, + "rewards/margins": 4.241227626800537, + "rewards/rejected": -3.5739378929138184, + "step": 563 + }, + { + "epoch": 0.72, + "learning_rate": 1.928936436551661e-08, + "logits/chosen": -3.2237720489501953, + "logits/rejected": -3.19596529006958, + "logps/chosen": -291.1484680175781, + "logps/rejected": -842.9820556640625, + "loss": 0.3092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7337249517440796, + "rewards/margins": 4.3112688064575195, + "rewards/rejected": -3.5775437355041504, + "step": 564 + }, + { + "epoch": 0.72, + "learning_rate": 1.9126524708252555e-08, + "logits/chosen": -3.2352211475372314, + "logits/rejected": -3.066652297973633, + "logps/chosen": -224.43003845214844, + "logps/rejected": -380.8223876953125, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8931503295898438, + "rewards/margins": 2.184948682785034, + "rewards/rejected": -1.2917983531951904, + "step": 565 + }, + { + "epoch": 0.72, + "learning_rate": 1.8964212593498442e-08, + "logits/chosen": -3.215919017791748, + "logits/rejected": -3.106771469116211, + "logps/chosen": -260.96405029296875, + "logps/rejected": -1358.46875, + "loss": 0.2993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8293197751045227, + "rewards/margins": 5.584852695465088, + "rewards/rejected": -4.755533218383789, + "step": 566 + }, + { + "epoch": 0.72, + "learning_rate": 1.8802430794720454e-08, + "logits/chosen": -3.2680017948150635, + "logits/rejected": -3.0507636070251465, + "logps/chosen": -251.55679321289062, + "logps/rejected": -1121.06201171875, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.824327826499939, + "rewards/margins": 5.145266056060791, + "rewards/rejected": -4.3209381103515625, + "step": 567 + }, + { + "epoch": 0.72, + "learning_rate": 1.8641182076323148e-08, + "logits/chosen": -3.1715824604034424, + "logits/rejected": -3.202866554260254, + "logps/chosen": -321.75152587890625, + "logps/rejected": -797.9825439453125, + "loss": 0.3169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5541549921035767, + "rewards/margins": 4.137248516082764, + "rewards/rejected": -3.5830931663513184, + "step": 568 + }, + { + "epoch": 0.73, + "learning_rate": 1.848046919360225e-08, + "logits/chosen": -3.2495555877685547, + "logits/rejected": -3.11578369140625, + "logps/chosen": -271.3127746582031, + "logps/rejected": -560.5536499023438, + "loss": 0.3052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7684272527694702, + "rewards/margins": 3.2391443252563477, + "rewards/rejected": -2.470716953277588, + "step": 569 + }, + { + "epoch": 0.73, + "learning_rate": 1.8320294892697475e-08, + "logits/chosen": -3.225247621536255, + "logits/rejected": -3.104687452316284, + "logps/chosen": -226.76559448242188, + "logps/rejected": -401.80731201171875, + "loss": 0.3314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7220146656036377, + "rewards/margins": 1.9961357116699219, + "rewards/rejected": -1.2741210460662842, + "step": 570 + }, + { + "epoch": 0.73, + "learning_rate": 1.8160661910545715e-08, + "logits/chosen": -3.1751976013183594, + "logits/rejected": -3.1597752571105957, + "logps/chosen": -275.3876953125, + "logps/rejected": -796.18994140625, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8764327764511108, + "rewards/margins": 4.392259120941162, + "rewards/rejected": -3.5158262252807617, + "step": 571 + }, + { + "epoch": 0.73, + "learning_rate": 1.8001572974834166e-08, + "logits/chosen": -3.2187938690185547, + "logits/rejected": -3.093998908996582, + "logps/chosen": -256.5648193359375, + "logps/rejected": -3737.867919921875, + "loss": 0.283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7471191883087158, + "rewards/margins": 6.345770835876465, + "rewards/rejected": -5.59865140914917, + "step": 572 + }, + { + "epoch": 0.73, + "learning_rate": 1.7843030803953834e-08, + "logits/chosen": -3.213759422302246, + "logits/rejected": -3.1596827507019043, + "logps/chosen": -246.65911865234375, + "logps/rejected": -792.6885986328125, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7636443972587585, + "rewards/margins": 4.126757621765137, + "rewards/rejected": -3.3631134033203125, + "step": 573 + }, + { + "epoch": 0.73, + "learning_rate": 1.768503810695295e-08, + "logits/chosen": -3.2023587226867676, + "logits/rejected": -3.0770998001098633, + "logps/chosen": -270.86572265625, + "logps/rejected": -1285.24365234375, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9260948300361633, + "rewards/margins": 5.769783973693848, + "rewards/rejected": -4.84368896484375, + "step": 574 + }, + { + "epoch": 0.73, + "learning_rate": 1.7527597583490822e-08, + "logits/chosen": -3.219900608062744, + "logits/rejected": -3.1315793991088867, + "logps/chosen": -297.710693359375, + "logps/rejected": -694.7577514648438, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6747680902481079, + "rewards/margins": 3.091146945953369, + "rewards/rejected": -2.416378974914551, + "step": 575 + }, + { + "epoch": 0.73, + "learning_rate": 1.7370711923791564e-08, + "logits/chosen": -3.180540084838867, + "logits/rejected": -3.201202392578125, + "logps/chosen": -263.25860595703125, + "logps/rejected": -886.6546020507812, + "loss": 0.2985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6137603521347046, + "rewards/margins": 4.799792289733887, + "rewards/rejected": -4.186032295227051, + "step": 576 + }, + { + "epoch": 0.74, + "learning_rate": 1.7214383808598282e-08, + "logits/chosen": -3.2566757202148438, + "logits/rejected": -3.072033643722534, + "logps/chosen": -264.0920715332031, + "logps/rejected": -583.1455078125, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5542236566543579, + "rewards/margins": 2.919516086578369, + "rewards/rejected": -2.3652923107147217, + "step": 577 + }, + { + "epoch": 0.74, + "learning_rate": 1.70586159091271e-08, + "logits/chosen": -3.2843995094299316, + "logits/rejected": -3.107170820236206, + "logps/chosen": -288.90374755859375, + "logps/rejected": -726.0707397460938, + "loss": 0.3527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7735260128974915, + "rewards/margins": 4.147351264953613, + "rewards/rejected": -3.3738250732421875, + "step": 578 + }, + { + "epoch": 0.74, + "learning_rate": 1.6903410887021675e-08, + "logits/chosen": -3.2015161514282227, + "logits/rejected": -3.1310484409332275, + "logps/chosen": -247.7593994140625, + "logps/rejected": -641.6472778320312, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4243263006210327, + "rewards/margins": 3.4004859924316406, + "rewards/rejected": -2.9761595726013184, + "step": 579 + }, + { + "epoch": 0.74, + "learning_rate": 1.674877139430758e-08, + "logits/chosen": -3.2095062732696533, + "logits/rejected": -3.0416154861450195, + "logps/chosen": -231.75184631347656, + "logps/rejected": -819.450439453125, + "loss": 0.2682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8558929562568665, + "rewards/margins": 3.733224391937256, + "rewards/rejected": -2.877331495285034, + "step": 580 + }, + { + "epoch": 0.74, + "learning_rate": 1.6594700073347112e-08, + "logits/chosen": -3.212902307510376, + "logits/rejected": -3.1676406860351562, + "logps/chosen": -251.88653564453125, + "logps/rejected": -690.9893188476562, + "loss": 0.3522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6753021478652954, + "rewards/margins": 3.9314608573913574, + "rewards/rejected": -3.2561585903167725, + "step": 581 + }, + { + "epoch": 0.74, + "learning_rate": 1.6441199556794033e-08, + "logits/chosen": -3.281851053237915, + "logits/rejected": -3.104097366333008, + "logps/chosen": -272.9608154296875, + "logps/rejected": -974.7318115234375, + "loss": 0.3577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8353775143623352, + "rewards/margins": 4.374662399291992, + "rewards/rejected": -3.539285182952881, + "step": 582 + }, + { + "epoch": 0.74, + "learning_rate": 1.6288272467548632e-08, + "logits/chosen": -3.254124164581299, + "logits/rejected": -3.173774242401123, + "logps/chosen": -242.17605590820312, + "logps/rejected": -691.4262084960938, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6061874628067017, + "rewards/margins": 3.922886610031128, + "rewards/rejected": -3.316699266433716, + "step": 583 + }, + { + "epoch": 0.74, + "learning_rate": 1.6135921418712954e-08, + "logits/chosen": -3.2204155921936035, + "logits/rejected": -3.1220388412475586, + "logps/chosen": -313.1514892578125, + "logps/rejected": -622.0316772460938, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7595703601837158, + "rewards/margins": 3.1077651977539062, + "rewards/rejected": -2.3481948375701904, + "step": 584 + }, + { + "epoch": 0.75, + "learning_rate": 1.5984149013546046e-08, + "logits/chosen": -3.196629047393799, + "logits/rejected": -3.056529998779297, + "logps/chosen": -237.9798583984375, + "logps/rejected": -537.5368041992188, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8136871457099915, + "rewards/margins": 2.6264710426330566, + "rewards/rejected": -1.81278395652771, + "step": 585 + }, + { + "epoch": 0.75, + "learning_rate": 1.583295784541958e-08, + "logits/chosen": -3.209592342376709, + "logits/rejected": -3.214223861694336, + "logps/chosen": -259.4452209472656, + "logps/rejected": -974.7815551757812, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8515586853027344, + "rewards/margins": 5.329689979553223, + "rewards/rejected": -4.478131294250488, + "step": 586 + }, + { + "epoch": 0.75, + "learning_rate": 1.568235049777345e-08, + "logits/chosen": -3.2113938331604004, + "logits/rejected": -3.1722593307495117, + "logps/chosen": -276.16790771484375, + "logps/rejected": -690.9630126953125, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6482262015342712, + "rewards/margins": 3.0896096229553223, + "rewards/rejected": -2.4413833618164062, + "step": 587 + }, + { + "epoch": 0.75, + "learning_rate": 1.553232954407171e-08, + "logits/chosen": -3.2766971588134766, + "logits/rejected": -3.182961940765381, + "logps/chosen": -265.0888977050781, + "logps/rejected": -848.570068359375, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6032425165176392, + "rewards/margins": 3.9435958862304688, + "rewards/rejected": -3.340353488922119, + "step": 588 + }, + { + "epoch": 0.75, + "learning_rate": 1.5382897547758512e-08, + "logits/chosen": -3.2357239723205566, + "logits/rejected": -3.08235239982605, + "logps/chosen": -300.037353515625, + "logps/rejected": -348.5211181640625, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.765881359577179, + "rewards/margins": 1.6757423877716064, + "rewards/rejected": -0.9098610281944275, + "step": 589 + }, + { + "epoch": 0.75, + "learning_rate": 1.52340570622144e-08, + "logits/chosen": -3.2613449096679688, + "logits/rejected": -3.0757498741149902, + "logps/chosen": -262.97979736328125, + "logps/rejected": -650.2523803710938, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8023742437362671, + "rewards/margins": 3.185391426086426, + "rewards/rejected": -2.383017063140869, + "step": 590 + }, + { + "epoch": 0.75, + "learning_rate": 1.508581063071258e-08, + "logits/chosen": -3.269656181335449, + "logits/rejected": -3.1170473098754883, + "logps/chosen": -307.4373779296875, + "logps/rejected": -531.6432495117188, + "loss": 0.3802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9370819330215454, + "rewards/margins": 3.006863594055176, + "rewards/rejected": -2.069781541824341, + "step": 591 + }, + { + "epoch": 0.75, + "learning_rate": 1.493816078637557e-08, + "logits/chosen": -3.2598469257354736, + "logits/rejected": -3.1211001873016357, + "logps/chosen": -259.82281494140625, + "logps/rejected": -859.2696533203125, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6058136224746704, + "rewards/margins": 4.092623710632324, + "rewards/rejected": -3.4868102073669434, + "step": 592 + }, + { + "epoch": 0.76, + "learning_rate": 1.47911100521318e-08, + "logits/chosen": -3.2074780464172363, + "logits/rejected": -3.029238224029541, + "logps/chosen": -260.6468505859375, + "logps/rejected": -508.2646789550781, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8573929071426392, + "rewards/margins": 2.5127792358398438, + "rewards/rejected": -1.6553864479064941, + "step": 593 + }, + { + "epoch": 0.76, + "learning_rate": 1.4644660940672625e-08, + "logits/chosen": -3.2536020278930664, + "logits/rejected": -3.132767915725708, + "logps/chosen": -264.77325439453125, + "logps/rejected": -624.4751586914062, + "loss": 0.3448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8391464352607727, + "rewards/margins": 3.575000047683716, + "rewards/rejected": -2.735853672027588, + "step": 594 + }, + { + "epoch": 0.76, + "learning_rate": 1.4498815954409278e-08, + "logits/chosen": -3.1840949058532715, + "logits/rejected": -3.1417832374572754, + "logps/chosen": -254.8790283203125, + "logps/rejected": -444.6746826171875, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5428146123886108, + "rewards/margins": 2.410038948059082, + "rewards/rejected": -1.8672242164611816, + "step": 595 + }, + { + "epoch": 0.76, + "learning_rate": 1.4353577585430148e-08, + "logits/chosen": -3.3116440773010254, + "logits/rejected": -3.1355459690093994, + "logps/chosen": -226.31564331054688, + "logps/rejected": -326.3486022949219, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7346245050430298, + "rewards/margins": 1.9178825616836548, + "rewards/rejected": -1.183258056640625, + "step": 596 + }, + { + "epoch": 0.76, + "learning_rate": 1.4208948315458275e-08, + "logits/chosen": -3.255610704421997, + "logits/rejected": -2.96073579788208, + "logps/chosen": -270.276611328125, + "logps/rejected": -1559.378173828125, + "loss": 0.3499, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9235389828681946, + "rewards/margins": 5.816305160522461, + "rewards/rejected": -4.892765998840332, + "step": 597 + }, + { + "epoch": 0.76, + "learning_rate": 1.4064930615808806e-08, + "logits/chosen": -3.2189974784851074, + "logits/rejected": -3.1255788803100586, + "logps/chosen": -246.2424774169922, + "logps/rejected": -976.677001953125, + "loss": 0.327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7222961187362671, + "rewards/margins": 4.649927139282227, + "rewards/rejected": -3.92763090133667, + "step": 598 + }, + { + "epoch": 0.76, + "learning_rate": 1.39215269473469e-08, + "logits/chosen": -3.244307041168213, + "logits/rejected": -3.1175436973571777, + "logps/chosen": -286.91278076171875, + "logps/rejected": -439.421630859375, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8845535516738892, + "rewards/margins": 2.334582567214966, + "rewards/rejected": -1.4500290155410767, + "step": 599 + }, + { + "epoch": 0.76, + "learning_rate": 1.3778739760445552e-08, + "logits/chosen": -3.1729612350463867, + "logits/rejected": -3.093705177307129, + "logps/chosen": -238.8199462890625, + "logps/rejected": -404.5430603027344, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9564521908760071, + "rewards/margins": 2.3683066368103027, + "rewards/rejected": -1.4118545055389404, + "step": 600 + }, + { + "epoch": 0.77, + "learning_rate": 1.3636571494943861e-08, + "logits/chosen": -3.239375352859497, + "logits/rejected": -3.108626127243042, + "logps/chosen": -244.0706329345703, + "logps/rejected": -718.9979248046875, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8864517211914062, + "rewards/margins": 3.942857503890991, + "rewards/rejected": -3.056405782699585, + "step": 601 + }, + { + "epoch": 0.77, + "learning_rate": 1.349502458010519e-08, + "logits/chosen": -3.262157678604126, + "logits/rejected": -3.16949725151062, + "logps/chosen": -242.5258331298828, + "logps/rejected": -1052.770263671875, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6070709228515625, + "rewards/margins": 5.153282165527344, + "rewards/rejected": -4.546211242675781, + "step": 602 + }, + { + "epoch": 0.77, + "learning_rate": 1.3354101434575805e-08, + "logits/chosen": -3.16715145111084, + "logits/rejected": -3.1702661514282227, + "logps/chosen": -269.1809387207031, + "logps/rejected": -627.1336669921875, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8443955183029175, + "rewards/margins": 3.5696229934692383, + "rewards/rejected": -2.7252273559570312, + "step": 603 + }, + { + "epoch": 0.77, + "learning_rate": 1.321380446634342e-08, + "logits/chosen": -3.2240030765533447, + "logits/rejected": -3.086742877960205, + "logps/chosen": -296.9527282714844, + "logps/rejected": -532.0386962890625, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7529968023300171, + "rewards/margins": 3.0920486450195312, + "rewards/rejected": -2.3390517234802246, + "step": 604 + }, + { + "epoch": 0.77, + "learning_rate": 1.3074136072696147e-08, + "logits/chosen": -3.2049050331115723, + "logits/rejected": -3.121495008468628, + "logps/chosen": -287.7156982421875, + "logps/rejected": -782.4032592773438, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9978119134902954, + "rewards/margins": 3.9007294178009033, + "rewards/rejected": -2.9029173851013184, + "step": 605 + }, + { + "epoch": 0.77, + "learning_rate": 1.2935098640181458e-08, + "logits/chosen": -3.296781301498413, + "logits/rejected": -3.1518893241882324, + "logps/chosen": -265.271484375, + "logps/rejected": -310.5017395019531, + "loss": 0.4158, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7511230707168579, + "rewards/margins": 1.7684905529022217, + "rewards/rejected": -1.0173676013946533, + "step": 606 + }, + { + "epoch": 0.77, + "learning_rate": 1.2796694544565478e-08, + "logits/chosen": -3.231159210205078, + "logits/rejected": -3.1284525394439697, + "logps/chosen": -289.57952880859375, + "logps/rejected": -861.7388916015625, + "loss": 0.3307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9803787469863892, + "rewards/margins": 4.5113725662231445, + "rewards/rejected": -3.530993938446045, + "step": 607 + }, + { + "epoch": 0.78, + "learning_rate": 1.2658926150792321e-08, + "logits/chosen": -3.1902260780334473, + "logits/rejected": -2.9929423332214355, + "logps/chosen": -267.03802490234375, + "logps/rejected": -1192.5030517578125, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9387588500976562, + "rewards/margins": 4.886149883270264, + "rewards/rejected": -3.9473910331726074, + "step": 608 + }, + { + "epoch": 0.78, + "learning_rate": 1.2521795812943703e-08, + "logits/chosen": -3.2729697227478027, + "logits/rejected": -3.162052631378174, + "logps/chosen": -284.13909912109375, + "logps/rejected": -643.8899536132812, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7565383911132812, + "rewards/margins": 2.675480842590332, + "rewards/rejected": -1.9189423322677612, + "step": 609 + }, + { + "epoch": 0.78, + "learning_rate": 1.2385305874198776e-08, + "logits/chosen": -3.2392334938049316, + "logits/rejected": -3.0918405055999756, + "logps/chosen": -297.8041687011719, + "logps/rejected": -1079.1787109375, + "loss": 0.3238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8178161382675171, + "rewards/margins": 4.450842380523682, + "rewards/rejected": -3.633026123046875, + "step": 610 + }, + { + "epoch": 0.78, + "learning_rate": 1.2249458666793966e-08, + "logits/chosen": -3.200162410736084, + "logits/rejected": -3.0358099937438965, + "logps/chosen": -280.21942138671875, + "logps/rejected": -1623.296875, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9172439575195312, + "rewards/margins": 7.196467876434326, + "rewards/rejected": -6.279223918914795, + "step": 611 + }, + { + "epoch": 0.78, + "learning_rate": 1.2114256511983274e-08, + "logits/chosen": -3.2220985889434814, + "logits/rejected": -3.039022445678711, + "logps/chosen": -261.5396423339844, + "logps/rejected": -321.00494384765625, + "loss": 0.358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8703445196151733, + "rewards/margins": 1.7916351556777954, + "rewards/rejected": -0.9212906360626221, + "step": 612 + }, + { + "epoch": 0.78, + "learning_rate": 1.1979701719998452e-08, + "logits/chosen": -3.2122907638549805, + "logits/rejected": -3.0807504653930664, + "logps/chosen": -282.5753173828125, + "logps/rejected": -493.34033203125, + "loss": 0.339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7439498901367188, + "rewards/margins": 2.802981376647949, + "rewards/rejected": -2.0590317249298096, + "step": 613 + }, + { + "epoch": 0.78, + "learning_rate": 1.1845796590009682e-08, + "logits/chosen": -3.134213447570801, + "logits/rejected": -3.0512943267822266, + "logps/chosen": -319.9018249511719, + "logps/rejected": -722.2220458984375, + "loss": 0.3872, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7176147699356079, + "rewards/margins": 3.3767120838165283, + "rewards/rejected": -2.659097194671631, + "step": 614 + }, + { + "epoch": 0.78, + "learning_rate": 1.1712543410086145e-08, + "logits/chosen": -3.264127731323242, + "logits/rejected": -3.0889129638671875, + "logps/chosen": -273.6406555175781, + "logps/rejected": -460.95587158203125, + "loss": 0.3414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.830718994140625, + "rewards/margins": 2.4604110717773438, + "rewards/rejected": -1.6296920776367188, + "step": 615 + }, + { + "epoch": 0.79, + "learning_rate": 1.157994445715706e-08, + "logits/chosen": -3.2200517654418945, + "logits/rejected": -3.0461206436157227, + "logps/chosen": -234.89010620117188, + "logps/rejected": -340.8524475097656, + "loss": 0.3625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.762555718421936, + "rewards/margins": 2.055816650390625, + "rewards/rejected": -1.2932610511779785, + "step": 616 + }, + { + "epoch": 0.79, + "learning_rate": 1.1448001996972645e-08, + "logits/chosen": -3.2228617668151855, + "logits/rejected": -3.0593819618225098, + "logps/chosen": -277.28985595703125, + "logps/rejected": -1326.9501953125, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.907885730266571, + "rewards/margins": 5.62481689453125, + "rewards/rejected": -4.716931343078613, + "step": 617 + }, + { + "epoch": 0.79, + "learning_rate": 1.1316718284065534e-08, + "logits/chosen": -3.23372220993042, + "logits/rejected": -3.0840184688568115, + "logps/chosen": -267.68267822265625, + "logps/rejected": -1322.51904296875, + "loss": 0.3269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8411957025527954, + "rewards/margins": 5.295495986938477, + "rewards/rejected": -4.4542999267578125, + "step": 618 + }, + { + "epoch": 0.79, + "learning_rate": 1.1186095561712128e-08, + "logits/chosen": -3.2667455673217773, + "logits/rejected": -3.1722030639648438, + "logps/chosen": -287.40716552734375, + "logps/rejected": -397.81927490234375, + "loss": 0.373, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9496619701385498, + "rewards/margins": 2.7843117713928223, + "rewards/rejected": -1.834649682044983, + "step": 619 + }, + { + "epoch": 0.79, + "learning_rate": 1.1056136061894383e-08, + "logits/chosen": -3.219339370727539, + "logits/rejected": -3.128828525543213, + "logps/chosen": -285.2663269042969, + "logps/rejected": -813.4326782226562, + "loss": 0.3254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6965850591659546, + "rewards/margins": 4.379861354827881, + "rewards/rejected": -3.6832761764526367, + "step": 620 + }, + { + "epoch": 0.79, + "learning_rate": 1.0926842005261549e-08, + "logits/chosen": -3.321390151977539, + "logits/rejected": -3.230783462524414, + "logps/chosen": -265.8953857421875, + "logps/rejected": -589.9327392578125, + "loss": 0.3482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.883923351764679, + "rewards/margins": 3.813711643218994, + "rewards/rejected": -2.929788112640381, + "step": 621 + }, + { + "epoch": 0.79, + "learning_rate": 1.0798215601092353e-08, + "logits/chosen": -3.245065212249756, + "logits/rejected": -3.2142539024353027, + "logps/chosen": -274.82958984375, + "logps/rejected": -750.7361450195312, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8068588376045227, + "rewards/margins": 3.627363681793213, + "rewards/rejected": -2.820504903793335, + "step": 622 + }, + { + "epoch": 0.79, + "learning_rate": 1.067025904725713e-08, + "logits/chosen": -3.267082691192627, + "logits/rejected": -3.1954727172851562, + "logps/chosen": -250.8492889404297, + "logps/rejected": -1094.5828857421875, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7159652709960938, + "rewards/margins": 4.980250358581543, + "rewards/rejected": -4.264285087585449, + "step": 623 + }, + { + "epoch": 0.8, + "learning_rate": 1.0542974530180326e-08, + "logits/chosen": -3.226515769958496, + "logits/rejected": -3.1095757484436035, + "logps/chosen": -295.47283935546875, + "logps/rejected": -537.826171875, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7907288074493408, + "rewards/margins": 2.8970580101013184, + "rewards/rejected": -2.1063294410705566, + "step": 624 + }, + { + "epoch": 0.8, + "learning_rate": 1.0416364224803182e-08, + "logits/chosen": -3.2347209453582764, + "logits/rejected": -3.0763349533081055, + "logps/chosen": -247.81588745117188, + "logps/rejected": -394.8608093261719, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8380180597305298, + "rewards/margins": 2.140908718109131, + "rewards/rejected": -1.3028907775878906, + "step": 625 + }, + { + "epoch": 0.8, + "learning_rate": 1.0290430294546448e-08, + "logits/chosen": -3.19057035446167, + "logits/rejected": -3.156205177307129, + "logps/chosen": -268.1761474609375, + "logps/rejected": -688.771484375, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8818244934082031, + "rewards/margins": 3.8507609367370605, + "rewards/rejected": -2.9689364433288574, + "step": 626 + }, + { + "epoch": 0.8, + "learning_rate": 1.016517489127357e-08, + "logits/chosen": -3.150466203689575, + "logits/rejected": -3.193265676498413, + "logps/chosen": -284.2330322265625, + "logps/rejected": -778.9207153320312, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9542198181152344, + "rewards/margins": 3.9362969398498535, + "rewards/rejected": -2.982077121734619, + "step": 627 + }, + { + "epoch": 0.8, + "learning_rate": 1.0040600155253764e-08, + "logits/chosen": -3.2606420516967773, + "logits/rejected": -3.105515480041504, + "logps/chosen": -315.841064453125, + "logps/rejected": -905.60107421875, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9799103140830994, + "rewards/margins": 4.161535739898682, + "rewards/rejected": -3.1816253662109375, + "step": 628 + }, + { + "epoch": 0.8, + "learning_rate": 9.916708215125585e-09, + "logits/chosen": -3.290992259979248, + "logits/rejected": -3.180893898010254, + "logps/chosen": -265.87713623046875, + "logps/rejected": -574.6396484375, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5150467157363892, + "rewards/margins": 2.876455783843994, + "rewards/rejected": -2.3614089488983154, + "step": 629 + }, + { + "epoch": 0.8, + "learning_rate": 9.793501187860431e-09, + "logits/chosen": -3.24247670173645, + "logits/rejected": -3.1031603813171387, + "logps/chosen": -259.26220703125, + "logps/rejected": -344.2506103515625, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8399360775947571, + "rewards/margins": 1.9401390552520752, + "rewards/rejected": -1.1002029180526733, + "step": 630 + }, + { + "epoch": 0.8, + "learning_rate": 9.670981178726485e-09, + "logits/chosen": -3.2606563568115234, + "logits/rejected": -3.1002097129821777, + "logps/chosen": -331.642822265625, + "logps/rejected": -502.83172607421875, + "loss": 0.3777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8525130748748779, + "rewards/margins": 2.450408935546875, + "rewards/rejected": -1.597895860671997, + "step": 631 + }, + { + "epoch": 0.81, + "learning_rate": 9.549150281252633e-09, + "logits/chosen": -3.224053382873535, + "logits/rejected": -3.048849582672119, + "logps/chosen": -296.49981689453125, + "logps/rejected": -566.3850708007812, + "loss": 0.412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9045372009277344, + "rewards/margins": 3.046886444091797, + "rewards/rejected": -2.1423492431640625, + "step": 632 + }, + { + "epoch": 0.81, + "learning_rate": 9.428010577192796e-09, + "logits/chosen": -3.230680465698242, + "logits/rejected": -3.096048355102539, + "logps/chosen": -263.2460021972656, + "logps/rejected": -856.3187255859375, + "loss": 0.3024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7858231067657471, + "rewards/margins": 4.621345520019531, + "rewards/rejected": -3.8355226516723633, + "step": 633 + }, + { + "epoch": 0.81, + "learning_rate": 9.307564136490254e-09, + "logits/chosen": -3.205601692199707, + "logits/rejected": -3.0472912788391113, + "logps/chosen": -257.90216064453125, + "logps/rejected": -291.5272216796875, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8356964588165283, + "rewards/margins": 1.7314949035644531, + "rewards/rejected": -0.8957985639572144, + "step": 634 + }, + { + "epoch": 0.81, + "learning_rate": 9.187813017242386e-09, + "logits/chosen": -3.25262713432312, + "logits/rejected": -3.1397504806518555, + "logps/chosen": -322.9963684082031, + "logps/rejected": -805.574951171875, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.837127685546875, + "rewards/margins": 4.050183296203613, + "rewards/rejected": -3.2130556106567383, + "step": 635 + }, + { + "epoch": 0.81, + "learning_rate": 9.068759265665382e-09, + "logits/chosen": -3.1995887756347656, + "logits/rejected": -3.1402158737182617, + "logps/chosen": -287.28558349609375, + "logps/rejected": -703.38037109375, + "loss": 0.3892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7043060660362244, + "rewards/margins": 3.5187606811523438, + "rewards/rejected": -2.8144547939300537, + "step": 636 + }, + { + "epoch": 0.81, + "learning_rate": 8.950404916059406e-09, + "logits/chosen": -3.2456765174865723, + "logits/rejected": -3.1079654693603516, + "logps/chosen": -253.7916259765625, + "logps/rejected": -606.0684814453125, + "loss": 0.3467, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6352379322052002, + "rewards/margins": 3.441295623779297, + "rewards/rejected": -2.8060576915740967, + "step": 637 + }, + { + "epoch": 0.81, + "learning_rate": 8.832751990773713e-09, + "logits/chosen": -3.21884822845459, + "logits/rejected": -3.1140284538269043, + "logps/chosen": -266.59344482421875, + "logps/rejected": -771.6070556640625, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8975380063056946, + "rewards/margins": 4.223201751708984, + "rewards/rejected": -3.3256638050079346, + "step": 638 + }, + { + "epoch": 0.81, + "learning_rate": 8.715802500172214e-09, + "logits/chosen": -3.2292959690093994, + "logits/rejected": -3.1140847206115723, + "logps/chosen": -270.13677978515625, + "logps/rejected": -493.5082702636719, + "loss": 0.3642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.775634765625, + "rewards/margins": 2.4709715843200684, + "rewards/rejected": -1.695336937904358, + "step": 639 + }, + { + "epoch": 0.82, + "learning_rate": 8.599558442598998e-09, + "logits/chosen": -3.277834892272949, + "logits/rejected": -3.188835620880127, + "logps/chosen": -247.6739501953125, + "logps/rejected": -803.728515625, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9148109555244446, + "rewards/margins": 5.250824928283691, + "rewards/rejected": -4.3360137939453125, + "step": 640 + }, + { + "epoch": 0.82, + "learning_rate": 8.484021804344305e-09, + "logits/chosen": -3.2526276111602783, + "logits/rejected": -3.0776143074035645, + "logps/chosen": -260.8329772949219, + "logps/rejected": -379.08697509765625, + "loss": 0.3778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7876556515693665, + "rewards/margins": 2.3095459938049316, + "rewards/rejected": -1.52189040184021, + "step": 641 + }, + { + "epoch": 0.82, + "learning_rate": 8.369194559610482e-09, + "logits/chosen": -3.2046070098876953, + "logits/rejected": -3.0955653190612793, + "logps/chosen": -259.64007568359375, + "logps/rejected": -1075.719482421875, + "loss": 0.3, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8056808710098267, + "rewards/margins": 4.905128479003906, + "rewards/rejected": -4.099447727203369, + "step": 642 + }, + { + "epoch": 0.82, + "learning_rate": 8.25507867047835e-09, + "logits/chosen": -3.1891536712646484, + "logits/rejected": -3.0932040214538574, + "logps/chosen": -270.5694885253906, + "logps/rejected": -538.80126953125, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9318817853927612, + "rewards/margins": 2.9625701904296875, + "rewards/rejected": -2.030688524246216, + "step": 643 + }, + { + "epoch": 0.82, + "learning_rate": 8.141676086873572e-09, + "logits/chosen": -3.233471632003784, + "logits/rejected": -3.099256992340088, + "logps/chosen": -291.05718994140625, + "logps/rejected": -618.6149291992188, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6541283130645752, + "rewards/margins": 3.2421700954437256, + "rewards/rejected": -2.5880417823791504, + "step": 644 + }, + { + "epoch": 0.82, + "learning_rate": 8.028988746533432e-09, + "logits/chosen": -3.2007462978363037, + "logits/rejected": -3.0597376823425293, + "logps/chosen": -240.42947387695312, + "logps/rejected": -897.369140625, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7757827639579773, + "rewards/margins": 4.001484394073486, + "rewards/rejected": -3.2257018089294434, + "step": 645 + }, + { + "epoch": 0.82, + "learning_rate": 7.917018574973644e-09, + "logits/chosen": -3.275796413421631, + "logits/rejected": -3.1278076171875, + "logps/chosen": -277.861572265625, + "logps/rejected": -1840.02783203125, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6886497735977173, + "rewards/margins": 7.736693382263184, + "rewards/rejected": -7.048044204711914, + "step": 646 + }, + { + "epoch": 0.82, + "learning_rate": 7.805767485455527e-09, + "logits/chosen": -3.168844699859619, + "logits/rejected": -3.1636595726013184, + "logps/chosen": -270.24725341796875, + "logps/rejected": -877.8204345703125, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6814849972724915, + "rewards/margins": 4.268237113952637, + "rewards/rejected": -3.586752414703369, + "step": 647 + }, + { + "epoch": 0.83, + "learning_rate": 7.695237378953223e-09, + "logits/chosen": -3.2921862602233887, + "logits/rejected": -3.137451171875, + "logps/chosen": -263.9300537109375, + "logps/rejected": -1130.2633056640625, + "loss": 0.3423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7772384881973267, + "rewards/margins": 5.009352207183838, + "rewards/rejected": -4.232113838195801, + "step": 648 + }, + { + "epoch": 0.83, + "learning_rate": 7.585430144121319e-09, + "logits/chosen": -3.240070104598999, + "logits/rejected": -3.1170809268951416, + "logps/chosen": -266.337158203125, + "logps/rejected": -790.9242553710938, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7010498046875, + "rewards/margins": 4.286992073059082, + "rewards/rejected": -3.585942268371582, + "step": 649 + }, + { + "epoch": 0.83, + "learning_rate": 7.476347657262455e-09, + "logits/chosen": -3.228572368621826, + "logits/rejected": -3.164642810821533, + "logps/chosen": -279.3407897949219, + "logps/rejected": -949.052978515625, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7148071527481079, + "rewards/margins": 4.368072509765625, + "rewards/rejected": -3.6532654762268066, + "step": 650 + }, + { + "epoch": 0.83, + "learning_rate": 7.367991782295391e-09, + "logits/chosen": -3.2447760105133057, + "logits/rejected": -3.0467052459716797, + "logps/chosen": -280.22955322265625, + "logps/rejected": -957.3624877929688, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8054282665252686, + "rewards/margins": 3.538623809814453, + "rewards/rejected": -2.7331955432891846, + "step": 651 + }, + { + "epoch": 0.83, + "learning_rate": 7.260364370723044e-09, + "logits/chosen": -3.3169057369232178, + "logits/rejected": -3.1210479736328125, + "logps/chosen": -257.0567626953125, + "logps/rejected": -1055.1539306640625, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8571395874023438, + "rewards/margins": 5.098426818847656, + "rewards/rejected": -4.2412872314453125, + "step": 652 + }, + { + "epoch": 0.83, + "learning_rate": 7.153467261600948e-09, + "logits/chosen": -3.215259075164795, + "logits/rejected": -3.0700697898864746, + "logps/chosen": -253.77047729492188, + "logps/rejected": -1324.9619140625, + "loss": 0.3481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8690963983535767, + "rewards/margins": 5.001731872558594, + "rewards/rejected": -4.132635593414307, + "step": 653 + }, + { + "epoch": 0.83, + "learning_rate": 7.047302281505735e-09, + "logits/chosen": -3.241987705230713, + "logits/rejected": -3.1387643814086914, + "logps/chosen": -267.185546875, + "logps/rejected": -390.2718811035156, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7913795709609985, + "rewards/margins": 2.4314613342285156, + "rewards/rejected": -1.6400818824768066, + "step": 654 + }, + { + "epoch": 0.83, + "learning_rate": 6.9418712445040165e-09, + "logits/chosen": -3.2298169136047363, + "logits/rejected": -3.0652990341186523, + "logps/chosen": -285.72515869140625, + "logps/rejected": -607.1198120117188, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.665869951248169, + "rewards/margins": 2.764878273010254, + "rewards/rejected": -2.099008321762085, + "step": 655 + }, + { + "epoch": 0.84, + "learning_rate": 6.837175952121305e-09, + "logits/chosen": -3.2055540084838867, + "logits/rejected": -3.108689785003662, + "logps/chosen": -258.4341125488281, + "logps/rejected": -440.30126953125, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7607628107070923, + "rewards/margins": 2.4394845962524414, + "rewards/rejected": -1.6787216663360596, + "step": 656 + }, + { + "epoch": 0.84, + "learning_rate": 6.733218193311291e-09, + "logits/chosen": -3.2488884925842285, + "logits/rejected": -3.1397905349731445, + "logps/chosen": -276.1654052734375, + "logps/rejected": -620.6270141601562, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8745224475860596, + "rewards/margins": 3.3997957706451416, + "rewards/rejected": -2.525273323059082, + "step": 657 + }, + { + "epoch": 0.84, + "learning_rate": 6.629999744425236e-09, + "logits/chosen": -3.195970058441162, + "logits/rejected": -3.0826218128204346, + "logps/chosen": -241.87709045410156, + "logps/rejected": -385.635986328125, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.882379949092865, + "rewards/margins": 2.1483407020568848, + "rewards/rejected": -1.265960693359375, + "step": 658 + }, + { + "epoch": 0.84, + "learning_rate": 6.527522369181654e-09, + "logits/chosen": -3.210036277770996, + "logits/rejected": -3.122143268585205, + "logps/chosen": -245.505859375, + "logps/rejected": -531.7161865234375, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7047874927520752, + "rewards/margins": 2.6935219764709473, + "rewards/rejected": -1.988734483718872, + "step": 659 + }, + { + "epoch": 0.84, + "learning_rate": 6.42578781863613e-09, + "logits/chosen": -3.2277917861938477, + "logits/rejected": -3.1478610038757324, + "logps/chosen": -274.95391845703125, + "logps/rejected": -816.93359375, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6882766485214233, + "rewards/margins": 4.314537048339844, + "rewards/rejected": -3.62626051902771, + "step": 660 + }, + { + "epoch": 0.84, + "learning_rate": 6.324797831151452e-09, + "logits/chosen": -3.258517026901245, + "logits/rejected": -3.088326930999756, + "logps/chosen": -272.386474609375, + "logps/rejected": -1103.228271484375, + "loss": 0.3149, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7975845336914062, + "rewards/margins": 4.125123977661133, + "rewards/rejected": -3.3275389671325684, + "step": 661 + }, + { + "epoch": 0.84, + "learning_rate": 6.22455413236786e-09, + "logits/chosen": -3.248830795288086, + "logits/rejected": -3.126649856567383, + "logps/chosen": -258.59112548828125, + "logps/rejected": -335.61822509765625, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7385597229003906, + "rewards/margins": 1.7032709121704102, + "rewards/rejected": -0.96471107006073, + "step": 662 + }, + { + "epoch": 0.85, + "learning_rate": 6.125058435173569e-09, + "logits/chosen": -3.1864848136901855, + "logits/rejected": -3.097672939300537, + "logps/chosen": -283.72235107421875, + "logps/rejected": -820.0333862304688, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6499595642089844, + "rewards/margins": 4.744304656982422, + "rewards/rejected": -4.0943450927734375, + "step": 663 + }, + { + "epoch": 0.85, + "learning_rate": 6.026312439675552e-09, + "logits/chosen": -3.2629382610321045, + "logits/rejected": -3.156233072280884, + "logps/chosen": -280.79217529296875, + "logps/rejected": -824.944091796875, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9498001337051392, + "rewards/margins": 4.445833206176758, + "rewards/rejected": -3.49603271484375, + "step": 664 + }, + { + "epoch": 0.85, + "learning_rate": 5.928317833170393e-09, + "logits/chosen": -3.2251627445220947, + "logits/rejected": -3.101989507675171, + "logps/chosen": -313.6543273925781, + "logps/rejected": -851.5701904296875, + "loss": 0.3639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9581406116485596, + "rewards/margins": 3.8793230056762695, + "rewards/rejected": -2.92118239402771, + "step": 665 + }, + { + "epoch": 0.85, + "learning_rate": 5.831076290115572e-09, + "logits/chosen": -3.257369041442871, + "logits/rejected": -3.1463353633880615, + "logps/chosen": -294.232421875, + "logps/rejected": -718.0497436523438, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5272110104560852, + "rewards/margins": 3.7657546997070312, + "rewards/rejected": -3.238543748855591, + "step": 666 + }, + { + "epoch": 0.85, + "learning_rate": 5.734589472100737e-09, + "logits/chosen": -3.259556293487549, + "logits/rejected": -3.074402332305908, + "logps/chosen": -284.96759033203125, + "logps/rejected": -534.6031494140625, + "loss": 0.3553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6967712640762329, + "rewards/margins": 2.7474396228790283, + "rewards/rejected": -2.050668478012085, + "step": 667 + }, + { + "epoch": 0.85, + "learning_rate": 5.638859027819409e-09, + "logits/chosen": -3.2867136001586914, + "logits/rejected": -3.0695364475250244, + "logps/chosen": -218.18319702148438, + "logps/rejected": -629.613037109375, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9582573175430298, + "rewards/margins": 3.0474328994750977, + "rewards/rejected": -2.0891754627227783, + "step": 668 + }, + { + "epoch": 0.85, + "learning_rate": 5.543886593040736e-09, + "logits/chosen": -3.2706522941589355, + "logits/rejected": -3.0805435180664062, + "logps/chosen": -287.7998046875, + "logps/rejected": -999.22314453125, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7398940920829773, + "rewards/margins": 4.46200704574585, + "rewards/rejected": -3.7221131324768066, + "step": 669 + }, + { + "epoch": 0.85, + "learning_rate": 5.44967379058161e-09, + "logits/chosen": -3.238269090652466, + "logits/rejected": -3.1634693145751953, + "logps/chosen": -250.25299072265625, + "logps/rejected": -668.2845458984375, + "loss": 0.3482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.672332763671875, + "rewards/margins": 3.0885252952575684, + "rewards/rejected": -2.4161927700042725, + "step": 670 + }, + { + "epoch": 0.86, + "learning_rate": 5.356222230278856e-09, + "logits/chosen": -3.1937990188598633, + "logits/rejected": -3.086355686187744, + "logps/chosen": -303.9136962890625, + "logps/rejected": -504.7870178222656, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8293396234512329, + "rewards/margins": 2.6176252365112305, + "rewards/rejected": -1.7882858514785767, + "step": 671 + }, + { + "epoch": 0.86, + "learning_rate": 5.263533508961826e-09, + "logits/chosen": -3.199711561203003, + "logits/rejected": -3.1121110916137695, + "logps/chosen": -239.69541931152344, + "logps/rejected": -266.79083251953125, + "loss": 0.3462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8468979001045227, + "rewards/margins": 1.6607437133789062, + "rewards/rejected": -0.8138458728790283, + "step": 672 + }, + { + "epoch": 0.86, + "learning_rate": 5.17160921042501e-09, + "logits/chosen": -3.2185745239257812, + "logits/rejected": -3.1409969329833984, + "logps/chosen": -256.9867858886719, + "logps/rejected": -480.81829833984375, + "loss": 0.351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.786974310874939, + "rewards/margins": 2.595003604888916, + "rewards/rejected": -1.8080291748046875, + "step": 673 + }, + { + "epoch": 0.86, + "learning_rate": 5.080450905401057e-09, + "logits/chosen": -3.261504888534546, + "logits/rejected": -3.139303684234619, + "logps/chosen": -262.05413818359375, + "logps/rejected": -1486.87451171875, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.754229724407196, + "rewards/margins": 5.502081394195557, + "rewards/rejected": -4.747851848602295, + "step": 674 + }, + { + "epoch": 0.86, + "learning_rate": 4.9900601515338705e-09, + "logits/chosen": -3.249941349029541, + "logits/rejected": -3.0796918869018555, + "logps/chosen": -260.467041015625, + "logps/rejected": -803.6678466796875, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7054840326309204, + "rewards/margins": 3.3023316860198975, + "rewards/rejected": -2.5968475341796875, + "step": 675 + }, + { + "epoch": 0.86, + "learning_rate": 4.9004384933520545e-09, + "logits/chosen": -3.2344675064086914, + "logits/rejected": -3.189659595489502, + "logps/chosen": -291.79449462890625, + "logps/rejected": -1072.9443359375, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7296584844589233, + "rewards/margins": 4.878309726715088, + "rewards/rejected": -4.148651123046875, + "step": 676 + }, + { + "epoch": 0.86, + "learning_rate": 4.811587462242461e-09, + "logits/chosen": -3.2319564819335938, + "logits/rejected": -3.199946880340576, + "logps/chosen": -299.3994140625, + "logps/rejected": -822.6114501953125, + "loss": 0.3593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8007248044013977, + "rewards/margins": 4.590837001800537, + "rewards/rejected": -3.790112257003784, + "step": 677 + }, + { + "epoch": 0.86, + "learning_rate": 4.7235085764240625e-09, + "logits/chosen": -3.2128543853759766, + "logits/rejected": -3.1113505363464355, + "logps/chosen": -273.83349609375, + "logps/rejected": -1055.688720703125, + "loss": 0.3421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6371170282363892, + "rewards/margins": 4.050736904144287, + "rewards/rejected": -3.4136199951171875, + "step": 678 + }, + { + "epoch": 0.87, + "learning_rate": 4.636203340922007e-09, + "logits/chosen": -3.1609649658203125, + "logits/rejected": -3.1033201217651367, + "logps/chosen": -258.3902893066406, + "logps/rejected": -696.2514038085938, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7696243524551392, + "rewards/margins": 3.2448716163635254, + "rewards/rejected": -2.475247383117676, + "step": 679 + }, + { + "epoch": 0.87, + "learning_rate": 4.549673247541874e-09, + "logits/chosen": -3.2252838611602783, + "logits/rejected": -3.0784544944763184, + "logps/chosen": -288.12017822265625, + "logps/rejected": -520.4573974609375, + "loss": 0.3831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8203033804893494, + "rewards/margins": 2.9860596656799316, + "rewards/rejected": -2.1657562255859375, + "step": 680 + }, + { + "epoch": 0.87, + "learning_rate": 4.463919774844233e-09, + "logits/chosen": -3.268551826477051, + "logits/rejected": -3.1248412132263184, + "logps/chosen": -283.7618408203125, + "logps/rejected": -472.517333984375, + "loss": 0.4078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8618515133857727, + "rewards/margins": 2.7366943359375, + "rewards/rejected": -1.874842882156372, + "step": 681 + }, + { + "epoch": 0.87, + "learning_rate": 4.37894438811931e-09, + "logits/chosen": -3.286487102508545, + "logits/rejected": -3.150979518890381, + "logps/chosen": -262.02197265625, + "logps/rejected": -691.384521484375, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8526397943496704, + "rewards/margins": 3.3571276664733887, + "rewards/rejected": -2.5044877529144287, + "step": 682 + }, + { + "epoch": 0.87, + "learning_rate": 4.294748539362031e-09, + "logits/chosen": -3.2541635036468506, + "logits/rejected": -3.050356388092041, + "logps/chosen": -235.84060668945312, + "logps/rejected": -1407.5906982421875, + "loss": 0.3202, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6642807126045227, + "rewards/margins": 5.1970624923706055, + "rewards/rejected": -4.532782077789307, + "step": 683 + }, + { + "epoch": 0.87, + "learning_rate": 4.2113336672471245e-09, + "logits/chosen": -3.244515895843506, + "logits/rejected": -3.1308298110961914, + "logps/chosen": -279.16925048828125, + "logps/rejected": -567.5035400390625, + "loss": 0.3236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7613601684570312, + "rewards/margins": 2.6106858253479004, + "rewards/rejected": -1.8493255376815796, + "step": 684 + }, + { + "epoch": 0.87, + "learning_rate": 4.128701197104628e-09, + "logits/chosen": -3.241577625274658, + "logits/rejected": -3.07431697845459, + "logps/chosen": -237.4542694091797, + "logps/rejected": -461.625732421875, + "loss": 0.3607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9119964838027954, + "rewards/margins": 2.5098204612731934, + "rewards/rejected": -1.5978240966796875, + "step": 685 + }, + { + "epoch": 0.87, + "learning_rate": 4.0468525408954454e-09, + "logits/chosen": -3.2609798908233643, + "logits/rejected": -3.1983654499053955, + "logps/chosen": -255.9934844970703, + "logps/rejected": -547.359375, + "loss": 0.3631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6943817138671875, + "rewards/margins": 2.9671037197113037, + "rewards/rejected": -2.272722005844116, + "step": 686 + }, + { + "epoch": 0.88, + "learning_rate": 3.9657890971873e-09, + "logits/chosen": -3.25838041305542, + "logits/rejected": -3.1053104400634766, + "logps/chosen": -225.74244689941406, + "logps/rejected": -677.2281494140625, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.870343029499054, + "rewards/margins": 3.202092170715332, + "rewards/rejected": -2.3317489624023438, + "step": 687 + }, + { + "epoch": 0.88, + "learning_rate": 3.8855122511307626e-09, + "logits/chosen": -3.293426513671875, + "logits/rejected": -3.148578643798828, + "logps/chosen": -292.54693603515625, + "logps/rejected": -596.8907470703125, + "loss": 0.3399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9063476324081421, + "rewards/margins": 3.223806858062744, + "rewards/rejected": -2.3174591064453125, + "step": 688 + }, + { + "epoch": 0.88, + "learning_rate": 3.8060233744356625e-09, + "logits/chosen": -3.2181599140167236, + "logits/rejected": -3.127042770385742, + "logps/chosen": -238.89984130859375, + "logps/rejected": -811.244140625, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7790260314941406, + "rewards/margins": 4.129645347595215, + "rewards/rejected": -3.3506195545196533, + "step": 689 + }, + { + "epoch": 0.88, + "learning_rate": 3.727323825347578e-09, + "logits/chosen": -3.2575578689575195, + "logits/rejected": -3.1952733993530273, + "logps/chosen": -246.6340789794922, + "logps/rejected": -408.73077392578125, + "loss": 0.3356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8521164059638977, + "rewards/margins": 2.6924514770507812, + "rewards/rejected": -1.8403351306915283, + "step": 690 + }, + { + "epoch": 0.88, + "learning_rate": 3.649414948624652e-09, + "logits/chosen": -3.213984489440918, + "logits/rejected": -3.1122210025787354, + "logps/chosen": -250.63784790039062, + "logps/rejected": -838.983642578125, + "loss": 0.3307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7247978448867798, + "rewards/margins": 4.397411346435547, + "rewards/rejected": -3.6726136207580566, + "step": 691 + }, + { + "epoch": 0.88, + "learning_rate": 3.5722980755146515e-09, + "logits/chosen": -3.2584238052368164, + "logits/rejected": -3.191072463989258, + "logps/chosen": -277.1600036621094, + "logps/rejected": -814.9759521484375, + "loss": 0.3095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7908523678779602, + "rewards/margins": 4.456608772277832, + "rewards/rejected": -3.6657562255859375, + "step": 692 + }, + { + "epoch": 0.88, + "learning_rate": 3.4959745237321427e-09, + "logits/chosen": -3.287440299987793, + "logits/rejected": -3.120765209197998, + "logps/chosen": -272.9002685546875, + "logps/rejected": -598.68798828125, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.710436999797821, + "rewards/margins": 3.089216709136963, + "rewards/rejected": -2.378779649734497, + "step": 693 + }, + { + "epoch": 0.88, + "learning_rate": 3.4204455974360556e-09, + "logits/chosen": -3.2636165618896484, + "logits/rejected": -2.959214210510254, + "logps/chosen": -266.0849609375, + "logps/rejected": -963.9035034179688, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9084358215332031, + "rewards/margins": 4.102610111236572, + "rewards/rejected": -3.194174289703369, + "step": 694 + }, + { + "epoch": 0.89, + "learning_rate": 3.3457125872073388e-09, + "logits/chosen": -3.271927833557129, + "logits/rejected": -3.118931770324707, + "logps/chosen": -276.086669921875, + "logps/rejected": -741.8486328125, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7736099362373352, + "rewards/margins": 3.788041591644287, + "rewards/rejected": -3.014431953430176, + "step": 695 + }, + { + "epoch": 0.89, + "learning_rate": 3.2717767700269627e-09, + "logits/chosen": -3.292332172393799, + "logits/rejected": -3.170046329498291, + "logps/chosen": -258.46307373046875, + "logps/rejected": -480.6207275390625, + "loss": 0.3891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6386825442314148, + "rewards/margins": 2.8224151134490967, + "rewards/rejected": -2.183732509613037, + "step": 696 + }, + { + "epoch": 0.89, + "learning_rate": 3.198639409254017e-09, + "logits/chosen": -3.247661590576172, + "logits/rejected": -3.131441831588745, + "logps/chosen": -238.3041534423828, + "logps/rejected": -921.005859375, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0082595348358154, + "rewards/margins": 4.042790412902832, + "rewards/rejected": -3.0345306396484375, + "step": 697 + }, + { + "epoch": 0.89, + "learning_rate": 3.1263017546042324e-09, + "logits/chosen": -3.2331037521362305, + "logits/rejected": -3.081249237060547, + "logps/chosen": -249.25355529785156, + "logps/rejected": -694.486083984375, + "loss": 0.3217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.674426257610321, + "rewards/margins": 3.427755832672119, + "rewards/rejected": -2.7533295154571533, + "step": 698 + }, + { + "epoch": 0.89, + "learning_rate": 3.054765042128521e-09, + "logits/chosen": -3.1900482177734375, + "logits/rejected": -3.093799591064453, + "logps/chosen": -236.83412170410156, + "logps/rejected": -520.18115234375, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7543953061103821, + "rewards/margins": 3.082756996154785, + "rewards/rejected": -2.3283615112304688, + "step": 699 + }, + { + "epoch": 0.89, + "learning_rate": 2.9840304941919412e-09, + "logits/chosen": -3.220369338989258, + "logits/rejected": -3.2053914070129395, + "logps/chosen": -287.3332214355469, + "logps/rejected": -786.89306640625, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8777207136154175, + "rewards/margins": 4.007301330566406, + "rewards/rejected": -3.1295807361602783, + "step": 700 + }, + { + "epoch": 0.89, + "learning_rate": 2.9140993194527286e-09, + "logits/chosen": -3.2392807006835938, + "logits/rejected": -3.0909109115600586, + "logps/chosen": -260.4556579589844, + "logps/rejected": -813.931640625, + "loss": 0.3628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6974403858184814, + "rewards/margins": 3.4515204429626465, + "rewards/rejected": -2.754080295562744, + "step": 701 + }, + { + "epoch": 0.89, + "learning_rate": 2.8449727128417366e-09, + "logits/chosen": -3.267853260040283, + "logits/rejected": -3.182178258895874, + "logps/chosen": -247.05870056152344, + "logps/rejected": -615.30517578125, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8354248404502869, + "rewards/margins": 3.8016600608825684, + "rewards/rejected": -2.966235399246216, + "step": 702 + }, + { + "epoch": 0.9, + "learning_rate": 2.7766518555419394e-09, + "logits/chosen": -3.2319235801696777, + "logits/rejected": -3.165846347808838, + "logps/chosen": -247.34738159179688, + "logps/rejected": -676.7479858398438, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8024368286132812, + "rewards/margins": 3.728058099746704, + "rewards/rejected": -2.9256210327148438, + "step": 703 + }, + { + "epoch": 0.9, + "learning_rate": 2.709137914968268e-09, + "logits/chosen": -3.2162444591522217, + "logits/rejected": -3.0837414264678955, + "logps/chosen": -243.372314453125, + "logps/rejected": -660.4501953125, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7896636724472046, + "rewards/margins": 3.3770782947540283, + "rewards/rejected": -2.587414503097534, + "step": 704 + }, + { + "epoch": 0.9, + "learning_rate": 2.642432044747711e-09, + "logits/chosen": -3.196443557739258, + "logits/rejected": -3.0466270446777344, + "logps/chosen": -251.4720458984375, + "logps/rejected": -1462.810302734375, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8089348077774048, + "rewards/margins": 6.035475730895996, + "rewards/rejected": -5.226541042327881, + "step": 705 + }, + { + "epoch": 0.9, + "learning_rate": 2.57653538469953e-09, + "logits/chosen": -3.295912981033325, + "logits/rejected": -3.1006479263305664, + "logps/chosen": -279.2758483886719, + "logps/rejected": -409.15374755859375, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9290558099746704, + "rewards/margins": 2.2592804431915283, + "rewards/rejected": -1.330224633216858, + "step": 706 + }, + { + "epoch": 0.9, + "learning_rate": 2.51144906081584e-09, + "logits/chosen": -3.260690450668335, + "logits/rejected": -3.1627049446105957, + "logps/chosen": -310.6244201660156, + "logps/rejected": -937.1246948242188, + "loss": 0.3595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7407837510108948, + "rewards/margins": 4.040018081665039, + "rewards/rejected": -3.29923415184021, + "step": 707 + }, + { + "epoch": 0.9, + "learning_rate": 2.4471741852423233e-09, + "logits/chosen": -3.2277631759643555, + "logits/rejected": -3.2096168994903564, + "logps/chosen": -280.6416931152344, + "logps/rejected": -825.831298828125, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9268081188201904, + "rewards/margins": 4.5566911697387695, + "rewards/rejected": -3.6298828125, + "step": 708 + }, + { + "epoch": 0.9, + "learning_rate": 2.3837118562592794e-09, + "logits/chosen": -3.2638626098632812, + "logits/rejected": -3.102417469024658, + "logps/chosen": -274.8654479980469, + "logps/rejected": -562.6609497070312, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7448402643203735, + "rewards/margins": 3.2488350868225098, + "rewards/rejected": -2.5039947032928467, + "step": 709 + }, + { + "epoch": 0.91, + "learning_rate": 2.3210631582627928e-09, + "logits/chosen": -3.1848137378692627, + "logits/rejected": -3.088799476623535, + "logps/chosen": -265.83502197265625, + "logps/rejected": -957.3095703125, + "loss": 0.3259, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8876541256904602, + "rewards/margins": 4.531257629394531, + "rewards/rejected": -3.643603801727295, + "step": 710 + }, + { + "epoch": 0.91, + "learning_rate": 2.259229161746279e-09, + "logits/chosen": -3.2678003311157227, + "logits/rejected": -3.099699020385742, + "logps/chosen": -291.0670166015625, + "logps/rejected": -1650.10107421875, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9131790399551392, + "rewards/margins": 6.925355911254883, + "rewards/rejected": -6.012176513671875, + "step": 711 + }, + { + "epoch": 0.91, + "learning_rate": 2.198210923282118e-09, + "logits/chosen": -3.267836570739746, + "logits/rejected": -3.194185495376587, + "logps/chosen": -283.29205322265625, + "logps/rejected": -1275.681396484375, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9833039045333862, + "rewards/margins": 5.798611640930176, + "rewards/rejected": -4.8153076171875, + "step": 712 + }, + { + "epoch": 0.91, + "learning_rate": 2.1380094855036614e-09, + "logits/chosen": -3.161910057067871, + "logits/rejected": -3.0553908348083496, + "logps/chosen": -257.5887145996094, + "logps/rejected": -903.1353759765625, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.703289806842804, + "rewards/margins": 4.209485054016113, + "rewards/rejected": -3.506195068359375, + "step": 713 + }, + { + "epoch": 0.91, + "learning_rate": 2.0786258770873644e-09, + "logits/chosen": -3.2198996543884277, + "logits/rejected": -3.1350200176239014, + "logps/chosen": -275.4100646972656, + "logps/rejected": -591.3868408203125, + "loss": 0.3359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8524383902549744, + "rewards/margins": 3.5670089721679688, + "rewards/rejected": -2.7145707607269287, + "step": 714 + }, + { + "epoch": 0.91, + "learning_rate": 2.020061112735266e-09, + "logits/chosen": -3.208892822265625, + "logits/rejected": -3.1348705291748047, + "logps/chosen": -266.6751403808594, + "logps/rejected": -668.69970703125, + "loss": 0.3239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7301651239395142, + "rewards/margins": 3.865414619445801, + "rewards/rejected": -3.135249614715576, + "step": 715 + }, + { + "epoch": 0.91, + "learning_rate": 1.9623161931575925e-09, + "logits/chosen": -3.2849957942962646, + "logits/rejected": -3.0897085666656494, + "logps/chosen": -251.29005432128906, + "logps/rejected": -499.2642822265625, + "loss": 0.3275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9853919744491577, + "rewards/margins": 3.070183753967285, + "rewards/rejected": -2.084791660308838, + "step": 716 + }, + { + "epoch": 0.91, + "learning_rate": 1.905392105055703e-09, + "logits/chosen": -3.225524425506592, + "logits/rejected": -3.1768405437469482, + "logps/chosen": -286.3305358886719, + "logps/rejected": -645.0118408203125, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.836285412311554, + "rewards/margins": 3.3062286376953125, + "rewards/rejected": -2.4699432849884033, + "step": 717 + }, + { + "epoch": 0.92, + "learning_rate": 1.8492898211051989e-09, + "logits/chosen": -3.2478179931640625, + "logits/rejected": -3.050589084625244, + "logps/chosen": -260.5660400390625, + "logps/rejected": -1230.2308349609375, + "loss": 0.3249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6514480710029602, + "rewards/margins": 4.570167541503906, + "rewards/rejected": -3.918719530105591, + "step": 718 + }, + { + "epoch": 0.92, + "learning_rate": 1.7940102999393193e-09, + "logits/chosen": -3.2723186016082764, + "logits/rejected": -3.1444506645202637, + "logps/chosen": -274.0333251953125, + "logps/rejected": -599.7052001953125, + "loss": 0.3528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7245285511016846, + "rewards/margins": 3.026782989501953, + "rewards/rejected": -2.3022544384002686, + "step": 719 + }, + { + "epoch": 0.92, + "learning_rate": 1.7395544861325716e-09, + "logits/chosen": -3.2096025943756104, + "logits/rejected": -3.0759613513946533, + "logps/chosen": -283.5645751953125, + "logps/rejected": -748.816650390625, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9683471918106079, + "rewards/margins": 4.067049026489258, + "rewards/rejected": -3.0987014770507812, + "step": 720 + }, + { + "epoch": 0.92, + "learning_rate": 1.6859233101845506e-09, + "logits/chosen": -3.1942310333251953, + "logits/rejected": -3.133124589920044, + "logps/chosen": -290.6807861328125, + "logps/rejected": -678.25634765625, + "loss": 0.3575, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6171172857284546, + "rewards/margins": 3.8121705055236816, + "rewards/rejected": -3.1950531005859375, + "step": 721 + }, + { + "epoch": 0.92, + "learning_rate": 1.6331176885040876e-09, + "logits/chosen": -3.1938343048095703, + "logits/rejected": -3.006105422973633, + "logps/chosen": -290.2166748046875, + "logps/rejected": -925.1244506835938, + "loss": 0.361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8114364743232727, + "rewards/margins": 3.6546661853790283, + "rewards/rejected": -2.8432297706604004, + "step": 722 + }, + { + "epoch": 0.92, + "learning_rate": 1.5811385233935548e-09, + "logits/chosen": -3.247927665710449, + "logits/rejected": -3.078775644302368, + "logps/chosen": -266.84808349609375, + "logps/rejected": -712.5299072265625, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7813278436660767, + "rewards/margins": 3.100482225418091, + "rewards/rejected": -2.3191542625427246, + "step": 723 + }, + { + "epoch": 0.92, + "learning_rate": 1.5299867030334812e-09, + "logits/chosen": -3.1916146278381348, + "logits/rejected": -3.1143581867218018, + "logps/chosen": -235.01876831054688, + "logps/rejected": -463.373779296875, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7832992672920227, + "rewards/margins": 2.787903070449829, + "rewards/rejected": -2.004603624343872, + "step": 724 + }, + { + "epoch": 0.92, + "learning_rate": 1.4796631014673322e-09, + "logits/chosen": -3.1896274089813232, + "logits/rejected": -3.1627588272094727, + "logps/chosen": -243.4674530029297, + "logps/rejected": -908.631591796875, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.662915050983429, + "rewards/margins": 4.625475883483887, + "rewards/rejected": -3.9625611305236816, + "step": 725 + }, + { + "epoch": 0.93, + "learning_rate": 1.4301685785866214e-09, + "logits/chosen": -3.245669364929199, + "logits/rejected": -3.140486240386963, + "logps/chosen": -301.97479248046875, + "logps/rejected": -586.5032958984375, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7652572393417358, + "rewards/margins": 3.026467800140381, + "rewards/rejected": -2.2612106800079346, + "step": 726 + }, + { + "epoch": 0.93, + "learning_rate": 1.3815039801161721e-09, + "logits/chosen": -3.3180336952209473, + "logits/rejected": -3.1277036666870117, + "logps/chosen": -249.8519287109375, + "logps/rejected": -627.7752685546875, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.734178900718689, + "rewards/margins": 3.0266809463500977, + "rewards/rejected": -2.292501926422119, + "step": 727 + }, + { + "epoch": 0.93, + "learning_rate": 1.3336701375997127e-09, + "logits/chosen": -3.2787365913391113, + "logits/rejected": -2.996774911880493, + "logps/chosen": -271.5485534667969, + "logps/rejected": -1180.1640625, + "loss": 0.3107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7199935913085938, + "rewards/margins": 4.852476596832275, + "rewards/rejected": -4.132483005523682, + "step": 728 + }, + { + "epoch": 0.93, + "learning_rate": 1.2866678683856268e-09, + "logits/chosen": -3.2658824920654297, + "logits/rejected": -3.1012046337127686, + "logps/chosen": -249.36492919921875, + "logps/rejected": -660.359375, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.625898003578186, + "rewards/margins": 3.0846352577209473, + "rewards/rejected": -2.458737373352051, + "step": 729 + }, + { + "epoch": 0.93, + "learning_rate": 1.240497975613014e-09, + "logits/chosen": -3.196291446685791, + "logits/rejected": -3.0902352333068848, + "logps/chosen": -304.57073974609375, + "logps/rejected": -779.8431396484375, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7532882690429688, + "rewards/margins": 4.315361022949219, + "rewards/rejected": -3.56207275390625, + "step": 730 + }, + { + "epoch": 0.93, + "learning_rate": 1.1951612481979567e-09, + "logits/chosen": -3.2860960960388184, + "logits/rejected": -3.161202907562256, + "logps/chosen": -241.6154022216797, + "logps/rejected": -663.6712646484375, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7309372425079346, + "rewards/margins": 3.5549697875976562, + "rewards/rejected": -2.8240325450897217, + "step": 731 + }, + { + "epoch": 0.93, + "learning_rate": 1.1506584608200365e-09, + "logits/chosen": -3.2892873287200928, + "logits/rejected": -3.16640567779541, + "logps/chosen": -251.26898193359375, + "logps/rejected": -402.244140625, + "loss": 0.3157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8501823544502258, + "rewards/margins": 2.683452606201172, + "rewards/rejected": -1.8332703113555908, + "step": 732 + }, + { + "epoch": 0.93, + "learning_rate": 1.1069903739091002e-09, + "logits/chosen": -3.244429111480713, + "logits/rejected": -3.2159647941589355, + "logps/chosen": -295.02203369140625, + "logps/rejected": -794.1224365234375, + "loss": 0.3255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6122085452079773, + "rewards/margins": 4.165153503417969, + "rewards/rejected": -3.552945137023926, + "step": 733 + }, + { + "epoch": 0.94, + "learning_rate": 1.064157733632276e-09, + "logits/chosen": -3.1834800243377686, + "logits/rejected": -3.0741047859191895, + "logps/chosen": -284.9871520996094, + "logps/rejected": -541.8126220703125, + "loss": 0.3239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7936661243438721, + "rewards/margins": 2.8771209716796875, + "rewards/rejected": -2.0834550857543945, + "step": 734 + }, + { + "epoch": 0.94, + "learning_rate": 1.0221612718812e-09, + "logits/chosen": -3.237389326095581, + "logits/rejected": -3.07759428024292, + "logps/chosen": -279.8023681640625, + "logps/rejected": -1011.9724731445312, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8318130373954773, + "rewards/margins": 4.437558174133301, + "rewards/rejected": -3.6057450771331787, + "step": 735 + }, + { + "epoch": 0.94, + "learning_rate": 9.81001706259532e-10, + "logits/chosen": -3.237783432006836, + "logits/rejected": -3.1208324432373047, + "logps/chosen": -242.7189178466797, + "logps/rejected": -791.5379028320312, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8612457513809204, + "rewards/margins": 4.023770332336426, + "rewards/rejected": -3.162524461746216, + "step": 736 + }, + { + "epoch": 0.94, + "learning_rate": 9.40679740070688e-10, + "logits/chosen": -3.1774487495422363, + "logits/rejected": -3.1197712421417236, + "logps/chosen": -259.9789733886719, + "logps/rejected": -542.2491455078125, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7642425298690796, + "rewards/margins": 2.929464817047119, + "rewards/rejected": -2.16522216796875, + "step": 737 + }, + { + "epoch": 0.94, + "learning_rate": 9.011960623058201e-10, + "logits/chosen": -3.2950077056884766, + "logits/rejected": -3.227872610092163, + "logps/chosen": -254.4993896484375, + "logps/rejected": -852.16796875, + "loss": 0.3206, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7386634945869446, + "rewards/margins": 4.2903361320495605, + "rewards/rejected": -3.5516724586486816, + "step": 738 + }, + { + "epoch": 0.94, + "learning_rate": 8.625513476320289e-10, + "logits/chosen": -3.2643775939941406, + "logits/rejected": -3.189666271209717, + "logps/chosen": -262.58404541015625, + "logps/rejected": -596.9677124023438, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9893425107002258, + "rewards/margins": 3.2499167919158936, + "rewards/rejected": -2.2605743408203125, + "step": 739 + }, + { + "epoch": 0.94, + "learning_rate": 8.247462563808816e-10, + "logits/chosen": -3.21728515625, + "logits/rejected": -3.0666680335998535, + "logps/chosen": -291.3544006347656, + "logps/rejected": -851.084228515625, + "loss": 0.3168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7477188110351562, + "rewards/margins": 3.4995832443237305, + "rewards/rejected": -2.7518646717071533, + "step": 740 + }, + { + "epoch": 0.94, + "learning_rate": 7.877814345370715e-10, + "logits/chosen": -3.215179920196533, + "logits/rejected": -3.007049083709717, + "logps/chosen": -279.4347839355469, + "logps/rejected": -269.97100830078125, + "loss": 0.3962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6481499075889587, + "rewards/margins": 1.1411064863204956, + "rewards/rejected": -0.49295657873153687, + "step": 741 + }, + { + "epoch": 0.95, + "learning_rate": 7.516575137274162e-10, + "logits/chosen": -3.276155471801758, + "logits/rejected": -3.137566328048706, + "logps/chosen": -269.69696044921875, + "logps/rejected": -524.1314697265625, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.862139880657196, + "rewards/margins": 2.9753904342651367, + "rewards/rejected": -2.113250732421875, + "step": 742 + }, + { + "epoch": 0.95, + "learning_rate": 7.163751112100435e-10, + "logits/chosen": -3.2774996757507324, + "logits/rejected": -3.088372230529785, + "logps/chosen": -248.509033203125, + "logps/rejected": -549.463623046875, + "loss": 0.3369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7405639886856079, + "rewards/margins": 2.6836180686950684, + "rewards/rejected": -1.94305419921875, + "step": 743 + }, + { + "epoch": 0.95, + "learning_rate": 6.819348298638839e-10, + "logits/chosen": -3.1899666786193848, + "logits/rejected": -3.0932657718658447, + "logps/chosen": -258.34307861328125, + "logps/rejected": -614.6270751953125, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8479377627372742, + "rewards/margins": 2.856722354888916, + "rewards/rejected": -2.008784532546997, + "step": 744 + }, + { + "epoch": 0.95, + "learning_rate": 6.483372581783054e-10, + "logits/chosen": -3.1687300205230713, + "logits/rejected": -3.098332643508911, + "logps/chosen": -290.94110107421875, + "logps/rejected": -1243.50390625, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7831131219863892, + "rewards/margins": 5.572526931762695, + "rewards/rejected": -4.7894134521484375, + "step": 745 + }, + { + "epoch": 0.95, + "learning_rate": 6.15582970243117e-10, + "logits/chosen": -3.206592559814453, + "logits/rejected": -3.086352825164795, + "logps/chosen": -245.0193634033203, + "logps/rejected": -591.01611328125, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7600967288017273, + "rewards/margins": 3.1328444480895996, + "rewards/rejected": -2.3727478981018066, + "step": 746 + }, + { + "epoch": 0.95, + "learning_rate": 5.83672525738721e-10, + "logits/chosen": -3.1772079467773438, + "logits/rejected": -3.054492473602295, + "logps/chosen": -300.0500793457031, + "logps/rejected": -300.74859619140625, + "loss": 0.3519, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7056305408477783, + "rewards/margins": 1.7714340686798096, + "rewards/rejected": -1.0658035278320312, + "step": 747 + }, + { + "epoch": 0.95, + "learning_rate": 5.526064699265753e-10, + "logits/chosen": -3.2356808185577393, + "logits/rejected": -3.0519495010375977, + "logps/chosen": -282.499267578125, + "logps/rejected": -932.0460815429688, + "loss": 0.3279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.853436291217804, + "rewards/margins": 3.9296326637268066, + "rewards/rejected": -3.0761964321136475, + "step": 748 + }, + { + "epoch": 0.95, + "learning_rate": 5.223853336398632e-10, + "logits/chosen": -3.180403232574463, + "logits/rejected": -3.0743350982666016, + "logps/chosen": -268.9346923828125, + "logps/rejected": -828.4962768554688, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8042511343955994, + "rewards/margins": 3.4726991653442383, + "rewards/rejected": -2.668447971343994, + "step": 749 + }, + { + "epoch": 0.96, + "learning_rate": 4.930096332744105e-10, + "logits/chosen": -3.3077754974365234, + "logits/rejected": -3.1419870853424072, + "logps/chosen": -263.8321228027344, + "logps/rejected": -1491.879638671875, + "loss": 0.3, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7661323547363281, + "rewards/margins": 6.633875370025635, + "rewards/rejected": -5.867743015289307, + "step": 750 + }, + { + "epoch": 0.96, + "learning_rate": 4.644798707798936e-10, + "logits/chosen": -3.2130894660949707, + "logits/rejected": -3.1068239212036133, + "logps/chosen": -262.74560546875, + "logps/rejected": -580.1153564453125, + "loss": 0.3137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.773572564125061, + "rewards/margins": 3.1661338806152344, + "rewards/rejected": -2.392561435699463, + "step": 751 + }, + { + "epoch": 0.96, + "learning_rate": 4.3679653365124024e-10, + "logits/chosen": -3.2136569023132324, + "logits/rejected": -3.1148934364318848, + "logps/chosen": -265.31878662109375, + "logps/rejected": -521.2979736328125, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8238251209259033, + "rewards/margins": 2.897752285003662, + "rewards/rejected": -2.073927402496338, + "step": 752 + }, + { + "epoch": 0.96, + "learning_rate": 4.0996009492029195e-10, + "logits/chosen": -3.2566826343536377, + "logits/rejected": -2.959892511367798, + "logps/chosen": -266.380126953125, + "logps/rejected": -821.2305297851562, + "loss": 0.3342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7738784551620483, + "rewards/margins": 3.315302848815918, + "rewards/rejected": -2.541424512863159, + "step": 753 + }, + { + "epoch": 0.96, + "learning_rate": 3.8397101314774914e-10, + "logits/chosen": -3.244494676589966, + "logits/rejected": -3.109090566635132, + "logps/chosen": -284.2798156738281, + "logps/rejected": -387.2193603515625, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6839447021484375, + "rewards/margins": 2.285792589187622, + "rewards/rejected": -1.6018478870391846, + "step": 754 + }, + { + "epoch": 0.96, + "learning_rate": 3.588297324153056e-10, + "logits/chosen": -3.2005679607391357, + "logits/rejected": -3.1760993003845215, + "logps/chosen": -233.9751434326172, + "logps/rejected": -641.6821899414062, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.93316650390625, + "rewards/margins": 3.639535665512085, + "rewards/rejected": -2.706368923187256, + "step": 755 + }, + { + "epoch": 0.96, + "learning_rate": 3.345366823180928e-10, + "logits/chosen": -3.244020700454712, + "logits/rejected": -3.1025521755218506, + "logps/chosen": -279.36761474609375, + "logps/rejected": -456.66046142578125, + "loss": 0.3419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8468765020370483, + "rewards/margins": 2.666677951812744, + "rewards/rejected": -1.8198013305664062, + "step": 756 + }, + { + "epoch": 0.96, + "learning_rate": 3.110922779573033e-10, + "logits/chosen": -3.2357048988342285, + "logits/rejected": -2.9610538482666016, + "logps/chosen": -270.8417663574219, + "logps/rejected": -1546.591552734375, + "loss": 0.3399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6949211359024048, + "rewards/margins": 5.888591766357422, + "rewards/rejected": -5.193670749664307, + "step": 757 + }, + { + "epoch": 0.97, + "learning_rate": 2.8849691993311776e-10, + "logits/chosen": -3.2729992866516113, + "logits/rejected": -2.972492218017578, + "logps/chosen": -288.4589538574219, + "logps/rejected": -421.06414794921875, + "loss": 0.3693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9675582647323608, + "rewards/margins": 2.444027900695801, + "rewards/rejected": -1.4764695167541504, + "step": 758 + }, + { + "epoch": 0.97, + "learning_rate": 2.667509943378721e-10, + "logits/chosen": -3.1999292373657227, + "logits/rejected": -3.1754584312438965, + "logps/chosen": -274.7830810546875, + "logps/rejected": -1343.01025390625, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8687469363212585, + "rewards/margins": 6.242410182952881, + "rewards/rejected": -5.373663425445557, + "step": 759 + }, + { + "epoch": 0.97, + "learning_rate": 2.4585487274942915e-10, + "logits/chosen": -3.1733903884887695, + "logits/rejected": -3.1261205673217773, + "logps/chosen": -248.01893615722656, + "logps/rejected": -580.7425537109375, + "loss": 0.3034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8438758850097656, + "rewards/margins": 3.4302589893341064, + "rewards/rejected": -2.586383104324341, + "step": 760 + }, + { + "epoch": 0.97, + "learning_rate": 2.2580891222485632e-10, + "logits/chosen": -3.328033208847046, + "logits/rejected": -3.115169048309326, + "logps/chosen": -264.9375915527344, + "logps/rejected": -551.7763671875, + "loss": 0.3268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6848167181015015, + "rewards/margins": 2.7769737243652344, + "rewards/rejected": -2.0921568870544434, + "step": 761 + }, + { + "epoch": 0.97, + "learning_rate": 2.0661345529430774e-10, + "logits/chosen": -3.1726202964782715, + "logits/rejected": -3.15685772895813, + "logps/chosen": -274.47528076171875, + "logps/rejected": -710.4139404296875, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9225906133651733, + "rewards/margins": 3.796435832977295, + "rewards/rejected": -2.873845100402832, + "step": 762 + }, + { + "epoch": 0.97, + "learning_rate": 1.8826882995517934e-10, + "logits/chosen": -3.261428117752075, + "logits/rejected": -3.142282485961914, + "logps/chosen": -257.0000915527344, + "logps/rejected": -879.7728881835938, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8506546020507812, + "rewards/margins": 4.062477111816406, + "rewards/rejected": -3.211822748184204, + "step": 763 + }, + { + "epoch": 0.97, + "learning_rate": 1.7077534966650763e-10, + "logits/chosen": -3.2588436603546143, + "logits/rejected": -3.1177663803100586, + "logps/chosen": -260.50341796875, + "logps/rejected": -1181.957275390625, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.860827624797821, + "rewards/margins": 5.7147216796875, + "rewards/rejected": -4.853894233703613, + "step": 764 + }, + { + "epoch": 0.98, + "learning_rate": 1.541333133436018e-10, + "logits/chosen": -3.2223997116088867, + "logits/rejected": -3.1893138885498047, + "logps/chosen": -280.55743408203125, + "logps/rejected": -791.2471923828125, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7037551999092102, + "rewards/margins": 3.6880202293395996, + "rewards/rejected": -2.984265089035034, + "step": 765 + }, + { + "epoch": 0.98, + "learning_rate": 1.3834300535294218e-10, + "logits/chosen": -3.2197790145874023, + "logits/rejected": -3.0522758960723877, + "logps/chosen": -268.6368408203125, + "logps/rejected": -349.5597229003906, + "loss": 0.3923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9023330807685852, + "rewards/margins": 2.2535529136657715, + "rewards/rejected": -1.351219892501831, + "step": 766 + }, + { + "epoch": 0.98, + "learning_rate": 1.2340469550733423e-10, + "logits/chosen": -3.2812423706054688, + "logits/rejected": -3.2126519680023193, + "logps/chosen": -300.838134765625, + "logps/rejected": -968.5159301757812, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.59710693359375, + "rewards/margins": 4.763433933258057, + "rewards/rejected": -4.166326999664307, + "step": 767 + }, + { + "epoch": 0.98, + "learning_rate": 1.0931863906127325e-10, + "logits/chosen": -3.2551398277282715, + "logits/rejected": -3.1493053436279297, + "logps/chosen": -260.9712219238281, + "logps/rejected": -354.7039794921875, + "loss": 0.3856, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.011939287185669, + "rewards/margins": 2.198974609375, + "rewards/rejected": -1.1870354413986206, + "step": 768 + }, + { + "epoch": 0.98, + "learning_rate": 9.608507670659238e-11, + "logits/chosen": -3.2277350425720215, + "logits/rejected": -3.0649795532226562, + "logps/chosen": -283.4414367675781, + "logps/rejected": -876.6177368164062, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9997742176055908, + "rewards/margins": 3.989114284515381, + "rewards/rejected": -2.989340305328369, + "step": 769 + }, + { + "epoch": 0.98, + "learning_rate": 8.370423456837139e-11, + "logits/chosen": -3.237583637237549, + "logits/rejected": -3.1231043338775635, + "logps/chosen": -275.6099548339844, + "logps/rejected": -750.8046875, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8201202154159546, + "rewards/margins": 3.0992066860198975, + "rewards/rejected": -2.2790863513946533, + "step": 770 + }, + { + "epoch": 0.98, + "learning_rate": 7.217632420102871e-11, + "logits/chosen": -3.1603832244873047, + "logits/rejected": -3.0298027992248535, + "logps/chosen": -306.5656433105469, + "logps/rejected": -540.7810668945312, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9627029895782471, + "rewards/margins": 2.71112060546875, + "rewards/rejected": -1.7484177350997925, + "step": 771 + }, + { + "epoch": 0.98, + "learning_rate": 6.150154258476314e-11, + "logits/chosen": -3.2853355407714844, + "logits/rejected": -2.9978137016296387, + "logps/chosen": -269.1598205566406, + "logps/rejected": -1476.406005859375, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8668854236602783, + "rewards/margins": 5.680981636047363, + "rewards/rejected": -4.814096450805664, + "step": 772 + }, + { + "epoch": 0.99, + "learning_rate": 5.168007212212333e-11, + "logits/chosen": -3.252532720565796, + "logits/rejected": -3.141303539276123, + "logps/chosen": -233.550537109375, + "logps/rejected": -740.9815063476562, + "loss": 0.3099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6773475408554077, + "rewards/margins": 4.4308695793151855, + "rewards/rejected": -3.7535219192504883, + "step": 773 + }, + { + "epoch": 0.99, + "learning_rate": 4.271208063494902e-11, + "logits/chosen": -3.269381046295166, + "logits/rejected": -3.1623361110687256, + "logps/chosen": -292.16741943359375, + "logps/rejected": -649.5860595703125, + "loss": 0.3566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8583450317382812, + "rewards/margins": 3.690342903137207, + "rewards/rejected": -2.831997871398926, + "step": 774 + }, + { + "epoch": 0.99, + "learning_rate": 3.459772136146788e-11, + "logits/chosen": -3.250115394592285, + "logits/rejected": -3.0605008602142334, + "logps/chosen": -287.2698669433594, + "logps/rejected": -576.034912109375, + "loss": 0.3346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6132835149765015, + "rewards/margins": 2.829606056213379, + "rewards/rejected": -2.216322422027588, + "step": 775 + }, + { + "epoch": 0.99, + "learning_rate": 2.733713295369755e-11, + "logits/chosen": -3.3037662506103516, + "logits/rejected": -3.089189052581787, + "logps/chosen": -295.5542297363281, + "logps/rejected": -360.92413330078125, + "loss": 0.3323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5710541009902954, + "rewards/margins": 1.9531419277191162, + "rewards/rejected": -1.3820877075195312, + "step": 776 + }, + { + "epoch": 0.99, + "learning_rate": 2.093043947505868e-11, + "logits/chosen": -3.2141501903533936, + "logits/rejected": -3.291757583618164, + "logps/chosen": -243.57797241210938, + "logps/rejected": -4216.7783203125, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8366310596466064, + "rewards/margins": 5.403995513916016, + "rewards/rejected": -4.567364692687988, + "step": 777 + }, + { + "epoch": 0.99, + "learning_rate": 1.53777503982655e-11, + "logits/chosen": -3.0898919105529785, + "logits/rejected": -3.090935707092285, + "logps/chosen": -285.8406677246094, + "logps/rejected": -1261.04052734375, + "loss": 0.3072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8803146481513977, + "rewards/margins": 5.479741096496582, + "rewards/rejected": -4.59942626953125, + "step": 778 + }, + { + "epoch": 0.99, + "learning_rate": 1.0679160603449533e-11, + "logits/chosen": -3.2726240158081055, + "logits/rejected": -3.170853614807129, + "logps/chosen": -277.84033203125, + "logps/rejected": -625.343994140625, + "loss": 0.3613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.848914384841919, + "rewards/margins": 3.3255743980407715, + "rewards/rejected": -2.4766602516174316, + "step": 779 + }, + { + "epoch": 0.99, + "learning_rate": 6.834750376549792e-12, + "logits/chosen": -3.2160568237304688, + "logits/rejected": -3.014195442199707, + "logps/chosen": -238.14572143554688, + "logps/rejected": -394.39349365234375, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9970337152481079, + "rewards/margins": 2.2811570167541504, + "rewards/rejected": -1.284123182296753, + "step": 780 + }, + { + "epoch": 1.0, + "learning_rate": 3.844585407936085e-12, + "logits/chosen": -3.2312543392181396, + "logits/rejected": -3.0997586250305176, + "logps/chosen": -249.39633178710938, + "logps/rejected": -1080.747314453125, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9487960934638977, + "rewards/margins": 4.349076747894287, + "rewards/rejected": -3.400280714035034, + "step": 781 + }, + { + "epoch": 1.0, + "learning_rate": 1.7087167912710475e-12, + "logits/chosen": -3.2074296474456787, + "logits/rejected": -3.070702075958252, + "logps/chosen": -298.8362731933594, + "logps/rejected": -513.0867919921875, + "loss": 0.3294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8147506713867188, + "rewards/margins": 2.73960280418396, + "rewards/rejected": -1.9248520135879517, + "step": 782 + }, + { + "epoch": 1.0, + "learning_rate": 4.271810226552652e-13, + "logits/chosen": -3.3006386756896973, + "logits/rejected": -3.164454460144043, + "logps/chosen": -254.7488250732422, + "logps/rejected": -371.5185852050781, + "loss": 0.377, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0237960815429688, + "rewards/margins": 2.247328996658325, + "rewards/rejected": -1.2235329151153564, + "step": 783 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -3.231198310852051, + "logits/rejected": -3.140446901321411, + "logps/chosen": -252.65731811523438, + "logps/rejected": -626.374755859375, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8486343622207642, + "rewards/margins": 3.1328446865081787, + "rewards/rejected": -2.284210205078125, + "step": 784 + }, + { + "epoch": 1.0, + "step": 784, + "total_flos": 0.0, + "train_loss": 0.4313411161455573, + "train_runtime": 1590.1011, + "train_samples_per_second": 3.946, + "train_steps_per_second": 0.493 + } + ], + "logging_steps": 1.0, + "max_steps": 784, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a8f77894bfe3a8aed328c6b36c330e5b27602137 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "up_proj", + "down_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bd92f5f4fa9b954830223880d08619957de4bb7 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c6bfae40f9b7708d704a111c6aff04008b4e4aecf729c5bdbeceaec513395f +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7752f8b87520f017a1d916c992f10fa6992ec615 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter1_lora_2ep/trainer_state.json @@ -0,0 +1,21982 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9987253027405991, + "eval_steps": 500, + "global_step": 1568, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.083333333333333e-09, + "logits/chosen": -3.2160720825195312, + "logits/rejected": -3.116873264312744, + "logps/chosen": -228.813232421875, + "logps/rejected": -513.1785888671875, + "loss": 0.9784, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.038027193397283554, + "rewards/margins": -0.09404220432043076, + "rewards/rejected": 0.0560150146484375, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.166666666666666e-09, + "logits/chosen": -3.262388229370117, + "logits/rejected": -3.188769817352295, + "logps/chosen": -260.372314453125, + "logps/rejected": -616.3881225585938, + "loss": 0.9729, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.016930390149354935, + "rewards/margins": 0.035233307629823685, + "rewards/rejected": -0.05216369777917862, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6.25e-09, + "logits/chosen": -3.162508964538574, + "logits/rejected": -3.056567430496216, + "logps/chosen": -303.6380310058594, + "logps/rejected": -502.1252136230469, + "loss": 0.9607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02143554762005806, + "rewards/margins": 0.02834320068359375, + "rewards/rejected": -0.04977874830365181, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -3.1569342613220215, + "logits/rejected": -3.0980818271636963, + "logps/chosen": -300.2929382324219, + "logps/rejected": -647.7689819335938, + "loss": 1.0093, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0026519775856286287, + "rewards/margins": -0.01221618615090847, + "rewards/rejected": 0.01486816443502903, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.0416666666666667e-08, + "logits/chosen": -3.2336626052856445, + "logits/rejected": -3.1037120819091797, + "logps/chosen": -280.24578857421875, + "logps/rejected": -545.6590576171875, + "loss": 0.9879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0165557861328125, + "rewards/margins": 0.02866516262292862, + "rewards/rejected": -0.04522094875574112, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 1.25e-08, + "logits/chosen": -3.172194480895996, + "logits/rejected": -3.0337448120117188, + "logps/chosen": -270.47637939453125, + "logps/rejected": -458.59619140625, + "loss": 0.9769, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.013661956414580345, + "rewards/margins": -0.039986420422792435, + "rewards/rejected": 0.05364837870001793, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 1.4583333333333333e-08, + "logits/chosen": -3.1379523277282715, + "logits/rejected": -3.082609176635742, + "logps/chosen": -315.0887451171875, + "logps/rejected": -438.10546875, + "loss": 1.0155, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0042877197265625, + "rewards/margins": -0.03752441331744194, + "rewards/rejected": 0.03323669731616974, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -3.225519895553589, + "logits/rejected": -3.0722622871398926, + "logps/chosen": -282.7359619140625, + "logps/rejected": -554.39990234375, + "loss": 1.0053, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0002700798213481903, + "rewards/margins": 0.02559814788401127, + "rewards/rejected": -0.02586822584271431, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 1.875e-08, + "logits/chosen": -3.1536917686462402, + "logits/rejected": -3.1214256286621094, + "logps/chosen": -308.546875, + "logps/rejected": -499.80755615234375, + "loss": 0.9751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01824493333697319, + "rewards/margins": 0.02762145921587944, + "rewards/rejected": -0.00937652587890625, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333335e-08, + "logits/chosen": -3.1781020164489746, + "logits/rejected": -3.1165146827697754, + "logps/chosen": -274.4998779296875, + "logps/rejected": -719.0789794921875, + "loss": 1.0044, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02658996731042862, + "rewards/margins": 0.05048218369483948, + "rewards/rejected": -0.023892216384410858, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 2.2916666666666663e-08, + "logits/chosen": -3.211332321166992, + "logits/rejected": -3.0435261726379395, + "logps/chosen": -242.57339477539062, + "logps/rejected": -992.177734375, + "loss": 0.9739, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.022696685045957565, + "rewards/margins": 0.08044663071632385, + "rewards/rejected": -0.10314331203699112, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 2.5e-08, + "logits/chosen": -3.215311050415039, + "logits/rejected": -3.0630276203155518, + "logps/chosen": -254.53173828125, + "logps/rejected": -437.38568115234375, + "loss": 0.9891, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01290206890553236, + "rewards/margins": -0.034897614270448685, + "rewards/rejected": 0.02199554443359375, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 2.708333333333333e-08, + "logits/chosen": -3.2038016319274902, + "logits/rejected": -3.074310779571533, + "logps/chosen": -250.2421875, + "logps/rejected": -527.8236694335938, + "loss": 0.9812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0048614502884447575, + "rewards/margins": 0.04114609211683273, + "rewards/rejected": -0.036284640431404114, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 2.9166666666666666e-08, + "logits/chosen": -3.236393451690674, + "logits/rejected": -3.0463991165161133, + "logps/chosen": -251.21554565429688, + "logps/rejected": -768.9014282226562, + "loss": 0.9666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014916992746293545, + "rewards/margins": 0.04535217210650444, + "rewards/rejected": -0.03043518215417862, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 3.125e-08, + "logits/chosen": -3.171485424041748, + "logits/rejected": -3.062551498413086, + "logps/chosen": -320.338134765625, + "logps/rejected": -499.44189453125, + "loss": 0.9721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04815826565027237, + "rewards/margins": 0.10358123481273651, + "rewards/rejected": -0.05542297288775444, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -3.1894989013671875, + "logits/rejected": -3.0834197998046875, + "logps/chosen": -261.7540283203125, + "logps/rejected": -541.110107421875, + "loss": 0.9998, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.021759033203125, + "rewards/margins": -0.09294281154870987, + "rewards/rejected": 0.07118377834558487, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 3.541666666666667e-08, + "logits/chosen": -3.163898229598999, + "logits/rejected": -3.0923538208007812, + "logps/chosen": -286.5225830078125, + "logps/rejected": -1414.6285400390625, + "loss": 0.9871, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.038421630859375, + "rewards/margins": -0.05072326958179474, + "rewards/rejected": 0.01230163499712944, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 3.75e-08, + "logits/chosen": -3.1565704345703125, + "logits/rejected": -2.9779415130615234, + "logps/chosen": -279.67279052734375, + "logps/rejected": -1317.944091796875, + "loss": 1.0016, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02803649939596653, + "rewards/margins": 0.05693665146827698, + "rewards/rejected": -0.02890014834702015, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 3.958333333333333e-08, + "logits/chosen": -3.1108860969543457, + "logits/rejected": -3.099325656890869, + "logps/chosen": -305.38897705078125, + "logps/rejected": -655.6672973632812, + "loss": 0.9764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0108184814453125, + "rewards/margins": 0.0239410400390625, + "rewards/rejected": -0.01312255859375, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -3.1829283237457275, + "logits/rejected": -3.1425957679748535, + "logps/chosen": -264.92681884765625, + "logps/rejected": -442.5179748535156, + "loss": 0.9822, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03589477390050888, + "rewards/margins": -0.02271728403866291, + "rewards/rejected": -0.01317749172449112, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 4.375e-08, + "logits/chosen": -3.1370558738708496, + "logits/rejected": -3.12188982963562, + "logps/chosen": -267.9302978515625, + "logps/rejected": -444.0398254394531, + "loss": 0.9758, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.006340790539979935, + "rewards/margins": -0.005979157984256744, + "rewards/rejected": -0.0003616334870457649, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 4.583333333333333e-08, + "logits/chosen": -3.2154922485351562, + "logits/rejected": -3.0788745880126953, + "logps/chosen": -272.93212890625, + "logps/rejected": -630.7208862304688, + "loss": 0.9729, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.03484649956226349, + "rewards/margins": 0.03205566108226776, + "rewards/rejected": 0.00279083289206028, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 4.791666666666667e-08, + "logits/chosen": -3.2290127277374268, + "logits/rejected": -3.0649361610412598, + "logps/chosen": -261.24188232421875, + "logps/rejected": -1407.8760986328125, + "loss": 0.9481, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.017928315326571465, + "rewards/margins": -0.09315872192382812, + "rewards/rejected": 0.11108703911304474, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 5e-08, + "logits/chosen": -3.2038955688476562, + "logits/rejected": -3.147711753845215, + "logps/chosen": -254.70594787597656, + "logps/rejected": -560.027099609375, + "loss": 0.9752, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01219100970774889, + "rewards/margins": -0.061858370900154114, + "rewards/rejected": 0.0496673621237278, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 5.208333333333333e-08, + "logits/chosen": -3.2017221450805664, + "logits/rejected": -3.1628756523132324, + "logps/chosen": -255.76927185058594, + "logps/rejected": -526.4248046875, + "loss": 0.981, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.004953003488481045, + "rewards/margins": -0.06301575154066086, + "rewards/rejected": 0.05806274712085724, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 5.416666666666666e-08, + "logits/chosen": -3.1421289443969727, + "logits/rejected": -3.0238499641418457, + "logps/chosen": -286.767578125, + "logps/rejected": -1003.1739501953125, + "loss": 0.9633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0010772701352834702, + "rewards/margins": 0.10674743354320526, + "rewards/rejected": -0.10567016899585724, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 5.625e-08, + "logits/chosen": -3.103497266769409, + "logits/rejected": -3.0402722358703613, + "logps/chosen": -267.00555419921875, + "logps/rejected": -483.06646728515625, + "loss": 0.9852, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.04073791578412056, + "rewards/margins": 0.00591735914349556, + "rewards/rejected": 0.034820556640625, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -3.1880197525024414, + "logits/rejected": -3.0958144664764404, + "logps/chosen": -318.2054138183594, + "logps/rejected": -503.2127685546875, + "loss": 0.9687, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.015017700381577015, + "rewards/margins": 0.06238480284810066, + "rewards/rejected": -0.0774025022983551, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 6.041666666666666e-08, + "logits/chosen": -3.135651111602783, + "logits/rejected": -3.070690631866455, + "logps/chosen": -280.51934814453125, + "logps/rejected": -327.61822509765625, + "loss": 0.9686, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00839691236615181, + "rewards/margins": 0.007021332159638405, + "rewards/rejected": 0.0013755802065134048, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 6.25e-08, + "logits/chosen": -3.1419360637664795, + "logits/rejected": -3.1197147369384766, + "logps/chosen": -252.90023803710938, + "logps/rejected": -481.4837341308594, + "loss": 0.9417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01748352125287056, + "rewards/margins": 0.03903503715991974, + "rewards/rejected": -0.0565185584127903, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 6.458333333333333e-08, + "logits/chosen": -3.196711778640747, + "logits/rejected": -3.0994741916656494, + "logps/chosen": -277.0928955078125, + "logps/rejected": -455.7370910644531, + "loss": 0.9286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03488006442785263, + "rewards/margins": 0.05620574951171875, + "rewards/rejected": -0.02132568322122097, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -3.1843104362487793, + "logits/rejected": -3.1021299362182617, + "logps/chosen": -258.0096435546875, + "logps/rejected": -303.19549560546875, + "loss": 0.9688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01157531701028347, + "rewards/margins": 0.0220489501953125, + "rewards/rejected": -0.01047363318502903, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 6.875e-08, + "logits/chosen": -3.234867572784424, + "logits/rejected": -3.143336296081543, + "logps/chosen": -273.82415771484375, + "logps/rejected": -591.5932006835938, + "loss": 0.954, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.006175231654196978, + "rewards/margins": 0.01397705264389515, + "rewards/rejected": -0.007801821455359459, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 7.083333333333334e-08, + "logits/chosen": -3.162611961364746, + "logits/rejected": -3.159562826156616, + "logps/chosen": -268.53057861328125, + "logps/rejected": -909.307373046875, + "loss": 0.9239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0007064819801598787, + "rewards/margins": 0.06980133056640625, + "rewards/rejected": -0.06909485161304474, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 7.291666666666666e-08, + "logits/chosen": -3.1407461166381836, + "logits/rejected": -3.0989646911621094, + "logps/chosen": -340.39117431640625, + "logps/rejected": -1248.60546875, + "loss": 0.9674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04994964599609375, + "rewards/margins": 0.20284882187843323, + "rewards/rejected": -0.15289917588233948, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 7.5e-08, + "logits/chosen": -3.1865830421447754, + "logits/rejected": -3.044377326965332, + "logps/chosen": -300.4569091796875, + "logps/rejected": -511.9309997558594, + "loss": 0.9212, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.03917083516716957, + "rewards/margins": 0.004975892603397369, + "rewards/rejected": -0.04414672777056694, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 7.708333333333333e-08, + "logits/chosen": -3.22495174407959, + "logits/rejected": -3.124211311340332, + "logps/chosen": -288.8163146972656, + "logps/rejected": -1002.459228515625, + "loss": 0.9506, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.007839965634047985, + "rewards/margins": 0.12685851752758026, + "rewards/rejected": -0.1190185546875, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 7.916666666666665e-08, + "logits/chosen": -3.243981122970581, + "logits/rejected": -3.113055944442749, + "logps/chosen": -228.15895080566406, + "logps/rejected": -465.3115539550781, + "loss": 0.8959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03850860521197319, + "rewards/margins": 0.04250335693359375, + "rewards/rejected": -0.0039947498589754105, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 8.124999999999999e-08, + "logits/chosen": -3.1378655433654785, + "logits/rejected": -3.093940258026123, + "logps/chosen": -260.021728515625, + "logps/rejected": -329.66412353515625, + "loss": 0.9248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.038828279823064804, + "rewards/margins": 0.09033584594726562, + "rewards/rejected": -0.05150756984949112, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -3.1547114849090576, + "logits/rejected": -3.0631442070007324, + "logps/chosen": -281.63446044921875, + "logps/rejected": -418.96502685546875, + "loss": 0.8842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04261932149529457, + "rewards/margins": 0.127308651804924, + "rewards/rejected": -0.08468933403491974, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 8.541666666666666e-08, + "logits/chosen": -3.1707653999328613, + "logits/rejected": -3.0565524101257324, + "logps/chosen": -305.5696105957031, + "logps/rejected": -477.30572509765625, + "loss": 0.9653, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.006466674618422985, + "rewards/margins": -0.007003783248364925, + "rewards/rejected": 0.013470458798110485, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 8.75e-08, + "logits/chosen": -3.1844217777252197, + "logits/rejected": -3.118379831314087, + "logps/chosen": -280.1917419433594, + "logps/rejected": -503.5762634277344, + "loss": 0.9009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07630844414234161, + "rewards/margins": 0.2034813016653061, + "rewards/rejected": -0.12717285752296448, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 8.958333333333333e-08, + "logits/chosen": -3.268772602081299, + "logits/rejected": -3.154224395751953, + "logps/chosen": -271.33245849609375, + "logps/rejected": -976.4696044921875, + "loss": 0.9328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01361083984375, + "rewards/margins": 0.22583618760108948, + "rewards/rejected": -0.21222534775733948, + "step": 43 + }, + { + "epoch": 0.06, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -3.194074869155884, + "logits/rejected": -3.0982847213745117, + "logps/chosen": -278.3538818359375, + "logps/rejected": -745.4373779296875, + "loss": 0.8807, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0008178707212209702, + "rewards/margins": 0.07906495034694672, + "rewards/rejected": -0.0782470703125, + "step": 44 + }, + { + "epoch": 0.06, + "learning_rate": 9.375e-08, + "logits/chosen": -3.1571826934814453, + "logits/rejected": -2.950911045074463, + "logps/chosen": -281.2884216308594, + "logps/rejected": -874.3759765625, + "loss": 0.8854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0030899043194949627, + "rewards/margins": 0.18744048476219177, + "rewards/rejected": -0.19053038954734802, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 9.583333333333334e-08, + "logits/chosen": -3.1229805946350098, + "logits/rejected": -3.096945285797119, + "logps/chosen": -261.8172607421875, + "logps/rejected": -443.3437805175781, + "loss": 0.9223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06317748874425888, + "rewards/margins": 0.17722931504249573, + "rewards/rejected": -0.11405181884765625, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 9.791666666666666e-08, + "logits/chosen": -3.2058846950531006, + "logits/rejected": -3.00537109375, + "logps/chosen": -277.34979248046875, + "logps/rejected": -867.1187744140625, + "loss": 0.8539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.059278108179569244, + "rewards/margins": 0.2875373661518097, + "rewards/rejected": -0.22825928032398224, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 1e-07, + "logits/chosen": -3.1786038875579834, + "logits/rejected": -3.1307311058044434, + "logps/chosen": -271.93426513671875, + "logps/rejected": -588.9378051757812, + "loss": 0.8701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.045740511268377304, + "rewards/margins": 0.20500412583351135, + "rewards/rejected": -0.15926361083984375, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 9.999989320463029e-08, + "logits/chosen": -3.257197856903076, + "logits/rejected": -3.1238017082214355, + "logps/chosen": -288.357177734375, + "logps/rejected": -760.0885009765625, + "loss": 0.9123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0481720007956028, + "rewards/margins": 0.319570928812027, + "rewards/rejected": -0.2713989317417145, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 9.999957281897734e-08, + "logits/chosen": -3.193697690963745, + "logits/rejected": -3.077686309814453, + "logps/chosen": -281.4578552246094, + "logps/rejected": -733.7327270507812, + "loss": 0.8992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0038879397325217724, + "rewards/margins": 0.19324646890163422, + "rewards/rejected": -0.18935853242874146, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 9.99990388444098e-08, + "logits/chosen": -3.199371099472046, + "logits/rejected": -3.1750969886779785, + "logps/chosen": -318.56195068359375, + "logps/rejected": -711.3070068359375, + "loss": 0.8796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02282257005572319, + "rewards/margins": 0.14188385009765625, + "rewards/rejected": -0.16470642387866974, + "step": 51 + }, + { + "epoch": 0.07, + "learning_rate": 9.999829128320872e-08, + "logits/chosen": -3.12086820602417, + "logits/rejected": -3.038437604904175, + "logps/chosen": -249.22779846191406, + "logps/rejected": -540.29345703125, + "loss": 0.8826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012033844366669655, + "rewards/margins": 0.1426384001970291, + "rewards/rejected": -0.15467223525047302, + "step": 52 + }, + { + "epoch": 0.07, + "learning_rate": 9.999733013856752e-08, + "logits/chosen": -3.1936192512512207, + "logits/rejected": -2.9944775104522705, + "logps/chosen": -292.6288757324219, + "logps/rejected": -929.4454345703125, + "loss": 0.848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011485290713608265, + "rewards/margins": 0.3644088804721832, + "rewards/rejected": -0.375894159078598, + "step": 53 + }, + { + "epoch": 0.07, + "learning_rate": 9.999615541459205e-08, + "logits/chosen": -3.1767067909240723, + "logits/rejected": -3.0608229637145996, + "logps/chosen": -266.0338134765625, + "logps/rejected": -802.327392578125, + "loss": 0.849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0717674270272255, + "rewards/margins": 0.4225608706474304, + "rewards/rejected": -0.3507934808731079, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 9.99947671163005e-08, + "logits/chosen": -3.202366828918457, + "logits/rejected": -3.024186134338379, + "logps/chosen": -305.95294189453125, + "logps/rejected": -919.6752319335938, + "loss": 0.8818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0071319579146802425, + "rewards/margins": 0.27996826171875, + "rewards/rejected": -0.287100225687027, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 9.999316524962345e-08, + "logits/chosen": -3.186246871948242, + "logits/rejected": -3.0646610260009766, + "logps/chosen": -277.02239990234375, + "logps/rejected": -390.7312316894531, + "loss": 0.798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02223663404583931, + "rewards/margins": 0.13908232748508453, + "rewards/rejected": -0.11684569716453552, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 9.999134982140374e-08, + "logits/chosen": -3.1924657821655273, + "logits/rejected": -3.029886245727539, + "logps/chosen": -267.8794250488281, + "logps/rejected": -338.04425048828125, + "loss": 0.8427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031280517578125, + "rewards/margins": 0.11741486191749573, + "rewards/rejected": -0.12054291367530823, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 9.998932083939655e-08, + "logits/chosen": -3.2617745399475098, + "logits/rejected": -3.1488914489746094, + "logps/chosen": -282.4534912109375, + "logps/rejected": -626.9369506835938, + "loss": 0.7874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07355804741382599, + "rewards/margins": 0.38763427734375, + "rewards/rejected": -0.3140762448310852, + "step": 58 + }, + { + "epoch": 0.08, + "learning_rate": 9.998707831226934e-08, + "logits/chosen": -3.1994433403015137, + "logits/rejected": -3.0850977897644043, + "logps/chosen": -285.01068115234375, + "logps/rejected": -905.3438720703125, + "loss": 0.7747, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01233520545065403, + "rewards/margins": 0.5060150623321533, + "rewards/rejected": -0.5183502435684204, + "step": 59 + }, + { + "epoch": 0.08, + "learning_rate": 9.998462224960173e-08, + "logits/chosen": -3.217484951019287, + "logits/rejected": -3.0168731212615967, + "logps/chosen": -252.2427520751953, + "logps/rejected": -519.7081298828125, + "loss": 0.8094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.026590729132294655, + "rewards/margins": 0.23439711332321167, + "rewards/rejected": -0.20780639350414276, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 9.99819526618856e-08, + "logits/chosen": -3.2062971591949463, + "logits/rejected": -3.02833890914917, + "logps/chosen": -279.2534484863281, + "logps/rejected": -1459.6044921875, + "loss": 0.8161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03407440334558487, + "rewards/margins": 0.6063522696495056, + "rewards/rejected": -0.572277843952179, + "step": 61 + }, + { + "epoch": 0.08, + "learning_rate": 9.997906956052494e-08, + "logits/chosen": -3.17598295211792, + "logits/rejected": -3.150437593460083, + "logps/chosen": -294.87060546875, + "logps/rejected": -773.4576416015625, + "loss": 0.8492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01239776611328125, + "rewards/margins": 0.4330093264579773, + "rewards/rejected": -0.42061156034469604, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 9.99759729578358e-08, + "logits/chosen": -3.1815438270568848, + "logits/rejected": -3.0886073112487793, + "logps/chosen": -320.5029296875, + "logps/rejected": -785.784912109375, + "loss": 0.8177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03809967264533043, + "rewards/margins": 0.36235809326171875, + "rewards/rejected": -0.3242584466934204, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 9.99726628670463e-08, + "logits/chosen": -3.183877944946289, + "logits/rejected": -3.163311004638672, + "logps/chosen": -280.00445556640625, + "logps/rejected": -743.2929077148438, + "loss": 0.7434, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03443603590130806, + "rewards/margins": 0.42883604764938354, + "rewards/rejected": -0.394400030374527, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 9.996913930229653e-08, + "logits/chosen": -3.2698493003845215, + "logits/rejected": -3.1127243041992188, + "logps/chosen": -284.6041564941406, + "logps/rejected": -1419.257080078125, + "loss": 0.7859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12257537990808487, + "rewards/margins": 0.8045883178710938, + "rewards/rejected": -0.6820129752159119, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 9.996540227863853e-08, + "logits/chosen": -3.1761038303375244, + "logits/rejected": -3.1068661212921143, + "logps/chosen": -265.7035827636719, + "logps/rejected": -736.8728637695312, + "loss": 0.785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.100164033472538, + "rewards/margins": 0.4553337097167969, + "rewards/rejected": -0.3551696836948395, + "step": 66 + }, + { + "epoch": 0.09, + "learning_rate": 9.996145181203615e-08, + "logits/chosen": -3.1764135360717773, + "logits/rejected": -3.083183765411377, + "logps/chosen": -282.3936767578125, + "logps/rejected": -294.25006103515625, + "loss": 0.8043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14238891005516052, + "rewards/margins": 0.3243652582168579, + "rewards/rejected": -0.181976318359375, + "step": 67 + }, + { + "epoch": 0.09, + "learning_rate": 9.995728791936504e-08, + "logits/chosen": -3.171771287918091, + "logits/rejected": -3.1400091648101807, + "logps/chosen": -305.0639343261719, + "logps/rejected": -540.78466796875, + "loss": 0.7861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0929718017578125, + "rewards/margins": 0.5325103998184204, + "rewards/rejected": -0.4395385980606079, + "step": 68 + }, + { + "epoch": 0.09, + "learning_rate": 9.995291061841263e-08, + "logits/chosen": -3.226702928543091, + "logits/rejected": -3.111063003540039, + "logps/chosen": -285.179931640625, + "logps/rejected": -510.65863037109375, + "loss": 0.8043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07011566311120987, + "rewards/margins": 0.3138717710971832, + "rewards/rejected": -0.24375611543655396, + "step": 69 + }, + { + "epoch": 0.09, + "learning_rate": 9.994831992787787e-08, + "logits/chosen": -3.2125039100646973, + "logits/rejected": -3.089855909347534, + "logps/chosen": -271.67071533203125, + "logps/rejected": -827.3577880859375, + "loss": 0.771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07589798420667648, + "rewards/margins": 0.5981117486953735, + "rewards/rejected": -0.5222137570381165, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 9.99435158673714e-08, + "logits/chosen": -3.136058807373047, + "logits/rejected": -3.0803821086883545, + "logps/chosen": -311.8917541503906, + "logps/rejected": -559.4263916015625, + "loss": 0.7408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02620239183306694, + "rewards/margins": 0.3621826171875, + "rewards/rejected": -0.33598023653030396, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 9.993849845741523e-08, + "logits/chosen": -3.1250009536743164, + "logits/rejected": -3.120492696762085, + "logps/chosen": -311.35736083984375, + "logps/rejected": -617.95849609375, + "loss": 0.7948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034698486328125, + "rewards/margins": 0.39517515897750854, + "rewards/rejected": -0.36047667264938354, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 9.993326771944284e-08, + "logits/chosen": -3.2614989280700684, + "logits/rejected": -2.992682456970215, + "logps/chosen": -266.8008117675781, + "logps/rejected": -1057.927978515625, + "loss": 0.7337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12925414741039276, + "rewards/margins": 0.6587082147598267, + "rewards/rejected": -0.5294540524482727, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 9.992782367579898e-08, + "logits/chosen": -3.2696480751037598, + "logits/rejected": -3.079080104827881, + "logps/chosen": -270.3592529296875, + "logps/rejected": -1104.71142578125, + "loss": 0.8107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08394317328929901, + "rewards/margins": 0.5948501825332642, + "rewards/rejected": -0.510906994342804, + "step": 74 + }, + { + "epoch": 0.1, + "learning_rate": 9.992216634973954e-08, + "logits/chosen": -3.2466201782226562, + "logits/rejected": -3.1814823150634766, + "logps/chosen": -242.46347045898438, + "logps/rejected": -806.4146728515625, + "loss": 0.7166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1473030149936676, + "rewards/margins": 0.6134742498397827, + "rewards/rejected": -0.4661712646484375, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 9.991629576543162e-08, + "logits/chosen": -3.2225537300109863, + "logits/rejected": -3.1546361446380615, + "logps/chosen": -269.8409118652344, + "logps/rejected": -585.1063232421875, + "loss": 0.7679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11861725151538849, + "rewards/margins": 0.5199036002159119, + "rewards/rejected": -0.4012863039970398, + "step": 76 + }, + { + "epoch": 0.1, + "learning_rate": 9.991021194795325e-08, + "logits/chosen": -3.2067008018493652, + "logits/rejected": -3.115208387374878, + "logps/chosen": -280.01416015625, + "logps/rejected": -524.9996337890625, + "loss": 0.7947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07481002807617188, + "rewards/margins": 0.45271986722946167, + "rewards/rejected": -0.3779098689556122, + "step": 77 + }, + { + "epoch": 0.1, + "learning_rate": 9.99039149232934e-08, + "logits/chosen": -3.2523365020751953, + "logits/rejected": -3.1055026054382324, + "logps/chosen": -253.22398376464844, + "logps/rejected": -577.3729248046875, + "loss": 0.7132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13011321425437927, + "rewards/margins": 0.6036087274551392, + "rewards/rejected": -0.4734954833984375, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 9.989740471835175e-08, + "logits/chosen": -3.169661521911621, + "logits/rejected": -3.079310894012451, + "logps/chosen": -271.7655944824219, + "logps/rejected": -554.1292724609375, + "loss": 0.7752, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10278931260108948, + "rewards/margins": 0.501751720905304, + "rewards/rejected": -0.39896243810653687, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 9.989068136093872e-08, + "logits/chosen": -3.2336912155151367, + "logits/rejected": -3.132707118988037, + "logps/chosen": -259.81243896484375, + "logps/rejected": -1606.9918212890625, + "loss": 0.6973, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13345031440258026, + "rewards/margins": 1.3930327892303467, + "rewards/rejected": -1.25958251953125, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 9.988374487977524e-08, + "logits/chosen": -3.2029507160186768, + "logits/rejected": -3.09244966506958, + "logps/chosen": -314.7593994140625, + "logps/rejected": -1065.3642578125, + "loss": 0.7336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11187134683132172, + "rewards/margins": 0.663403332233429, + "rewards/rejected": -0.551531970500946, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 9.987659530449266e-08, + "logits/chosen": -3.199596405029297, + "logits/rejected": -3.1096653938293457, + "logps/chosen": -281.1257019042969, + "logps/rejected": -444.63873291015625, + "loss": 0.7078, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14261627197265625, + "rewards/margins": 0.43872833251953125, + "rewards/rejected": -0.296112060546875, + "step": 82 + }, + { + "epoch": 0.11, + "learning_rate": 9.986923266563265e-08, + "logits/chosen": -3.2381019592285156, + "logits/rejected": -3.028777599334717, + "logps/chosen": -284.0218505859375, + "logps/rejected": -1526.9461669921875, + "loss": 0.6627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10843353718519211, + "rewards/margins": 1.131840467453003, + "rewards/rejected": -1.023406982421875, + "step": 83 + }, + { + "epoch": 0.11, + "learning_rate": 9.986165699464705e-08, + "logits/chosen": -3.1655545234680176, + "logits/rejected": -2.9359562397003174, + "logps/chosen": -279.2701110839844, + "logps/rejected": -1057.541015625, + "loss": 0.7059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14710846543312073, + "rewards/margins": 0.8957366943359375, + "rewards/rejected": -0.7486282587051392, + "step": 84 + }, + { + "epoch": 0.11, + "learning_rate": 9.985386832389771e-08, + "logits/chosen": -3.1607260704040527, + "logits/rejected": -3.092021942138672, + "logps/chosen": -234.6072235107422, + "logps/rejected": -447.5625, + "loss": 0.7798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10792236775159836, + "rewards/margins": 0.4702514410018921, + "rewards/rejected": -0.3623290956020355, + "step": 85 + }, + { + "epoch": 0.11, + "learning_rate": 9.98458666866564e-08, + "logits/chosen": -3.3136324882507324, + "logits/rejected": -3.139284133911133, + "logps/chosen": -231.22926330566406, + "logps/rejected": -787.1784057617188, + "loss": 0.6607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.214823916554451, + "rewards/margins": 0.8829711675643921, + "rewards/rejected": -0.6681472659111023, + "step": 86 + }, + { + "epoch": 0.11, + "learning_rate": 9.983765211710463e-08, + "logits/chosen": -3.2421560287475586, + "logits/rejected": -3.006103992462158, + "logps/chosen": -287.4139404296875, + "logps/rejected": -1959.246826171875, + "loss": 0.6523, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10525665432214737, + "rewards/margins": 1.5860306024551392, + "rewards/rejected": -1.48077392578125, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 9.982922465033349e-08, + "logits/chosen": -3.2269139289855957, + "logits/rejected": -3.0324316024780273, + "logps/chosen": -220.0435028076172, + "logps/rejected": -381.56866455078125, + "loss": 0.6713, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10450515896081924, + "rewards/margins": 0.3902549743652344, + "rewards/rejected": -0.2857498228549957, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 9.98205843223436e-08, + "logits/chosen": -3.1761178970336914, + "logits/rejected": -3.053684949874878, + "logps/chosen": -325.59991455078125, + "logps/rejected": -616.6306762695312, + "loss": 0.7275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07862702012062073, + "rewards/margins": 0.6512802243232727, + "rewards/rejected": -0.5726531744003296, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 9.981173117004483e-08, + "logits/chosen": -3.1650190353393555, + "logits/rejected": -3.0751547813415527, + "logps/chosen": -260.510986328125, + "logps/rejected": -1072.6854248046875, + "loss": 0.6866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13109131157398224, + "rewards/margins": 0.8797866702079773, + "rewards/rejected": -0.7486953735351562, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 9.980266523125618e-08, + "logits/chosen": -3.165071487426758, + "logits/rejected": -3.085299491882324, + "logps/chosen": -248.90084838867188, + "logps/rejected": -615.3164672851562, + "loss": 0.6517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1541183441877365, + "rewards/margins": 0.6969253420829773, + "rewards/rejected": -0.5428069829940796, + "step": 91 + }, + { + "epoch": 0.12, + "learning_rate": 9.979338654470568e-08, + "logits/chosen": -3.2059860229492188, + "logits/rejected": -3.1472020149230957, + "logps/chosen": -259.24945068359375, + "logps/rejected": -511.408935546875, + "loss": 0.7187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10056228935718536, + "rewards/margins": 0.6428810358047485, + "rewards/rejected": -0.5423187017440796, + "step": 92 + }, + { + "epoch": 0.12, + "learning_rate": 9.978389515003019e-08, + "logits/chosen": -3.1962389945983887, + "logits/rejected": -3.0831127166748047, + "logps/chosen": -257.1407165527344, + "logps/rejected": -645.3926391601562, + "loss": 0.6768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10799102485179901, + "rewards/margins": 0.9336516857147217, + "rewards/rejected": -0.8256607055664062, + "step": 93 + }, + { + "epoch": 0.12, + "learning_rate": 9.977419108777513e-08, + "logits/chosen": -3.2097105979919434, + "logits/rejected": -3.088120698928833, + "logps/chosen": -253.2802276611328, + "logps/rejected": -352.763916015625, + "loss": 0.7522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09417800605297089, + "rewards/margins": 0.41408005356788635, + "rewards/rejected": -0.31990206241607666, + "step": 94 + }, + { + "epoch": 0.12, + "learning_rate": 9.976427439939452e-08, + "logits/chosen": -3.1437268257141113, + "logits/rejected": -3.077453374862671, + "logps/chosen": -282.72711181640625, + "logps/rejected": -481.770751953125, + "loss": 0.6349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.131571963429451, + "rewards/margins": 0.4720596373081207, + "rewards/rejected": -0.34048768877983093, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 9.975414512725057e-08, + "logits/chosen": -3.2068982124328613, + "logits/rejected": -3.064347743988037, + "logps/chosen": -269.8858337402344, + "logps/rejected": -601.646484375, + "loss": 0.7404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16364747285842896, + "rewards/margins": 0.6702987551689148, + "rewards/rejected": -0.5066512823104858, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 9.974380331461367e-08, + "logits/chosen": -3.2277512550354004, + "logits/rejected": -3.0628695487976074, + "logps/chosen": -266.51727294921875, + "logps/rejected": -460.13629150390625, + "loss": 0.6687, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21073302626609802, + "rewards/margins": 0.6071014404296875, + "rewards/rejected": -0.3963684141635895, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 9.973324900566212e-08, + "logits/chosen": -3.160217761993408, + "logits/rejected": -3.09956693649292, + "logps/chosen": -274.408447265625, + "logps/rejected": -736.2503662109375, + "loss": 0.6831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2348373532295227, + "rewards/margins": 0.9842514395713806, + "rewards/rejected": -0.7494140863418579, + "step": 98 + }, + { + "epoch": 0.13, + "learning_rate": 9.972248224548199e-08, + "logits/chosen": -3.201866626739502, + "logits/rejected": -3.096522808074951, + "logps/chosen": -289.00732421875, + "logps/rejected": -525.35302734375, + "loss": 0.6246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11997681111097336, + "rewards/margins": 0.6957962512969971, + "rewards/rejected": -0.5758193731307983, + "step": 99 + }, + { + "epoch": 0.13, + "learning_rate": 9.971150308006688e-08, + "logits/chosen": -3.2037835121154785, + "logits/rejected": -3.1464905738830566, + "logps/chosen": -284.58319091796875, + "logps/rejected": -782.2645874023438, + "loss": 0.5623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.136424258351326, + "rewards/margins": 0.9927139282226562, + "rewards/rejected": -0.8562896847724915, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 9.970031155631774e-08, + "logits/chosen": -3.1880202293395996, + "logits/rejected": -3.0587856769561768, + "logps/chosen": -231.9755859375, + "logps/rejected": -843.9732666015625, + "loss": 0.6085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1641128659248352, + "rewards/margins": 1.0662704706192017, + "rewards/rejected": -0.9021576046943665, + "step": 101 + }, + { + "epoch": 0.13, + "learning_rate": 9.968890772204271e-08, + "logits/chosen": -3.230224847793579, + "logits/rejected": -3.138334274291992, + "logps/chosen": -259.01763916015625, + "logps/rejected": -551.449462890625, + "loss": 0.6503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17834550142288208, + "rewards/margins": 0.7734062671661377, + "rewards/rejected": -0.5950607061386108, + "step": 102 + }, + { + "epoch": 0.13, + "learning_rate": 9.967729162595682e-08, + "logits/chosen": -3.1852293014526367, + "logits/rejected": -3.1402664184570312, + "logps/chosen": -277.75213623046875, + "logps/rejected": -676.699462890625, + "loss": 0.6462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22199097275733948, + "rewards/margins": 1.2156769037246704, + "rewards/rejected": -0.9936859607696533, + "step": 103 + }, + { + "epoch": 0.13, + "learning_rate": 9.96654633176819e-08, + "logits/chosen": -3.2243077754974365, + "logits/rejected": -3.12252140045166, + "logps/chosen": -282.6531982421875, + "logps/rejected": -1416.501220703125, + "loss": 0.5756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14244231581687927, + "rewards/margins": 1.602299451828003, + "rewards/rejected": -1.4598572254180908, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 9.965342284774632e-08, + "logits/chosen": -3.247711181640625, + "logits/rejected": -3.153960704803467, + "logps/chosen": -278.6795654296875, + "logps/rejected": -654.02099609375, + "loss": 0.6542, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11280670762062073, + "rewards/margins": 0.9271713495254517, + "rewards/rejected": -0.8143646717071533, + "step": 105 + }, + { + "epoch": 0.14, + "learning_rate": 9.964117026758469e-08, + "logits/chosen": -3.239744186401367, + "logits/rejected": -3.1258089542388916, + "logps/chosen": -261.47900390625, + "logps/rejected": -463.5251770019531, + "loss": 0.6447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17209625244140625, + "rewards/margins": 0.6465332508087158, + "rewards/rejected": -0.4744369387626648, + "step": 106 + }, + { + "epoch": 0.14, + "learning_rate": 9.96287056295378e-08, + "logits/chosen": -3.1938138008117676, + "logits/rejected": -3.1662421226501465, + "logps/chosen": -290.7647705078125, + "logps/rejected": -686.7113647460938, + "loss": 0.5691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25023192167282104, + "rewards/margins": 1.246826171875, + "rewards/rejected": -0.996594250202179, + "step": 107 + }, + { + "epoch": 0.14, + "learning_rate": 9.961602898685224e-08, + "logits/chosen": -3.2110204696655273, + "logits/rejected": -3.0666472911834717, + "logps/chosen": -258.8492126464844, + "logps/rejected": -1062.20703125, + "loss": 0.6215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23620986938476562, + "rewards/margins": 1.5008338689804077, + "rewards/rejected": -1.264623999595642, + "step": 108 + }, + { + "epoch": 0.14, + "learning_rate": 9.960314039368032e-08, + "logits/chosen": -3.210683822631836, + "logits/rejected": -3.0614748001098633, + "logps/chosen": -247.9495849609375, + "logps/rejected": -355.55047607421875, + "loss": 0.6268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1479850858449936, + "rewards/margins": 0.5364036560058594, + "rewards/rejected": -0.388418585062027, + "step": 109 + }, + { + "epoch": 0.14, + "learning_rate": 9.959003990507971e-08, + "logits/chosen": -3.1105692386627197, + "logits/rejected": -3.07321834564209, + "logps/chosen": -266.68585205078125, + "logps/rejected": -1147.29052734375, + "loss": 0.54, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21409912407398224, + "rewards/margins": 1.6652679443359375, + "rewards/rejected": -1.4511687755584717, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 9.957672757701325e-08, + "logits/chosen": -3.251760482788086, + "logits/rejected": -3.0415406227111816, + "logps/chosen": -268.53240966796875, + "logps/rejected": -493.2273254394531, + "loss": 0.5852, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19534912705421448, + "rewards/margins": 0.6813934445381165, + "rewards/rejected": -0.486044317483902, + "step": 111 + }, + { + "epoch": 0.14, + "learning_rate": 9.956320346634876e-08, + "logits/chosen": -3.194080352783203, + "logits/rejected": -3.129650592803955, + "logps/chosen": -294.19061279296875, + "logps/rejected": -423.96368408203125, + "loss": 0.6164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23742979764938354, + "rewards/margins": 0.5286300778388977, + "rewards/rejected": -0.29120028018951416, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 9.954946763085872e-08, + "logits/chosen": -3.164581775665283, + "logits/rejected": -3.106788158416748, + "logps/chosen": -272.8618469238281, + "logps/rejected": -421.9893493652344, + "loss": 0.6435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.239665225148201, + "rewards/margins": 0.7452530264854431, + "rewards/rejected": -0.5055878162384033, + "step": 113 + }, + { + "epoch": 0.15, + "learning_rate": 9.953552012922011e-08, + "logits/chosen": -3.090869426727295, + "logits/rejected": -3.1295149326324463, + "logps/chosen": -309.06646728515625, + "logps/rejected": -478.02374267578125, + "loss": 0.6493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.243896484375, + "rewards/margins": 0.7292068600654602, + "rewards/rejected": -0.4853103756904602, + "step": 114 + }, + { + "epoch": 0.15, + "learning_rate": 9.952136102101402e-08, + "logits/chosen": -3.241001605987549, + "logits/rejected": -3.1297473907470703, + "logps/chosen": -318.3747253417969, + "logps/rejected": -888.8642578125, + "loss": 0.612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17977295815944672, + "rewards/margins": 1.1820921897888184, + "rewards/rejected": -1.0023193359375, + "step": 115 + }, + { + "epoch": 0.15, + "learning_rate": 9.950699036672558e-08, + "logits/chosen": -3.2643814086914062, + "logits/rejected": -3.101897716522217, + "logps/chosen": -293.7010192871094, + "logps/rejected": -1214.974609375, + "loss": 0.6435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13135680556297302, + "rewards/margins": 1.6106293201446533, + "rewards/rejected": -1.479272484779358, + "step": 116 + }, + { + "epoch": 0.15, + "learning_rate": 9.949240822774356e-08, + "logits/chosen": -3.248342275619507, + "logits/rejected": -3.078291416168213, + "logps/chosen": -262.1155700683594, + "logps/rejected": -591.5318603515625, + "loss": 0.5753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19619140028953552, + "rewards/margins": 0.9688034057617188, + "rewards/rejected": -0.7726119756698608, + "step": 117 + }, + { + "epoch": 0.15, + "learning_rate": 9.947761466636013e-08, + "logits/chosen": -3.2115421295166016, + "logits/rejected": -3.072525978088379, + "logps/chosen": -269.2483215332031, + "logps/rejected": -443.0014953613281, + "loss": 0.6958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1600540280342102, + "rewards/margins": 0.5699356198310852, + "rewards/rejected": -0.409881591796875, + "step": 118 + }, + { + "epoch": 0.15, + "learning_rate": 9.946260974577068e-08, + "logits/chosen": -3.2777202129364014, + "logits/rejected": -3.073972463607788, + "logps/chosen": -245.99383544921875, + "logps/rejected": -796.1928100585938, + "loss": 0.4675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24498367309570312, + "rewards/margins": 1.1427109241485596, + "rewards/rejected": -0.8977272510528564, + "step": 119 + }, + { + "epoch": 0.15, + "learning_rate": 9.944739353007342e-08, + "logits/chosen": -3.177346706390381, + "logits/rejected": -3.147524356842041, + "logps/chosen": -312.6208801269531, + "logps/rejected": -684.0214233398438, + "loss": 0.5865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2543289065361023, + "rewards/margins": 1.460310459136963, + "rewards/rejected": -1.2059814929962158, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 9.943196608426923e-08, + "logits/chosen": -3.2449841499328613, + "logits/rejected": -3.0008444786071777, + "logps/chosen": -222.39759826660156, + "logps/rejected": -1243.8115234375, + "loss": 0.5442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2198539674282074, + "rewards/margins": 1.6731011867523193, + "rewards/rejected": -1.4532470703125, + "step": 121 + }, + { + "epoch": 0.16, + "learning_rate": 9.941632747426128e-08, + "logits/chosen": -3.2468159198760986, + "logits/rejected": -3.1973018646240234, + "logps/chosen": -303.6324157714844, + "logps/rejected": -782.1910400390625, + "loss": 0.6288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06877747178077698, + "rewards/margins": 1.035070776939392, + "rewards/rejected": -0.9662933349609375, + "step": 122 + }, + { + "epoch": 0.16, + "learning_rate": 9.940047776685483e-08, + "logits/chosen": -3.243259906768799, + "logits/rejected": -3.151745319366455, + "logps/chosen": -265.5019836425781, + "logps/rejected": -1204.794921875, + "loss": 0.549, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1705528199672699, + "rewards/margins": 2.0877466201782227, + "rewards/rejected": -1.9171936511993408, + "step": 123 + }, + { + "epoch": 0.16, + "learning_rate": 9.938441702975688e-08, + "logits/chosen": -3.2389540672302246, + "logits/rejected": -3.0693516731262207, + "logps/chosen": -256.31536865234375, + "logps/rejected": -402.4072265625, + "loss": 0.6647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17390747368335724, + "rewards/margins": 0.5038040280342102, + "rewards/rejected": -0.32989656925201416, + "step": 124 + }, + { + "epoch": 0.16, + "learning_rate": 9.936814533157593e-08, + "logits/chosen": -3.208456516265869, + "logits/rejected": -2.98256778717041, + "logps/chosen": -255.40550231933594, + "logps/rejected": -623.2733154296875, + "loss": 0.6518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12956543266773224, + "rewards/margins": 0.6958023309707642, + "rewards/rejected": -0.5662368535995483, + "step": 125 + }, + { + "epoch": 0.16, + "learning_rate": 9.93516627418217e-08, + "logits/chosen": -3.227449893951416, + "logits/rejected": -3.1592659950256348, + "logps/chosen": -251.93746948242188, + "logps/rejected": -676.3363037109375, + "loss": 0.5501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16344603896141052, + "rewards/margins": 1.1991760730743408, + "rewards/rejected": -1.035730004310608, + "step": 126 + }, + { + "epoch": 0.16, + "learning_rate": 9.93349693309047e-08, + "logits/chosen": -3.1886425018310547, + "logits/rejected": -3.056748867034912, + "logps/chosen": -284.7598571777344, + "logps/rejected": -529.6925048828125, + "loss": 0.5677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25884246826171875, + "rewards/margins": 0.9355255365371704, + "rewards/rejected": -0.6766830682754517, + "step": 127 + }, + { + "epoch": 0.16, + "learning_rate": 9.931806517013611e-08, + "logits/chosen": -3.2468912601470947, + "logits/rejected": -3.0396924018859863, + "logps/chosen": -267.09564208984375, + "logps/rejected": -760.130126953125, + "loss": 0.566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22487182915210724, + "rewards/margins": 1.2168610095977783, + "rewards/rejected": -0.9919891357421875, + "step": 128 + }, + { + "epoch": 0.16, + "learning_rate": 9.930095033172738e-08, + "logits/chosen": -3.2426605224609375, + "logits/rejected": -3.1436939239501953, + "logps/chosen": -280.37567138671875, + "logps/rejected": -552.4033203125, + "loss": 0.5691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3011840879917145, + "rewards/margins": 1.3328766822814941, + "rewards/rejected": -1.0316925048828125, + "step": 129 + }, + { + "epoch": 0.17, + "learning_rate": 9.928362488878995e-08, + "logits/chosen": -3.1615662574768066, + "logits/rejected": -3.0224289894104004, + "logps/chosen": -292.93927001953125, + "logps/rejected": -878.30029296875, + "loss": 0.532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14185333251953125, + "rewards/margins": 1.2912917137145996, + "rewards/rejected": -1.149438500404358, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 9.926608891533486e-08, + "logits/chosen": -3.247861862182617, + "logits/rejected": -3.079556465148926, + "logps/chosen": -250.73573303222656, + "logps/rejected": -771.010498046875, + "loss": 0.5698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27498167753219604, + "rewards/margins": 1.4379425048828125, + "rewards/rejected": -1.1629607677459717, + "step": 131 + }, + { + "epoch": 0.17, + "learning_rate": 9.924834248627259e-08, + "logits/chosen": -3.219128131866455, + "logits/rejected": -3.074057102203369, + "logps/chosen": -275.6302490234375, + "logps/rejected": -482.7510986328125, + "loss": 0.5715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22872620820999146, + "rewards/margins": 1.0031509399414062, + "rewards/rejected": -0.7744247913360596, + "step": 132 + }, + { + "epoch": 0.17, + "learning_rate": 9.923038567741255e-08, + "logits/chosen": -3.2520318031311035, + "logits/rejected": -3.056931495666504, + "logps/chosen": -286.5071716308594, + "logps/rejected": -480.0301818847656, + "loss": 0.5817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23899993300437927, + "rewards/margins": 1.0568360090255737, + "rewards/rejected": -0.8178360462188721, + "step": 133 + }, + { + "epoch": 0.17, + "learning_rate": 9.921221856546293e-08, + "logits/chosen": -3.1083152294158936, + "logits/rejected": -3.0445289611816406, + "logps/chosen": -271.7095947265625, + "logps/rejected": -473.6827697753906, + "loss": 0.5177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3998405337333679, + "rewards/margins": 0.9884514212608337, + "rewards/rejected": -0.588610827922821, + "step": 134 + }, + { + "epoch": 0.17, + "learning_rate": 9.919384122803026e-08, + "logits/chosen": -3.2525997161865234, + "logits/rejected": -3.1064953804016113, + "logps/chosen": -302.04107666015625, + "logps/rejected": -718.86767578125, + "loss": 0.5191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21579742431640625, + "rewards/margins": 1.4499282836914062, + "rewards/rejected": -1.234130859375, + "step": 135 + }, + { + "epoch": 0.17, + "learning_rate": 9.917525374361911e-08, + "logits/chosen": -3.213495969772339, + "logits/rejected": -3.051905870437622, + "logps/chosen": -303.1468811035156, + "logps/rejected": -1091.150634765625, + "loss": 0.5329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28872835636138916, + "rewards/margins": 1.7222061157226562, + "rewards/rejected": -1.433477759361267, + "step": 136 + }, + { + "epoch": 0.17, + "learning_rate": 9.91564561916318e-08, + "logits/chosen": -3.1949124336242676, + "logits/rejected": -3.1221063137054443, + "logps/chosen": -253.89834594726562, + "logps/rejected": -642.7247314453125, + "loss": 0.4993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19946594536304474, + "rewards/margins": 1.215765357017517, + "rewards/rejected": -1.0162994861602783, + "step": 137 + }, + { + "epoch": 0.18, + "learning_rate": 9.913744865236797e-08, + "logits/chosen": -3.2202677726745605, + "logits/rejected": -3.0765042304992676, + "logps/chosen": -247.27407836914062, + "logps/rejected": -668.519287109375, + "loss": 0.5345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24391022324562073, + "rewards/margins": 1.066328525543213, + "rewards/rejected": -0.822418212890625, + "step": 138 + }, + { + "epoch": 0.18, + "learning_rate": 9.911823120702431e-08, + "logits/chosen": -3.2298240661621094, + "logits/rejected": -3.1098361015319824, + "logps/chosen": -246.12326049804688, + "logps/rejected": -625.9581298828125, + "loss": 0.5221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18134918808937073, + "rewards/margins": 1.2459275722503662, + "rewards/rejected": -1.0645782947540283, + "step": 139 + }, + { + "epoch": 0.18, + "learning_rate": 9.909880393769419e-08, + "logits/chosen": -3.222661256790161, + "logits/rejected": -3.041456699371338, + "logps/chosen": -291.0609436035156, + "logps/rejected": -938.2486572265625, + "loss": 0.5916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18115845322608948, + "rewards/margins": 1.4888489246368408, + "rewards/rejected": -1.3076903820037842, + "step": 140 + }, + { + "epoch": 0.18, + "learning_rate": 9.907916692736729e-08, + "logits/chosen": -3.2472496032714844, + "logits/rejected": -3.1287484169006348, + "logps/chosen": -280.4447326660156, + "logps/rejected": -503.16302490234375, + "loss": 0.5606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17201538383960724, + "rewards/margins": 1.07867431640625, + "rewards/rejected": -0.9066588878631592, + "step": 141 + }, + { + "epoch": 0.18, + "learning_rate": 9.905932025992931e-08, + "logits/chosen": -3.179827928543091, + "logits/rejected": -3.113107919692993, + "logps/chosen": -268.305908203125, + "logps/rejected": -384.58099365234375, + "loss": 0.5406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18470841646194458, + "rewards/margins": 0.8244522213935852, + "rewards/rejected": -0.6397438049316406, + "step": 142 + }, + { + "epoch": 0.18, + "learning_rate": 9.903926402016151e-08, + "logits/chosen": -3.239659309387207, + "logits/rejected": -3.0796966552734375, + "logps/chosen": -274.66326904296875, + "logps/rejected": -390.9772033691406, + "loss": 0.5819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2205306887626648, + "rewards/margins": 0.7012740969657898, + "rewards/rejected": -0.480743408203125, + "step": 143 + }, + { + "epoch": 0.18, + "learning_rate": 9.901899829374047e-08, + "logits/chosen": -3.2654917240142822, + "logits/rejected": -3.099322557449341, + "logps/chosen": -270.8184814453125, + "logps/rejected": -1717.161376953125, + "loss": 0.4448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1869461089372635, + "rewards/margins": 2.8116347789764404, + "rewards/rejected": -2.6246886253356934, + "step": 144 + }, + { + "epoch": 0.18, + "learning_rate": 9.899852316723758e-08, + "logits/chosen": -3.248325824737549, + "logits/rejected": -3.0615110397338867, + "logps/chosen": -265.6180114746094, + "logps/rejected": -586.982666015625, + "loss": 0.5172, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20800094306468964, + "rewards/margins": 1.1657295227050781, + "rewards/rejected": -0.9577286243438721, + "step": 145 + }, + { + "epoch": 0.19, + "learning_rate": 9.89778387281188e-08, + "logits/chosen": -3.233670711517334, + "logits/rejected": -3.1113529205322266, + "logps/chosen": -250.3822021484375, + "logps/rejected": -579.1903076171875, + "loss": 0.57, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.214183047413826, + "rewards/margins": 1.2480881214141846, + "rewards/rejected": -1.033905029296875, + "step": 146 + }, + { + "epoch": 0.19, + "learning_rate": 9.895694506474422e-08, + "logits/chosen": -3.2114858627319336, + "logits/rejected": -3.144461154937744, + "logps/chosen": -269.06134033203125, + "logps/rejected": -777.7671508789062, + "loss": 0.5401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22104796767234802, + "rewards/margins": 1.366236925125122, + "rewards/rejected": -1.1451889276504517, + "step": 147 + }, + { + "epoch": 0.19, + "learning_rate": 9.893584226636772e-08, + "logits/chosen": -3.2535901069641113, + "logits/rejected": -3.099276065826416, + "logps/chosen": -278.61517333984375, + "logps/rejected": -1753.6534423828125, + "loss": 0.4756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2865997552871704, + "rewards/margins": 3.022418260574341, + "rewards/rejected": -2.735818386077881, + "step": 148 + }, + { + "epoch": 0.19, + "learning_rate": 9.891453042313654e-08, + "logits/chosen": -3.181734323501587, + "logits/rejected": -3.036571979522705, + "logps/chosen": -283.84405517578125, + "logps/rejected": -568.04296875, + "loss": 0.5588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23611298203468323, + "rewards/margins": 1.23192298412323, + "rewards/rejected": -0.9958099722862244, + "step": 149 + }, + { + "epoch": 0.19, + "learning_rate": 9.889300962609089e-08, + "logits/chosen": -3.195742607116699, + "logits/rejected": -3.1205122470855713, + "logps/chosen": -264.334716796875, + "logps/rejected": -565.1089477539062, + "loss": 0.4925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3145751953125, + "rewards/margins": 1.1852936744689941, + "rewards/rejected": -0.8707184195518494, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 9.887127996716368e-08, + "logits/chosen": -3.2708187103271484, + "logits/rejected": -3.0733423233032227, + "logps/chosen": -246.65176391601562, + "logps/rejected": -334.5085754394531, + "loss": 0.5879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25448915362358093, + "rewards/margins": 0.7974258661270142, + "rewards/rejected": -0.5429366827011108, + "step": 151 + }, + { + "epoch": 0.19, + "learning_rate": 9.884934153917997e-08, + "logits/chosen": -3.184823513031006, + "logits/rejected": -3.0961954593658447, + "logps/chosen": -276.26141357421875, + "logps/rejected": -841.406005859375, + "loss": 0.4475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28274765610694885, + "rewards/margins": 1.8017175197601318, + "rewards/rejected": -1.5189697742462158, + "step": 152 + }, + { + "epoch": 0.2, + "learning_rate": 9.882719443585664e-08, + "logits/chosen": -3.2115273475646973, + "logits/rejected": -3.1466097831726074, + "logps/chosen": -260.9636535644531, + "logps/rejected": -838.67333984375, + "loss": 0.4838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23430481553077698, + "rewards/margins": 1.6442382335662842, + "rewards/rejected": -1.4099334478378296, + "step": 153 + }, + { + "epoch": 0.2, + "learning_rate": 9.880483875180204e-08, + "logits/chosen": -3.143486976623535, + "logits/rejected": -3.0724308490753174, + "logps/chosen": -278.4436340332031, + "logps/rejected": -460.38214111328125, + "loss": 0.5293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36572569608688354, + "rewards/margins": 1.0957932472229004, + "rewards/rejected": -0.7300674915313721, + "step": 154 + }, + { + "epoch": 0.2, + "learning_rate": 9.87822745825155e-08, + "logits/chosen": -3.225785732269287, + "logits/rejected": -3.170255184173584, + "logps/chosen": -265.0966491699219, + "logps/rejected": -785.968505859375, + "loss": 0.4707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22892609238624573, + "rewards/margins": 1.5027389526367188, + "rewards/rejected": -1.2738128900527954, + "step": 155 + }, + { + "epoch": 0.2, + "learning_rate": 9.875950202438699e-08, + "logits/chosen": -3.1921072006225586, + "logits/rejected": -3.0408854484558105, + "logps/chosen": -276.91455078125, + "logps/rejected": -944.6664428710938, + "loss": 0.4668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38026124238967896, + "rewards/margins": 1.8744019269943237, + "rewards/rejected": -1.494140625, + "step": 156 + }, + { + "epoch": 0.2, + "learning_rate": 9.873652117469663e-08, + "logits/chosen": -3.204911470413208, + "logits/rejected": -3.0389461517333984, + "logps/chosen": -266.77593994140625, + "logps/rejected": -577.806640625, + "loss": 0.5499, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3053573668003082, + "rewards/margins": 1.4305343627929688, + "rewards/rejected": -1.125177025794983, + "step": 157 + }, + { + "epoch": 0.2, + "learning_rate": 9.871333213161437e-08, + "logits/chosen": -3.213468074798584, + "logits/rejected": -3.0189995765686035, + "logps/chosen": -334.8372497558594, + "logps/rejected": -615.3815307617188, + "loss": 0.5714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07346954196691513, + "rewards/margins": 1.0463242530822754, + "rewards/rejected": -0.9728546142578125, + "step": 158 + }, + { + "epoch": 0.2, + "learning_rate": 9.868993499419951e-08, + "logits/chosen": -3.267897129058838, + "logits/rejected": -3.059178352355957, + "logps/chosen": -257.0511474609375, + "logps/rejected": -469.9270935058594, + "loss": 0.5261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28245848417282104, + "rewards/margins": 1.0313873291015625, + "rewards/rejected": -0.7489288449287415, + "step": 159 + }, + { + "epoch": 0.2, + "learning_rate": 9.866632986240029e-08, + "logits/chosen": -3.1535983085632324, + "logits/rejected": -3.0448060035705566, + "logps/chosen": -300.3434143066406, + "logps/rejected": -672.9609985351562, + "loss": 0.6046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3080795407295227, + "rewards/margins": 1.2571029663085938, + "rewards/rejected": -0.9490234851837158, + "step": 160 + }, + { + "epoch": 0.21, + "learning_rate": 9.864251683705345e-08, + "logits/chosen": -3.169032335281372, + "logits/rejected": -3.132946729660034, + "logps/chosen": -286.8828125, + "logps/rejected": -518.8457641601562, + "loss": 0.4715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.302175909280777, + "rewards/margins": 1.3608673810958862, + "rewards/rejected": -1.058691382408142, + "step": 161 + }, + { + "epoch": 0.21, + "learning_rate": 9.861849601988382e-08, + "logits/chosen": -3.2565698623657227, + "logits/rejected": -3.1206250190734863, + "logps/chosen": -268.0730895996094, + "logps/rejected": -506.7103576660156, + "loss": 0.4845, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4218429625034332, + "rewards/margins": 1.3583542108535767, + "rewards/rejected": -0.9365112781524658, + "step": 162 + }, + { + "epoch": 0.21, + "learning_rate": 9.859426751350391e-08, + "logits/chosen": -3.207357883453369, + "logits/rejected": -3.058323383331299, + "logps/chosen": -267.0477294921875, + "logps/rejected": -395.2390441894531, + "loss": 0.5258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2816208004951477, + "rewards/margins": 1.0184006690979004, + "rewards/rejected": -0.7367798089981079, + "step": 163 + }, + { + "epoch": 0.21, + "learning_rate": 9.856983142141337e-08, + "logits/chosen": -3.2393784523010254, + "logits/rejected": -3.144376754760742, + "logps/chosen": -222.49801635742188, + "logps/rejected": -345.9996032714844, + "loss": 0.5386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24199828505516052, + "rewards/margins": 0.8463333249092102, + "rewards/rejected": -0.6043350696563721, + "step": 164 + }, + { + "epoch": 0.21, + "learning_rate": 9.85451878479987e-08, + "logits/chosen": -3.2603158950805664, + "logits/rejected": -3.109013557434082, + "logps/chosen": -274.7796936035156, + "logps/rejected": -264.59600830078125, + "loss": 0.5611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3210617005825043, + "rewards/margins": 0.7324180603027344, + "rewards/rejected": -0.4113563597202301, + "step": 165 + }, + { + "epoch": 0.21, + "learning_rate": 9.852033689853267e-08, + "logits/chosen": -3.223738670349121, + "logits/rejected": -3.0806682109832764, + "logps/chosen": -260.593505859375, + "logps/rejected": -737.2300415039062, + "loss": 0.5274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3524765074253082, + "rewards/margins": 1.7037644386291504, + "rewards/rejected": -1.351287841796875, + "step": 166 + }, + { + "epoch": 0.21, + "learning_rate": 9.849527867917391e-08, + "logits/chosen": -3.2088708877563477, + "logits/rejected": -3.1032683849334717, + "logps/chosen": -287.3129577636719, + "logps/rejected": -491.2164306640625, + "loss": 0.4969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2938888669013977, + "rewards/margins": 1.3999786376953125, + "rewards/rejected": -1.1060898303985596, + "step": 167 + }, + { + "epoch": 0.21, + "learning_rate": 9.847001329696651e-08, + "logits/chosen": -3.2338943481445312, + "logits/rejected": -3.0949904918670654, + "logps/chosen": -260.27093505859375, + "logps/rejected": -3349.701171875, + "loss": 0.5043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31128236651420593, + "rewards/margins": 3.8713576793670654, + "rewards/rejected": -3.560075283050537, + "step": 168 + }, + { + "epoch": 0.22, + "learning_rate": 9.844454085983951e-08, + "logits/chosen": -3.149369955062866, + "logits/rejected": -3.0459909439086914, + "logps/chosen": -264.021728515625, + "logps/rejected": -628.3539428710938, + "loss": 0.4455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39017945528030396, + "rewards/margins": 1.4119873046875, + "rewards/rejected": -1.0218079090118408, + "step": 169 + }, + { + "epoch": 0.22, + "learning_rate": 9.841886147660644e-08, + "logits/chosen": -3.172433376312256, + "logits/rejected": -3.0432538986206055, + "logps/chosen": -298.2574462890625, + "logps/rejected": -516.0426025390625, + "loss": 0.5029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24589692056179047, + "rewards/margins": 1.2851914167404175, + "rewards/rejected": -1.0392944812774658, + "step": 170 + }, + { + "epoch": 0.22, + "learning_rate": 9.839297525696486e-08, + "logits/chosen": -3.2448902130126953, + "logits/rejected": -3.0848453044891357, + "logps/chosen": -226.3628387451172, + "logps/rejected": -901.438720703125, + "loss": 0.5123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.313864141702652, + "rewards/margins": 1.852778673171997, + "rewards/rejected": -1.5389145612716675, + "step": 171 + }, + { + "epoch": 0.22, + "learning_rate": 9.836688231149591e-08, + "logits/chosen": -3.3046979904174805, + "logits/rejected": -3.0704832077026367, + "logps/chosen": -269.2979736328125, + "logps/rejected": -1023.6092529296875, + "loss": 0.5366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23932114243507385, + "rewards/margins": 2.151400089263916, + "rewards/rejected": -1.912078857421875, + "step": 172 + }, + { + "epoch": 0.22, + "learning_rate": 9.834058275166383e-08, + "logits/chosen": -3.258103370666504, + "logits/rejected": -3.1752169132232666, + "logps/chosen": -273.9762268066406, + "logps/rejected": -789.5421142578125, + "loss": 0.4635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37164002656936646, + "rewards/margins": 1.9648804664611816, + "rewards/rejected": -1.5932403802871704, + "step": 173 + }, + { + "epoch": 0.22, + "learning_rate": 9.831407668981545e-08, + "logits/chosen": -3.139047622680664, + "logits/rejected": -3.067471742630005, + "logps/chosen": -289.52545166015625, + "logps/rejected": -834.518798828125, + "loss": 0.5443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24037933349609375, + "rewards/margins": 1.3973863124847412, + "rewards/rejected": -1.157006859779358, + "step": 174 + }, + { + "epoch": 0.22, + "learning_rate": 9.828736423917977e-08, + "logits/chosen": -3.2231204509735107, + "logits/rejected": -3.1439108848571777, + "logps/chosen": -291.41204833984375, + "logps/rejected": -580.0955200195312, + "loss": 0.4754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24271240830421448, + "rewards/margins": 1.507807970046997, + "rewards/rejected": -1.265095591545105, + "step": 175 + }, + { + "epoch": 0.22, + "learning_rate": 9.826044551386743e-08, + "logits/chosen": -3.20489501953125, + "logits/rejected": -3.145662307739258, + "logps/chosen": -260.7786560058594, + "logps/rejected": -633.7889404296875, + "loss": 0.5646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2912399172782898, + "rewards/margins": 1.4832322597503662, + "rewards/rejected": -1.1919922828674316, + "step": 176 + }, + { + "epoch": 0.23, + "learning_rate": 9.823332062887024e-08, + "logits/chosen": -3.210176706314087, + "logits/rejected": -2.9391188621520996, + "logps/chosen": -241.83291625976562, + "logps/rejected": -411.1343078613281, + "loss": 0.4918, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41097337007522583, + "rewards/margins": 1.0000312328338623, + "rewards/rejected": -0.5890579223632812, + "step": 177 + }, + { + "epoch": 0.23, + "learning_rate": 9.820598970006067e-08, + "logits/chosen": -3.2345714569091797, + "logits/rejected": -3.0980710983276367, + "logps/chosen": -274.321533203125, + "logps/rejected": -1033.1317138671875, + "loss": 0.4404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23038789629936218, + "rewards/margins": 2.1081650257110596, + "rewards/rejected": -1.877777099609375, + "step": 178 + }, + { + "epoch": 0.23, + "learning_rate": 9.817845284419142e-08, + "logits/chosen": -3.245854139328003, + "logits/rejected": -3.1518614292144775, + "logps/chosen": -283.2853088378906, + "logps/rejected": -731.5399169921875, + "loss": 0.5099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23818130791187286, + "rewards/margins": 2.177436113357544, + "rewards/rejected": -1.9392547607421875, + "step": 179 + }, + { + "epoch": 0.23, + "learning_rate": 9.81507101788948e-08, + "logits/chosen": -3.2000699043273926, + "logits/rejected": -3.0319747924804688, + "logps/chosen": -297.0812683105469, + "logps/rejected": -1294.87060546875, + "loss": 0.4385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24126282334327698, + "rewards/margins": 2.4475250244140625, + "rewards/rejected": -2.2062621116638184, + "step": 180 + }, + { + "epoch": 0.23, + "learning_rate": 9.812276182268236e-08, + "logits/chosen": -3.219921588897705, + "logits/rejected": -3.1312918663024902, + "logps/chosen": -275.4583740234375, + "logps/rejected": -696.95947265625, + "loss": 0.4358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32626649737358093, + "rewards/margins": 1.6398072242736816, + "rewards/rejected": -1.3135406970977783, + "step": 181 + }, + { + "epoch": 0.23, + "learning_rate": 9.80946078949443e-08, + "logits/chosen": -3.2544808387756348, + "logits/rejected": -3.0932726860046387, + "logps/chosen": -264.73150634765625, + "logps/rejected": -575.011962890625, + "loss": 0.5005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3516426086425781, + "rewards/margins": 1.4327843189239502, + "rewards/rejected": -1.081141710281372, + "step": 182 + }, + { + "epoch": 0.23, + "learning_rate": 9.806624851594898e-08, + "logits/chosen": -3.188765048980713, + "logits/rejected": -3.1140708923339844, + "logps/chosen": -268.145263671875, + "logps/rejected": -450.9249267578125, + "loss": 0.5164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32348936796188354, + "rewards/margins": 1.272613525390625, + "rewards/rejected": -0.9491241574287415, + "step": 183 + }, + { + "epoch": 0.23, + "learning_rate": 9.803768380684241e-08, + "logits/chosen": -3.215010643005371, + "logits/rejected": -3.0662121772766113, + "logps/chosen": -299.958251953125, + "logps/rejected": -666.9825439453125, + "loss": 0.4949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4185226559638977, + "rewards/margins": 1.4386610984802246, + "rewards/rejected": -1.0201385021209717, + "step": 184 + }, + { + "epoch": 0.24, + "learning_rate": 9.800891388964773e-08, + "logits/chosen": -3.175143241882324, + "logits/rejected": -3.1861531734466553, + "logps/chosen": -305.9064636230469, + "logps/rejected": -487.818115234375, + "loss": 0.5003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3361160159111023, + "rewards/margins": 1.44670569896698, + "rewards/rejected": -1.110589623451233, + "step": 185 + }, + { + "epoch": 0.24, + "learning_rate": 9.797993888726472e-08, + "logits/chosen": -3.22625732421875, + "logits/rejected": -3.1223316192626953, + "logps/chosen": -258.72235107421875, + "logps/rejected": -434.039306640625, + "loss": 0.4644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41947633028030396, + "rewards/margins": 1.1879730224609375, + "rewards/rejected": -0.7684967517852783, + "step": 186 + }, + { + "epoch": 0.24, + "learning_rate": 9.795075892346922e-08, + "logits/chosen": -3.2722277641296387, + "logits/rejected": -3.0678138732910156, + "logps/chosen": -288.0643310546875, + "logps/rejected": -401.45758056640625, + "loss": 0.506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33347779512405396, + "rewards/margins": 0.9601806998252869, + "rewards/rejected": -0.6267029047012329, + "step": 187 + }, + { + "epoch": 0.24, + "learning_rate": 9.792137412291263e-08, + "logits/chosen": -3.2514100074768066, + "logits/rejected": -3.0106067657470703, + "logps/chosen": -232.86703491210938, + "logps/rejected": -626.617431640625, + "loss": 0.461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42511290311813354, + "rewards/margins": 1.6106643676757812, + "rewards/rejected": -1.1855515241622925, + "step": 188 + }, + { + "epoch": 0.24, + "learning_rate": 9.789178461112137e-08, + "logits/chosen": -3.2642273902893066, + "logits/rejected": -3.164060115814209, + "logps/chosen": -240.45135498046875, + "logps/rejected": -782.7747802734375, + "loss": 0.4409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40656739473342896, + "rewards/margins": 2.5774261951446533, + "rewards/rejected": -2.170858860015869, + "step": 189 + }, + { + "epoch": 0.24, + "learning_rate": 9.786199051449635e-08, + "logits/chosen": -3.315053939819336, + "logits/rejected": -3.2095682621002197, + "logps/chosen": -249.96511840820312, + "logps/rejected": -561.0657958984375, + "loss": 0.4241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37504807114601135, + "rewards/margins": 1.7413337230682373, + "rewards/rejected": -1.366285800933838, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 9.783199196031243e-08, + "logits/chosen": -3.1874380111694336, + "logits/rejected": -3.0788397789001465, + "logps/chosen": -251.83441162109375, + "logps/rejected": -507.23077392578125, + "loss": 0.4635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32575762271881104, + "rewards/margins": 1.390570878982544, + "rewards/rejected": -1.064813256263733, + "step": 191 + }, + { + "epoch": 0.24, + "learning_rate": 9.780178907671787e-08, + "logits/chosen": -3.1654984951019287, + "logits/rejected": -3.0723187923431396, + "logps/chosen": -250.41741943359375, + "logps/rejected": -539.8017578125, + "loss": 0.4661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34297487139701843, + "rewards/margins": 1.41269850730896, + "rewards/rejected": -1.0697236061096191, + "step": 192 + }, + { + "epoch": 0.25, + "learning_rate": 9.777138199273383e-08, + "logits/chosen": -3.2433810234069824, + "logits/rejected": -3.1652798652648926, + "logps/chosen": -266.55126953125, + "logps/rejected": -672.142822265625, + "loss": 0.4092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4403488039970398, + "rewards/margins": 2.1439146995544434, + "rewards/rejected": -1.703566074371338, + "step": 193 + }, + { + "epoch": 0.25, + "learning_rate": 9.774077083825372e-08, + "logits/chosen": -3.173769474029541, + "logits/rejected": -3.0213992595672607, + "logps/chosen": -269.7110290527344, + "logps/rejected": -722.140625, + "loss": 0.4336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3803772032260895, + "rewards/margins": 2.0730743408203125, + "rewards/rejected": -1.6926971673965454, + "step": 194 + }, + { + "epoch": 0.25, + "learning_rate": 9.770995574404272e-08, + "logits/chosen": -3.186433792114258, + "logits/rejected": -3.072674512863159, + "logps/chosen": -283.803466796875, + "logps/rejected": -1443.7650146484375, + "loss": 0.3862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4304649233818054, + "rewards/margins": 2.952242374420166, + "rewards/rejected": -2.521777391433716, + "step": 195 + }, + { + "epoch": 0.25, + "learning_rate": 9.767893684173721e-08, + "logits/chosen": -3.1106820106506348, + "logits/rejected": -3.074197769165039, + "logps/chosen": -262.6031799316406, + "logps/rejected": -633.93798828125, + "loss": 0.3897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34806138277053833, + "rewards/margins": 1.7886863946914673, + "rewards/rejected": -1.4406249523162842, + "step": 196 + }, + { + "epoch": 0.25, + "learning_rate": 9.764771426384418e-08, + "logits/chosen": -3.2389261722564697, + "logits/rejected": -3.1183664798736572, + "logps/chosen": -270.6965026855469, + "logps/rejected": -761.942138671875, + "loss": 0.3733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46275559067726135, + "rewards/margins": 2.4058737754821777, + "rewards/rejected": -1.9431183338165283, + "step": 197 + }, + { + "epoch": 0.25, + "learning_rate": 9.761628814374073e-08, + "logits/chosen": -3.2122058868408203, + "logits/rejected": -3.038640260696411, + "logps/chosen": -257.5174560546875, + "logps/rejected": -289.1808166503906, + "loss": 0.4555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41326904296875, + "rewards/margins": 0.9734085202217102, + "rewards/rejected": -0.5601394772529602, + "step": 198 + }, + { + "epoch": 0.25, + "learning_rate": 9.75846586156734e-08, + "logits/chosen": -3.2372827529907227, + "logits/rejected": -3.07850980758667, + "logps/chosen": -272.5516357421875, + "logps/rejected": -557.7568359375, + "loss": 0.4569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4325973391532898, + "rewards/margins": 1.997262716293335, + "rewards/rejected": -1.5646653175354004, + "step": 199 + }, + { + "epoch": 0.25, + "learning_rate": 9.755282581475768e-08, + "logits/chosen": -3.235910415649414, + "logits/rejected": -3.089332342147827, + "logps/chosen": -236.2532958984375, + "logps/rejected": -1750.127685546875, + "loss": 0.4379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.391448974609375, + "rewards/margins": 4.05812406539917, + "rewards/rejected": -3.666674852371216, + "step": 200 + }, + { + "epoch": 0.26, + "learning_rate": 9.752078987697741e-08, + "logits/chosen": -3.1654224395751953, + "logits/rejected": -3.0406336784362793, + "logps/chosen": -284.47418212890625, + "logps/rejected": -515.8695068359375, + "loss": 0.4803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39317476749420166, + "rewards/margins": 1.4911880493164062, + "rewards/rejected": -1.0980134010314941, + "step": 201 + }, + { + "epoch": 0.26, + "learning_rate": 9.748855093918415e-08, + "logits/chosen": -3.216062068939209, + "logits/rejected": -3.0938215255737305, + "logps/chosen": -250.43356323242188, + "logps/rejected": -726.6353759765625, + "loss": 0.4611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34628450870513916, + "rewards/margins": 1.9542831182479858, + "rewards/rejected": -1.6079986095428467, + "step": 202 + }, + { + "epoch": 0.26, + "learning_rate": 9.745610913909673e-08, + "logits/chosen": -3.2117505073547363, + "logits/rejected": -3.092308521270752, + "logps/chosen": -263.11871337890625, + "logps/rejected": -1031.346923828125, + "loss": 0.4436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4303955137729645, + "rewards/margins": 2.5630249977111816, + "rewards/rejected": -2.13262939453125, + "step": 203 + }, + { + "epoch": 0.26, + "learning_rate": 9.742346461530047e-08, + "logits/chosen": -3.2385478019714355, + "logits/rejected": -3.184821128845215, + "logps/chosen": -291.24456787109375, + "logps/rejected": -724.9385986328125, + "loss": 0.4421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5045753717422485, + "rewards/margins": 2.4131264686584473, + "rewards/rejected": -1.9085509777069092, + "step": 204 + }, + { + "epoch": 0.26, + "learning_rate": 9.739061750724673e-08, + "logits/chosen": -3.2172722816467285, + "logits/rejected": -3.1219232082366943, + "logps/chosen": -268.7904052734375, + "logps/rejected": -953.8858642578125, + "loss": 0.4139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3453254699707031, + "rewards/margins": 2.478247880935669, + "rewards/rejected": -2.132922410964966, + "step": 205 + }, + { + "epoch": 0.26, + "learning_rate": 9.73575679552523e-08, + "logits/chosen": -3.1787116527557373, + "logits/rejected": -3.1187210083007812, + "logps/chosen": -271.2843322753906, + "logps/rejected": -551.035400390625, + "loss": 0.4626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4052070677280426, + "rewards/margins": 1.8273736238479614, + "rewards/rejected": -1.4221664667129517, + "step": 206 + }, + { + "epoch": 0.26, + "learning_rate": 9.73243161004987e-08, + "logits/chosen": -3.1978397369384766, + "logits/rejected": -3.1118366718292236, + "logps/chosen": -287.1034851074219, + "logps/rejected": -431.9917297363281, + "loss": 0.4393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5292328000068665, + "rewards/margins": 1.4467575550079346, + "rewards/rejected": -0.9175246953964233, + "step": 207 + }, + { + "epoch": 0.27, + "learning_rate": 9.729086208503173e-08, + "logits/chosen": -3.2190704345703125, + "logits/rejected": -3.069451332092285, + "logps/chosen": -251.0436553955078, + "logps/rejected": -1213.1497802734375, + "loss": 0.4235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3710075616836548, + "rewards/margins": 3.3922202587127686, + "rewards/rejected": -3.0212128162384033, + "step": 208 + }, + { + "epoch": 0.27, + "learning_rate": 9.725720605176073e-08, + "logits/chosen": -3.164036750793457, + "logits/rejected": -3.0543994903564453, + "logps/chosen": -281.1207580566406, + "logps/rejected": -1239.105224609375, + "loss": 0.4482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47401583194732666, + "rewards/margins": 2.8612473011016846, + "rewards/rejected": -2.3872313499450684, + "step": 209 + }, + { + "epoch": 0.27, + "learning_rate": 9.722334814445807e-08, + "logits/chosen": -3.235909938812256, + "logits/rejected": -3.0878870487213135, + "logps/chosen": -299.517578125, + "logps/rejected": -1072.396728515625, + "loss": 0.4719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4749084711074829, + "rewards/margins": 2.5473785400390625, + "rewards/rejected": -2.072470188140869, + "step": 210 + }, + { + "epoch": 0.27, + "learning_rate": 9.718928850775842e-08, + "logits/chosen": -3.1497340202331543, + "logits/rejected": -3.0613696575164795, + "logps/chosen": -286.3797607421875, + "logps/rejected": -637.7598876953125, + "loss": 0.4572, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4342086911201477, + "rewards/margins": 1.8312041759490967, + "rewards/rejected": -1.3969955444335938, + "step": 211 + }, + { + "epoch": 0.27, + "learning_rate": 9.715502728715826e-08, + "logits/chosen": -3.2214980125427246, + "logits/rejected": -3.160120964050293, + "logps/chosen": -249.05923461914062, + "logps/rejected": -722.234375, + "loss": 0.4389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46506041288375854, + "rewards/margins": 2.497143507003784, + "rewards/rejected": -2.032083034515381, + "step": 212 + }, + { + "epoch": 0.27, + "learning_rate": 9.712056462901518e-08, + "logits/chosen": -3.2981090545654297, + "logits/rejected": -3.0962064266204834, + "logps/chosen": -270.9426574707031, + "logps/rejected": -513.313232421875, + "loss": 0.4337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3810470700263977, + "rewards/margins": 1.6717653274536133, + "rewards/rejected": -1.2907180786132812, + "step": 213 + }, + { + "epoch": 0.27, + "learning_rate": 9.708590068054727e-08, + "logits/chosen": -3.179104804992676, + "logits/rejected": -3.091134786605835, + "logps/chosen": -285.07623291015625, + "logps/rejected": -1263.51416015625, + "loss": 0.3994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4152816832065582, + "rewards/margins": 3.2759509086608887, + "rewards/rejected": -2.8606691360473633, + "step": 214 + }, + { + "epoch": 0.27, + "learning_rate": 9.70510355898325e-08, + "logits/chosen": -3.282536029815674, + "logits/rejected": -3.155398368835449, + "logps/chosen": -288.767333984375, + "logps/rejected": -388.8443603515625, + "loss": 0.4605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.396636962890625, + "rewards/margins": 1.4424073696136475, + "rewards/rejected": -1.045770287513733, + "step": 215 + }, + { + "epoch": 0.28, + "learning_rate": 9.701596950580806e-08, + "logits/chosen": -3.209287643432617, + "logits/rejected": -3.1359901428222656, + "logps/chosen": -261.07305908203125, + "logps/rejected": -642.0045776367188, + "loss": 0.4015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37994384765625, + "rewards/margins": 2.182272434234619, + "rewards/rejected": -1.8023285865783691, + "step": 216 + }, + { + "epoch": 0.28, + "learning_rate": 9.69807025782698e-08, + "logits/chosen": -3.1520299911499023, + "logits/rejected": -3.0273594856262207, + "logps/chosen": -274.6333923339844, + "logps/rejected": -1077.9573974609375, + "loss": 0.3499, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38332825899124146, + "rewards/margins": 2.7642884254455566, + "rewards/rejected": -2.380959987640381, + "step": 217 + }, + { + "epoch": 0.28, + "learning_rate": 9.694523495787148e-08, + "logits/chosen": -3.1252574920654297, + "logits/rejected": -3.067542552947998, + "logps/chosen": -241.58226013183594, + "logps/rejected": -1046.5009765625, + "loss": 0.3746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44672393798828125, + "rewards/margins": 3.1983232498168945, + "rewards/rejected": -2.7515993118286133, + "step": 218 + }, + { + "epoch": 0.28, + "learning_rate": 9.690956679612422e-08, + "logits/chosen": -3.2656197547912598, + "logits/rejected": -3.0989465713500977, + "logps/chosen": -248.74612426757812, + "logps/rejected": -617.51416015625, + "loss": 0.4169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4584816098213196, + "rewards/margins": 2.1454567909240723, + "rewards/rejected": -1.686975121498108, + "step": 219 + }, + { + "epoch": 0.28, + "learning_rate": 9.687369824539577e-08, + "logits/chosen": -3.2141857147216797, + "logits/rejected": -3.1777150630950928, + "logps/chosen": -253.2391815185547, + "logps/rejected": -588.52783203125, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5095809698104858, + "rewards/margins": 2.108638048171997, + "rewards/rejected": -1.5990569591522217, + "step": 220 + }, + { + "epoch": 0.28, + "learning_rate": 9.683762945890996e-08, + "logits/chosen": -3.2420880794525146, + "logits/rejected": -2.960965156555176, + "logps/chosen": -290.34857177734375, + "logps/rejected": -954.3245239257812, + "loss": 0.4227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4347427487373352, + "rewards/margins": 2.6515884399414062, + "rewards/rejected": -2.216845750808716, + "step": 221 + }, + { + "epoch": 0.28, + "learning_rate": 9.680136059074597e-08, + "logits/chosen": -3.2651357650756836, + "logits/rejected": -3.1121537685394287, + "logps/chosen": -276.9623718261719, + "logps/rejected": -631.782470703125, + "loss": 0.4215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3558647334575653, + "rewards/margins": 1.8641563653945923, + "rewards/rejected": -1.5082916021347046, + "step": 222 + }, + { + "epoch": 0.28, + "learning_rate": 9.676489179583769e-08, + "logits/chosen": -3.187706470489502, + "logits/rejected": -2.9792871475219727, + "logps/chosen": -263.8983154296875, + "logps/rejected": -1412.278564453125, + "loss": 0.4222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5372360348701477, + "rewards/margins": 3.4809250831604004, + "rewards/rejected": -2.9436891078948975, + "step": 223 + }, + { + "epoch": 0.29, + "learning_rate": 9.672822322997304e-08, + "logits/chosen": -3.224914073944092, + "logits/rejected": -3.1383492946624756, + "logps/chosen": -236.2212371826172, + "logps/rejected": -781.2587890625, + "loss": 0.4189, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3374748229980469, + "rewards/margins": 2.2779083251953125, + "rewards/rejected": -1.9404335021972656, + "step": 224 + }, + { + "epoch": 0.29, + "learning_rate": 9.669135504979335e-08, + "logits/chosen": -3.228166103363037, + "logits/rejected": -2.926187038421631, + "logps/chosen": -234.7311553955078, + "logps/rejected": -302.494873046875, + "loss": 0.447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5320358276367188, + "rewards/margins": 1.1444001197814941, + "rewards/rejected": -0.6123642325401306, + "step": 225 + }, + { + "epoch": 0.29, + "learning_rate": 9.665428741279266e-08, + "logits/chosen": -3.226377010345459, + "logits/rejected": -3.1390130519866943, + "logps/chosen": -240.16323852539062, + "logps/rejected": -389.778564453125, + "loss": 0.3759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33330613374710083, + "rewards/margins": 1.3988990783691406, + "rewards/rejected": -1.0655930042266846, + "step": 226 + }, + { + "epoch": 0.29, + "learning_rate": 9.661702047731703e-08, + "logits/chosen": -3.1911964416503906, + "logits/rejected": -2.986327648162842, + "logps/chosen": -276.9782409667969, + "logps/rejected": -1114.984619140625, + "loss": 0.4652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.304678350687027, + "rewards/margins": 2.481074571609497, + "rewards/rejected": -2.176396369934082, + "step": 227 + }, + { + "epoch": 0.29, + "learning_rate": 9.657955440256394e-08, + "logits/chosen": -3.2535195350646973, + "logits/rejected": -3.085602283477783, + "logps/chosen": -235.68276977539062, + "logps/rejected": -1171.50390625, + "loss": 0.385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44765397906303406, + "rewards/margins": 2.9393715858459473, + "rewards/rejected": -2.491717576980591, + "step": 228 + }, + { + "epoch": 0.29, + "learning_rate": 9.65418893485815e-08, + "logits/chosen": -3.234443426132202, + "logits/rejected": -3.067420244216919, + "logps/chosen": -238.26654052734375, + "logps/rejected": -268.98699951171875, + "loss": 0.4777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5521515011787415, + "rewards/margins": 1.1198753118515015, + "rewards/rejected": -0.5677238702774048, + "step": 229 + }, + { + "epoch": 0.29, + "learning_rate": 9.650402547626786e-08, + "logits/chosen": -3.1608548164367676, + "logits/rejected": -3.1033921241760254, + "logps/chosen": -301.2421875, + "logps/rejected": -476.74444580078125, + "loss": 0.4505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48400115966796875, + "rewards/margins": 1.5679290294647217, + "rewards/rejected": -1.083927869796753, + "step": 230 + }, + { + "epoch": 0.29, + "learning_rate": 9.646596294737045e-08, + "logits/chosen": -3.2006359100341797, + "logits/rejected": -3.1540355682373047, + "logps/chosen": -299.9100646972656, + "logps/rejected": -481.8157653808594, + "loss": 0.4266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.448172003030777, + "rewards/margins": 1.9495636224746704, + "rewards/rejected": -1.5013916492462158, + "step": 231 + }, + { + "epoch": 0.3, + "learning_rate": 9.642770192448534e-08, + "logits/chosen": -3.26326322555542, + "logits/rejected": -3.136384963989258, + "logps/chosen": -242.44256591796875, + "logps/rejected": -522.1060791015625, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43759462237358093, + "rewards/margins": 1.5862091779708862, + "rewards/rejected": -1.148614525794983, + "step": 232 + }, + { + "epoch": 0.3, + "learning_rate": 9.638924257105657e-08, + "logits/chosen": -3.1739630699157715, + "logits/rejected": -2.9961609840393066, + "logps/chosen": -264.6397399902344, + "logps/rejected": -588.5004272460938, + "loss": 0.4175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5168739557266235, + "rewards/margins": 1.7570762634277344, + "rewards/rejected": -1.2402023077011108, + "step": 233 + }, + { + "epoch": 0.3, + "learning_rate": 9.635058505137534e-08, + "logits/chosen": -3.2108583450317383, + "logits/rejected": -3.1135120391845703, + "logps/chosen": -280.93011474609375, + "logps/rejected": -916.8199462890625, + "loss": 0.3661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4774429202079773, + "rewards/margins": 2.8963088989257812, + "rewards/rejected": -2.4188661575317383, + "step": 234 + }, + { + "epoch": 0.3, + "learning_rate": 9.631172953057943e-08, + "logits/chosen": -3.2293906211853027, + "logits/rejected": -3.100785970687866, + "logps/chosen": -260.33148193359375, + "logps/rejected": -894.453857421875, + "loss": 0.3941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4162033200263977, + "rewards/margins": 2.5516128540039062, + "rewards/rejected": -2.1354095935821533, + "step": 235 + }, + { + "epoch": 0.3, + "learning_rate": 9.627267617465242e-08, + "logits/chosen": -3.2292909622192383, + "logits/rejected": -3.2386913299560547, + "logps/chosen": -267.63177490234375, + "logps/rejected": -3644.83349609375, + "loss": 0.394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3762146234512329, + "rewards/margins": 6.458050727844238, + "rewards/rejected": -6.081836223602295, + "step": 236 + }, + { + "epoch": 0.3, + "learning_rate": 9.623342515042302e-08, + "logits/chosen": -3.2407212257385254, + "logits/rejected": -3.0675582885742188, + "logps/chosen": -310.95599365234375, + "logps/rejected": -512.9564819335938, + "loss": 0.4728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5058532953262329, + "rewards/margins": 2.0526633262634277, + "rewards/rejected": -1.546810269355774, + "step": 237 + }, + { + "epoch": 0.3, + "learning_rate": 9.619397662556434e-08, + "logits/chosen": -3.1488447189331055, + "logits/rejected": -3.019704580307007, + "logps/chosen": -257.37298583984375, + "logps/rejected": -422.4077453613281, + "loss": 0.4248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4384208619594574, + "rewards/margins": 1.3016685247421265, + "rewards/rejected": -0.8632476925849915, + "step": 238 + }, + { + "epoch": 0.3, + "learning_rate": 9.615433076859315e-08, + "logits/chosen": -3.2262442111968994, + "logits/rejected": -3.0421924591064453, + "logps/chosen": -256.154296875, + "logps/rejected": -860.4146728515625, + "loss": 0.4039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4683120846748352, + "rewards/margins": 2.450578451156616, + "rewards/rejected": -1.9822663068771362, + "step": 239 + }, + { + "epoch": 0.31, + "learning_rate": 9.611448774886923e-08, + "logits/chosen": -3.231635093688965, + "logits/rejected": -3.1283769607543945, + "logps/chosen": -264.90435791015625, + "logps/rejected": -412.39959716796875, + "loss": 0.4123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4030601382255554, + "rewards/margins": 1.4354912042617798, + "rewards/rejected": -1.0324310064315796, + "step": 240 + }, + { + "epoch": 0.31, + "learning_rate": 9.607444773659458e-08, + "logits/chosen": -3.26914644241333, + "logits/rejected": -3.1175894737243652, + "logps/chosen": -264.73187255859375, + "logps/rejected": -456.4767150878906, + "loss": 0.4487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5444366335868835, + "rewards/margins": 1.7768677473068237, + "rewards/rejected": -1.2324310541152954, + "step": 241 + }, + { + "epoch": 0.31, + "learning_rate": 9.603421090281269e-08, + "logits/chosen": -3.2529654502868652, + "logits/rejected": -3.1305747032165527, + "logps/chosen": -271.2380676269531, + "logps/rejected": -618.7832641601562, + "loss": 0.4726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4480384886264801, + "rewards/margins": 2.051868438720703, + "rewards/rejected": -1.603830099105835, + "step": 242 + }, + { + "epoch": 0.31, + "learning_rate": 9.59937774194079e-08, + "logits/chosen": -3.2287425994873047, + "logits/rejected": -3.0351781845092773, + "logps/chosen": -273.13934326171875, + "logps/rejected": -726.583740234375, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5913535952568054, + "rewards/margins": 2.1437554359436035, + "rewards/rejected": -1.5524017810821533, + "step": 243 + }, + { + "epoch": 0.31, + "learning_rate": 9.595314745910454e-08, + "logits/chosen": -3.144148349761963, + "logits/rejected": -3.011216402053833, + "logps/chosen": -297.7477722167969, + "logps/rejected": -461.8304443359375, + "loss": 0.416, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.375283807516098, + "rewards/margins": 1.3392014503479004, + "rewards/rejected": -0.96391761302948, + "step": 244 + }, + { + "epoch": 0.31, + "learning_rate": 9.591232119546629e-08, + "logits/chosen": -3.183408737182617, + "logits/rejected": -2.979210376739502, + "logps/chosen": -220.3045654296875, + "logps/rejected": -1357.40185546875, + "loss": 0.4284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49509507417678833, + "rewards/margins": 3.560051918029785, + "rewards/rejected": -3.0649566650390625, + "step": 245 + }, + { + "epoch": 0.31, + "learning_rate": 9.587129880289538e-08, + "logits/chosen": -3.2800111770629883, + "logits/rejected": -3.0943350791931152, + "logps/chosen": -312.2218933105469, + "logps/rejected": -788.5858764648438, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.567095935344696, + "rewards/margins": 2.636791229248047, + "rewards/rejected": -2.069695234298706, + "step": 246 + }, + { + "epoch": 0.31, + "learning_rate": 9.583008045663185e-08, + "logits/chosen": -3.2023544311523438, + "logits/rejected": -3.1296324729919434, + "logps/chosen": -256.33599853515625, + "logps/rejected": -841.1145629882812, + "loss": 0.375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3579704165458679, + "rewards/margins": 2.720012664794922, + "rewards/rejected": -2.3620424270629883, + "step": 247 + }, + { + "epoch": 0.32, + "learning_rate": 9.578866633275286e-08, + "logits/chosen": -3.233800172805786, + "logits/rejected": -3.1606435775756836, + "logps/chosen": -259.7479248046875, + "logps/rejected": -660.0242919921875, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32262498140335083, + "rewards/margins": 2.1165032386779785, + "rewards/rejected": -1.793878197669983, + "step": 248 + }, + { + "epoch": 0.32, + "learning_rate": 9.574705660817189e-08, + "logits/chosen": -3.180581569671631, + "logits/rejected": -3.1484689712524414, + "logps/chosen": -249.88723754882812, + "logps/rejected": -677.889892578125, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5472122430801392, + "rewards/margins": 2.4636504650115967, + "rewards/rejected": -1.916438341140747, + "step": 249 + }, + { + "epoch": 0.32, + "learning_rate": 9.570525146063798e-08, + "logits/chosen": -3.26814603805542, + "logits/rejected": -3.075709342956543, + "logps/chosen": -276.5301513671875, + "logps/rejected": -3690.595703125, + "loss": 0.4465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5249969959259033, + "rewards/margins": 7.364483833312988, + "rewards/rejected": -6.839486598968506, + "step": 250 + }, + { + "epoch": 0.32, + "learning_rate": 9.566325106873494e-08, + "logits/chosen": -3.3039183616638184, + "logits/rejected": -3.1272263526916504, + "logps/chosen": -237.80291748046875, + "logps/rejected": -549.0511474609375, + "loss": 0.4269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.575971245765686, + "rewards/margins": 2.1595680713653564, + "rewards/rejected": -1.5835968255996704, + "step": 251 + }, + { + "epoch": 0.32, + "learning_rate": 9.562105561188067e-08, + "logits/chosen": -3.2959718704223633, + "logits/rejected": -3.0927791595458984, + "logps/chosen": -214.4506378173828, + "logps/rejected": -318.79620361328125, + "loss": 0.3852, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5236480832099915, + "rewards/margins": 1.1706130504608154, + "rewards/rejected": -0.6469650268554688, + "step": 252 + }, + { + "epoch": 0.32, + "learning_rate": 9.55786652703264e-08, + "logits/chosen": -3.238281488418579, + "logits/rejected": -3.0620832443237305, + "logps/chosen": -306.21563720703125, + "logps/rejected": -628.8197021484375, + "loss": 0.4187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4515274167060852, + "rewards/margins": 2.149702548980713, + "rewards/rejected": -1.698175072669983, + "step": 253 + }, + { + "epoch": 0.32, + "learning_rate": 9.553608022515576e-08, + "logits/chosen": -3.216383457183838, + "logits/rejected": -3.02156400680542, + "logps/chosen": -240.71011352539062, + "logps/rejected": -645.3388671875, + "loss": 0.4155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5739914178848267, + "rewards/margins": 2.0035018920898438, + "rewards/rejected": -1.429510474205017, + "step": 254 + }, + { + "epoch": 0.33, + "learning_rate": 9.54933006582842e-08, + "logits/chosen": -3.2047278881073, + "logits/rejected": -3.065586566925049, + "logps/chosen": -255.00186157226562, + "logps/rejected": -833.9840698242188, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4378036558628082, + "rewards/margins": 2.4095778465270996, + "rewards/rejected": -1.9717743396759033, + "step": 255 + }, + { + "epoch": 0.33, + "learning_rate": 9.545032675245812e-08, + "logits/chosen": -3.288602352142334, + "logits/rejected": -3.2211713790893555, + "logps/chosen": -240.84439086914062, + "logps/rejected": -761.1458129882812, + "loss": 0.3887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32453614473342896, + "rewards/margins": 2.5384278297424316, + "rewards/rejected": -2.2138917446136475, + "step": 256 + }, + { + "epoch": 0.33, + "learning_rate": 9.540715869125407e-08, + "logits/chosen": -3.253300189971924, + "logits/rejected": -3.152029514312744, + "logps/chosen": -297.0726013183594, + "logps/rejected": -886.35888671875, + "loss": 0.3948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5952713489532471, + "rewards/margins": 2.8104567527770996, + "rewards/rejected": -2.2151856422424316, + "step": 257 + }, + { + "epoch": 0.33, + "learning_rate": 9.536379665907798e-08, + "logits/chosen": -3.167508840560913, + "logits/rejected": -3.109198570251465, + "logps/chosen": -277.9886474609375, + "logps/rejected": -585.235107421875, + "loss": 0.4651, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6446861624717712, + "rewards/margins": 2.253865957260132, + "rewards/rejected": -1.6091797351837158, + "step": 258 + }, + { + "epoch": 0.33, + "learning_rate": 9.532024084116448e-08, + "logits/chosen": -3.2384138107299805, + "logits/rejected": -3.0518853664398193, + "logps/chosen": -270.33746337890625, + "logps/rejected": -339.5698547363281, + "loss": 0.3968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45996248722076416, + "rewards/margins": 1.3308396339416504, + "rewards/rejected": -0.8708770871162415, + "step": 259 + }, + { + "epoch": 0.33, + "learning_rate": 9.527649142357594e-08, + "logits/chosen": -3.1940879821777344, + "logits/rejected": -3.1395950317382812, + "logps/chosen": -297.0071716308594, + "logps/rejected": -716.9846801757812, + "loss": 0.3954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47547149658203125, + "rewards/margins": 2.8150925636291504, + "rewards/rejected": -2.339621067047119, + "step": 260 + }, + { + "epoch": 0.33, + "learning_rate": 9.523254859320174e-08, + "logits/chosen": -3.138375997543335, + "logits/rejected": -2.996399402618408, + "logps/chosen": -297.4671630859375, + "logps/rejected": -243.6540985107422, + "loss": 0.4817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5389984250068665, + "rewards/margins": 0.9398712515830994, + "rewards/rejected": -0.4008728265762329, + "step": 261 + }, + { + "epoch": 0.33, + "learning_rate": 9.518841253775753e-08, + "logits/chosen": -3.238436698913574, + "logits/rejected": -3.1131749153137207, + "logps/chosen": -261.5426940917969, + "logps/rejected": -840.354736328125, + "loss": 0.4099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5525596737861633, + "rewards/margins": 3.0506341457366943, + "rewards/rejected": -2.498074531555176, + "step": 262 + }, + { + "epoch": 0.34, + "learning_rate": 9.514408344578438e-08, + "logits/chosen": -3.173826217651367, + "logits/rejected": -3.096693992614746, + "logps/chosen": -259.22137451171875, + "logps/rejected": -656.5377807617188, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.594042956829071, + "rewards/margins": 2.628232002258301, + "rewards/rejected": -2.034188985824585, + "step": 263 + }, + { + "epoch": 0.34, + "learning_rate": 9.509956150664795e-08, + "logits/chosen": -3.2629289627075195, + "logits/rejected": -3.0955772399902344, + "logps/chosen": -271.16357421875, + "logps/rejected": -742.2913818359375, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6395584344863892, + "rewards/margins": 2.763662815093994, + "rewards/rejected": -2.1241042613983154, + "step": 264 + }, + { + "epoch": 0.34, + "learning_rate": 9.50548469105377e-08, + "logits/chosen": -3.2181198596954346, + "logits/rejected": -3.0793442726135254, + "logps/chosen": -271.9820556640625, + "logps/rejected": -1047.900634765625, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5917221307754517, + "rewards/margins": 3.0850205421447754, + "rewards/rejected": -2.493298292160034, + "step": 265 + }, + { + "epoch": 0.34, + "learning_rate": 9.500993984846612e-08, + "logits/chosen": -3.199127197265625, + "logits/rejected": -3.1189324855804443, + "logps/chosen": -286.4573974609375, + "logps/rejected": -704.9855346679688, + "loss": 0.4119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5823295712471008, + "rewards/margins": 2.7873895168304443, + "rewards/rejected": -2.205059766769409, + "step": 266 + }, + { + "epoch": 0.34, + "learning_rate": 9.496484051226786e-08, + "logits/chosen": -3.247800588607788, + "logits/rejected": -3.096975803375244, + "logps/chosen": -269.71728515625, + "logps/rejected": -666.0947265625, + "loss": 0.3806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5306594967842102, + "rewards/margins": 2.341736078262329, + "rewards/rejected": -1.8110764026641846, + "step": 267 + }, + { + "epoch": 0.34, + "learning_rate": 9.491954909459894e-08, + "logits/chosen": -3.207648754119873, + "logits/rejected": -3.086160659790039, + "logps/chosen": -282.67999267578125, + "logps/rejected": -612.8390502929688, + "loss": 0.405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6044219732284546, + "rewards/margins": 2.973294258117676, + "rewards/rejected": -2.3688721656799316, + "step": 268 + }, + { + "epoch": 0.34, + "learning_rate": 9.487406578893589e-08, + "logits/chosen": -3.180375099182129, + "logits/rejected": -3.102757453918457, + "logps/chosen": -254.3272705078125, + "logps/rejected": -529.23291015625, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5935447812080383, + "rewards/margins": 2.172011613845825, + "rewards/rejected": -1.5784668922424316, + "step": 269 + }, + { + "epoch": 0.34, + "learning_rate": 9.482839078957499e-08, + "logits/chosen": -3.2396087646484375, + "logits/rejected": -3.208104372024536, + "logps/chosen": -316.38897705078125, + "logps/rejected": -785.638427734375, + "loss": 0.4004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5256881713867188, + "rewards/margins": 2.709111213684082, + "rewards/rejected": -2.1834230422973633, + "step": 270 + }, + { + "epoch": 0.35, + "learning_rate": 9.478252429163134e-08, + "logits/chosen": -3.184274673461914, + "logits/rejected": -3.2240419387817383, + "logps/chosen": -276.8490905761719, + "logps/rejected": -1171.3868408203125, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.493621826171875, + "rewards/margins": 3.6483702659606934, + "rewards/rejected": -3.1547484397888184, + "step": 271 + }, + { + "epoch": 0.35, + "learning_rate": 9.473646649103817e-08, + "logits/chosen": -3.2175040245056152, + "logits/rejected": -3.1186089515686035, + "logps/chosen": -214.89735412597656, + "logps/rejected": -743.3375244140625, + "loss": 0.3964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4836212396621704, + "rewards/margins": 2.987501621246338, + "rewards/rejected": -2.503880500793457, + "step": 272 + }, + { + "epoch": 0.35, + "learning_rate": 9.469021758454586e-08, + "logits/chosen": -3.1921987533569336, + "logits/rejected": -3.0898730754852295, + "logps/chosen": -264.75189208984375, + "logps/rejected": -342.34051513671875, + "loss": 0.418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5430809259414673, + "rewards/margins": 1.3819191455841064, + "rewards/rejected": -0.8388382196426392, + "step": 273 + }, + { + "epoch": 0.35, + "learning_rate": 9.464377776972114e-08, + "logits/chosen": -3.228872299194336, + "logits/rejected": -3.0599374771118164, + "logps/chosen": -273.426513671875, + "logps/rejected": -844.3735961914062, + "loss": 0.403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.530120849609375, + "rewards/margins": 2.987997531890869, + "rewards/rejected": -2.457876682281494, + "step": 274 + }, + { + "epoch": 0.35, + "learning_rate": 9.459714724494632e-08, + "logits/chosen": -3.2214794158935547, + "logits/rejected": -3.1707704067230225, + "logps/chosen": -259.22344970703125, + "logps/rejected": -974.74853515625, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5715515613555908, + "rewards/margins": 3.450949192047119, + "rewards/rejected": -2.8793976306915283, + "step": 275 + }, + { + "epoch": 0.35, + "learning_rate": 9.455032620941839e-08, + "logits/chosen": -3.203369140625, + "logits/rejected": -3.114152431488037, + "logps/chosen": -283.3614501953125, + "logps/rejected": -499.12640380859375, + "loss": 0.4274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42607271671295166, + "rewards/margins": 1.6229782104492188, + "rewards/rejected": -1.196905493736267, + "step": 276 + }, + { + "epoch": 0.35, + "learning_rate": 9.45033148631481e-08, + "logits/chosen": -3.228686809539795, + "logits/rejected": -3.082275152206421, + "logps/chosen": -239.50485229492188, + "logps/rejected": -1063.6451416015625, + "loss": 0.3984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.526471734046936, + "rewards/margins": 3.603351593017578, + "rewards/rejected": -3.0768799781799316, + "step": 277 + }, + { + "epoch": 0.35, + "learning_rate": 9.445611340695925e-08, + "logits/chosen": -3.1405203342437744, + "logits/rejected": -3.0182571411132812, + "logps/chosen": -257.3134765625, + "logps/rejected": -734.3167724609375, + "loss": 0.3574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5761321783065796, + "rewards/margins": 2.2172470092773438, + "rewards/rejected": -1.6411148309707642, + "step": 278 + }, + { + "epoch": 0.36, + "learning_rate": 9.440872204248771e-08, + "logits/chosen": -3.208834648132324, + "logits/rejected": -3.0374271869659424, + "logps/chosen": -264.8409423828125, + "logps/rejected": -868.1868896484375, + "loss": 0.3987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5122925043106079, + "rewards/margins": 2.555590867996216, + "rewards/rejected": -2.0432982444763184, + "step": 279 + }, + { + "epoch": 0.36, + "learning_rate": 9.436114097218058e-08, + "logits/chosen": -3.2230653762817383, + "logits/rejected": -2.9800310134887695, + "logps/chosen": -253.8841552734375, + "logps/rejected": -532.2025146484375, + "loss": 0.4606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5166419744491577, + "rewards/margins": 1.6697914600372314, + "rewards/rejected": -1.1531493663787842, + "step": 280 + }, + { + "epoch": 0.36, + "learning_rate": 9.431337039929542e-08, + "logits/chosen": -3.2464237213134766, + "logits/rejected": -3.1166536808013916, + "logps/chosen": -256.60186767578125, + "logps/rejected": -451.1839294433594, + "loss": 0.4529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7181984186172485, + "rewards/margins": 1.976529836654663, + "rewards/rejected": -1.258331298828125, + "step": 281 + }, + { + "epoch": 0.36, + "learning_rate": 9.426541052789925e-08, + "logits/chosen": -3.2291884422302246, + "logits/rejected": -3.0779995918273926, + "logps/chosen": -236.55369567871094, + "logps/rejected": -537.1358032226562, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44499969482421875, + "rewards/margins": 2.1976592540740967, + "rewards/rejected": -1.7526596784591675, + "step": 282 + }, + { + "epoch": 0.36, + "learning_rate": 9.421726156286776e-08, + "logits/chosen": -3.2402524948120117, + "logits/rejected": -3.1980090141296387, + "logps/chosen": -257.46466064453125, + "logps/rejected": -583.25341796875, + "loss": 0.379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4670044183731079, + "rewards/margins": 2.4648683071136475, + "rewards/rejected": -1.99786376953125, + "step": 283 + }, + { + "epoch": 0.36, + "learning_rate": 9.416892370988443e-08, + "logits/chosen": -3.242781639099121, + "logits/rejected": -2.9902470111846924, + "logps/chosen": -263.1647644042969, + "logps/rejected": -303.8855895996094, + "loss": 0.4146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5009704828262329, + "rewards/margins": 1.307756781578064, + "rewards/rejected": -0.8067863583564758, + "step": 284 + }, + { + "epoch": 0.36, + "learning_rate": 9.412039717543958e-08, + "logits/chosen": -3.2437210083007812, + "logits/rejected": -3.0731658935546875, + "logps/chosen": -293.2342529296875, + "logps/rejected": -1122.11572265625, + "loss": 0.3976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.483551025390625, + "rewards/margins": 3.5140504837036133, + "rewards/rejected": -3.0304994583129883, + "step": 285 + }, + { + "epoch": 0.36, + "learning_rate": 9.40716821668296e-08, + "logits/chosen": -3.218231678009033, + "logits/rejected": -3.179504871368408, + "logps/chosen": -289.91876220703125, + "logps/rejected": -848.2877197265625, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5658294558525085, + "rewards/margins": 3.0888612270355225, + "rewards/rejected": -2.523031711578369, + "step": 286 + }, + { + "epoch": 0.37, + "learning_rate": 9.402277889215598e-08, + "logits/chosen": -3.204578161239624, + "logits/rejected": -3.133023977279663, + "logps/chosen": -252.63465881347656, + "logps/rejected": -387.4194641113281, + "loss": 0.3729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40751343965530396, + "rewards/margins": 1.6016602516174316, + "rewards/rejected": -1.194146752357483, + "step": 287 + }, + { + "epoch": 0.37, + "learning_rate": 9.397368756032444e-08, + "logits/chosen": -3.2100887298583984, + "logits/rejected": -3.114614963531494, + "logps/chosen": -295.7611083984375, + "logps/rejected": -693.913330078125, + "loss": 0.4353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5514281988143921, + "rewards/margins": 2.7954115867614746, + "rewards/rejected": -2.243983507156372, + "step": 288 + }, + { + "epoch": 0.37, + "learning_rate": 9.39244083810441e-08, + "logits/chosen": -3.230074882507324, + "logits/rejected": -3.1252803802490234, + "logps/chosen": -261.01434326171875, + "logps/rejected": -528.9771728515625, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.544567883014679, + "rewards/margins": 1.9578354358673096, + "rewards/rejected": -1.4132676124572754, + "step": 289 + }, + { + "epoch": 0.37, + "learning_rate": 9.387494156482642e-08, + "logits/chosen": -3.277860164642334, + "logits/rejected": -3.1238176822662354, + "logps/chosen": -271.47344970703125, + "logps/rejected": -522.4393310546875, + "loss": 0.3965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5926079154014587, + "rewards/margins": 2.320784091949463, + "rewards/rejected": -1.7281761169433594, + "step": 290 + }, + { + "epoch": 0.37, + "learning_rate": 9.382528732298453e-08, + "logits/chosen": -3.30777645111084, + "logits/rejected": -3.096487522125244, + "logps/chosen": -277.7835388183594, + "logps/rejected": -345.83551025390625, + "loss": 0.3796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5443741083145142, + "rewards/margins": 1.6771286725997925, + "rewards/rejected": -1.1327545642852783, + "step": 291 + }, + { + "epoch": 0.37, + "learning_rate": 9.377544586763214e-08, + "logits/chosen": -3.2101263999938965, + "logits/rejected": -3.138218402862549, + "logps/chosen": -289.4250183105469, + "logps/rejected": -570.9707641601562, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6131362915039062, + "rewards/margins": 2.7012710571289062, + "rewards/rejected": -2.088134765625, + "step": 292 + }, + { + "epoch": 0.37, + "learning_rate": 9.372541741168271e-08, + "logits/chosen": -3.247945785522461, + "logits/rejected": -3.099637985229492, + "logps/chosen": -273.4846496582031, + "logps/rejected": -868.1533203125, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6601669788360596, + "rewards/margins": 3.1584582328796387, + "rewards/rejected": -2.498291015625, + "step": 293 + }, + { + "epoch": 0.37, + "learning_rate": 9.367520216884854e-08, + "logits/chosen": -3.2822723388671875, + "logits/rejected": -3.1042728424072266, + "logps/chosen": -276.95721435546875, + "logps/rejected": -645.378662109375, + "loss": 0.3923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6457099914550781, + "rewards/margins": 2.687328338623047, + "rewards/rejected": -2.0416183471679688, + "step": 294 + }, + { + "epoch": 0.38, + "learning_rate": 9.362480035363985e-08, + "logits/chosen": -3.2222206592559814, + "logits/rejected": -3.093477725982666, + "logps/chosen": -283.1591796875, + "logps/rejected": -687.117919921875, + "loss": 0.356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5613021850585938, + "rewards/margins": 2.321821689605713, + "rewards/rejected": -1.7605195045471191, + "step": 295 + }, + { + "epoch": 0.38, + "learning_rate": 9.357421218136385e-08, + "logits/chosen": -3.21010684967041, + "logits/rejected": -3.1014902591705322, + "logps/chosen": -254.42681884765625, + "logps/rejected": -796.4144287109375, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5600669980049133, + "rewards/margins": 3.530999183654785, + "rewards/rejected": -2.9709320068359375, + "step": 296 + }, + { + "epoch": 0.38, + "learning_rate": 9.352343786812386e-08, + "logits/chosen": -3.2452332973480225, + "logits/rejected": -3.081470012664795, + "logps/chosen": -301.9798889160156, + "logps/rejected": -1249.1630859375, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43000489473342896, + "rewards/margins": 3.8001160621643066, + "rewards/rejected": -3.3701109886169434, + "step": 297 + }, + { + "epoch": 0.38, + "learning_rate": 9.347247763081834e-08, + "logits/chosen": -3.210728168487549, + "logits/rejected": -3.1259188652038574, + "logps/chosen": -270.75164794921875, + "logps/rejected": -596.9723510742188, + "loss": 0.376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4070068299770355, + "rewards/margins": 2.260110378265381, + "rewards/rejected": -1.8531036376953125, + "step": 298 + }, + { + "epoch": 0.38, + "learning_rate": 9.342133168713998e-08, + "logits/chosen": -3.232881546020508, + "logits/rejected": -3.1607370376586914, + "logps/chosen": -259.60101318359375, + "logps/rejected": -485.60302734375, + "loss": 0.4023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5377922058105469, + "rewards/margins": 2.4092659950256348, + "rewards/rejected": -1.871473789215088, + "step": 299 + }, + { + "epoch": 0.38, + "learning_rate": 9.337000025557476e-08, + "logits/chosen": -3.247529983520508, + "logits/rejected": -3.0343732833862305, + "logps/chosen": -248.85203552246094, + "logps/rejected": -846.8745727539062, + "loss": 0.4025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6117721796035767, + "rewards/margins": 2.859297275543213, + "rewards/rejected": -2.2475249767303467, + "step": 300 + }, + { + "epoch": 0.38, + "learning_rate": 9.331848355540106e-08, + "logits/chosen": -3.2487518787384033, + "logits/rejected": -3.2152099609375, + "logps/chosen": -259.15057373046875, + "logps/rejected": -984.4152221679688, + "loss": 0.3498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5931793451309204, + "rewards/margins": 3.925473213195801, + "rewards/rejected": -3.33229398727417, + "step": 301 + }, + { + "epoch": 0.38, + "learning_rate": 9.32667818066887e-08, + "logits/chosen": -3.2349228858947754, + "logits/rejected": -3.194634199142456, + "logps/chosen": -319.758544921875, + "logps/rejected": -518.291015625, + "loss": 0.4042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7329986691474915, + "rewards/margins": 2.1960830688476562, + "rewards/rejected": -1.4630844593048096, + "step": 302 + }, + { + "epoch": 0.39, + "learning_rate": 9.321489523029797e-08, + "logits/chosen": -3.1891069412231445, + "logits/rejected": -3.0564451217651367, + "logps/chosen": -263.1719665527344, + "logps/rejected": -650.1837158203125, + "loss": 0.3425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4873512387275696, + "rewards/margins": 2.5840249061584473, + "rewards/rejected": -2.0966734886169434, + "step": 303 + }, + { + "epoch": 0.39, + "learning_rate": 9.316282404787869e-08, + "logits/chosen": -3.22841215133667, + "logits/rejected": -3.097505569458008, + "logps/chosen": -261.031982421875, + "logps/rejected": -989.2647705078125, + "loss": 0.3634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4610840082168579, + "rewards/margins": 3.757815361022949, + "rewards/rejected": -3.296731472015381, + "step": 304 + }, + { + "epoch": 0.39, + "learning_rate": 9.311056848186933e-08, + "logits/chosen": -3.2666115760803223, + "logits/rejected": -3.1161553859710693, + "logps/chosen": -249.83799743652344, + "logps/rejected": -324.92803955078125, + "loss": 0.4419, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8048118352890015, + "rewards/margins": 1.9660835266113281, + "rewards/rejected": -1.1612716913223267, + "step": 305 + }, + { + "epoch": 0.39, + "learning_rate": 9.305812875549598e-08, + "logits/chosen": -3.186903476715088, + "logits/rejected": -3.160834789276123, + "logps/chosen": -276.91009521484375, + "logps/rejected": -642.7632446289062, + "loss": 0.318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5527557134628296, + "rewards/margins": 2.9067931175231934, + "rewards/rejected": -2.3540375232696533, + "step": 306 + }, + { + "epoch": 0.39, + "learning_rate": 9.300550509277144e-08, + "logits/chosen": -3.2548608779907227, + "logits/rejected": -3.1529555320739746, + "logps/chosen": -246.70245361328125, + "logps/rejected": -371.9870300292969, + "loss": 0.3641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6810531616210938, + "rewards/margins": 1.6789398193359375, + "rewards/rejected": -0.9978866577148438, + "step": 307 + }, + { + "epoch": 0.39, + "learning_rate": 9.295269771849425e-08, + "logits/chosen": -3.246337413787842, + "logits/rejected": -3.0809082984924316, + "logps/chosen": -284.94769287109375, + "logps/rejected": -747.692626953125, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5846496820449829, + "rewards/margins": 2.974954128265381, + "rewards/rejected": -2.3903045654296875, + "step": 308 + }, + { + "epoch": 0.39, + "learning_rate": 9.289970685824775e-08, + "logits/chosen": -3.3027267456054688, + "logits/rejected": -3.013172149658203, + "logps/chosen": -253.38714599609375, + "logps/rejected": -1235.823486328125, + "loss": 0.4055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6053360104560852, + "rewards/margins": 4.089057922363281, + "rewards/rejected": -3.483721971511841, + "step": 309 + }, + { + "epoch": 0.4, + "learning_rate": 9.284653273839905e-08, + "logits/chosen": -3.251448631286621, + "logits/rejected": -3.1003293991088867, + "logps/chosen": -248.11215209960938, + "logps/rejected": -543.1563720703125, + "loss": 0.4502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6930435299873352, + "rewards/margins": 2.575369358062744, + "rewards/rejected": -1.8823257684707642, + "step": 310 + }, + { + "epoch": 0.4, + "learning_rate": 9.279317558609816e-08, + "logits/chosen": -3.2197999954223633, + "logits/rejected": -3.13232684135437, + "logps/chosen": -268.0981750488281, + "logps/rejected": -473.70172119140625, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5708023309707642, + "rewards/margins": 2.213627576828003, + "rewards/rejected": -1.6428253650665283, + "step": 311 + }, + { + "epoch": 0.4, + "learning_rate": 9.273963562927694e-08, + "logits/chosen": -3.205901861190796, + "logits/rejected": -3.0523581504821777, + "logps/chosen": -274.8895263671875, + "logps/rejected": -556.4844970703125, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6136245727539062, + "rewards/margins": 2.0882062911987305, + "rewards/rejected": -1.4745819568634033, + "step": 312 + }, + { + "epoch": 0.4, + "learning_rate": 9.26859130966482e-08, + "logits/chosen": -3.204451560974121, + "logits/rejected": -3.033909320831299, + "logps/chosen": -296.3106689453125, + "logps/rejected": -549.9437255859375, + "loss": 0.3911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7439117431640625, + "rewards/margins": 2.0873749256134033, + "rewards/rejected": -1.3434631824493408, + "step": 313 + }, + { + "epoch": 0.4, + "learning_rate": 9.26320082177046e-08, + "logits/chosen": -3.2706894874572754, + "logits/rejected": -3.1672658920288086, + "logps/chosen": -267.5731506347656, + "logps/rejected": -808.36572265625, + "loss": 0.3386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6549896001815796, + "rewards/margins": 2.970738410949707, + "rewards/rejected": -2.315748691558838, + "step": 314 + }, + { + "epoch": 0.4, + "learning_rate": 9.257792122271785e-08, + "logits/chosen": -3.252103567123413, + "logits/rejected": -3.1895341873168945, + "logps/chosen": -241.73617553710938, + "logps/rejected": -1402.5667724609375, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6730934381484985, + "rewards/margins": 5.277616500854492, + "rewards/rejected": -4.604522705078125, + "step": 315 + }, + { + "epoch": 0.4, + "learning_rate": 9.252365234273753e-08, + "logits/chosen": -3.2438769340515137, + "logits/rejected": -3.095254898071289, + "logps/chosen": -315.98101806640625, + "logps/rejected": -594.208984375, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5324249267578125, + "rewards/margins": 2.3061861991882324, + "rewards/rejected": -1.7737610340118408, + "step": 316 + }, + { + "epoch": 0.4, + "learning_rate": 9.246920180959029e-08, + "logits/chosen": -3.1549341678619385, + "logits/rejected": -2.9918808937072754, + "logps/chosen": -306.4482116699219, + "logps/rejected": -588.2877197265625, + "loss": 0.3835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6711166501045227, + "rewards/margins": 2.4525771141052246, + "rewards/rejected": -1.7814605236053467, + "step": 317 + }, + { + "epoch": 0.41, + "learning_rate": 9.241456985587868e-08, + "logits/chosen": -3.265550136566162, + "logits/rejected": -3.144221305847168, + "logps/chosen": -274.18109130859375, + "logps/rejected": -772.28271484375, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6376892328262329, + "rewards/margins": 2.923841953277588, + "rewards/rejected": -2.2861526012420654, + "step": 318 + }, + { + "epoch": 0.41, + "learning_rate": 9.23597567149803e-08, + "logits/chosen": -3.248553514480591, + "logits/rejected": -3.147860527038574, + "logps/chosen": -272.0599060058594, + "logps/rejected": -441.85028076171875, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6171371936798096, + "rewards/margins": 2.0442872047424316, + "rewards/rejected": -1.427150011062622, + "step": 319 + }, + { + "epoch": 0.41, + "learning_rate": 9.230476262104676e-08, + "logits/chosen": -3.2413864135742188, + "logits/rejected": -3.053819179534912, + "logps/chosen": -280.3955078125, + "logps/rejected": -592.1085205078125, + "loss": 0.3723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7762619256973267, + "rewards/margins": 2.704524278640747, + "rewards/rejected": -1.9282622337341309, + "step": 320 + }, + { + "epoch": 0.41, + "learning_rate": 9.224958780900263e-08, + "logits/chosen": -3.145852565765381, + "logits/rejected": -3.1692757606506348, + "logps/chosen": -312.27972412109375, + "logps/rejected": -723.71630859375, + "loss": 0.4151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6895980834960938, + "rewards/margins": 3.349482774734497, + "rewards/rejected": -2.6598846912384033, + "step": 321 + }, + { + "epoch": 0.41, + "learning_rate": 9.219423251454446e-08, + "logits/chosen": -3.254903793334961, + "logits/rejected": -3.0758070945739746, + "logps/chosen": -254.11068725585938, + "logps/rejected": -235.0104522705078, + "loss": 0.4098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6115700006484985, + "rewards/margins": 1.2590980529785156, + "rewards/rejected": -0.6475281119346619, + "step": 322 + }, + { + "epoch": 0.41, + "learning_rate": 9.213869697413986e-08, + "logits/chosen": -3.2230215072631836, + "logits/rejected": -3.0881457328796387, + "logps/chosen": -234.8556365966797, + "logps/rejected": -506.36614990234375, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5850608944892883, + "rewards/margins": 2.4922096729278564, + "rewards/rejected": -1.907148838043213, + "step": 323 + }, + { + "epoch": 0.41, + "learning_rate": 9.208298142502635e-08, + "logits/chosen": -3.1954598426818848, + "logits/rejected": -3.1014249324798584, + "logps/chosen": -273.1200866699219, + "logps/rejected": -847.0343627929688, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6407585144042969, + "rewards/margins": 2.8278801441192627, + "rewards/rejected": -2.187121629714966, + "step": 324 + }, + { + "epoch": 0.41, + "learning_rate": 9.202708610521044e-08, + "logits/chosen": -3.2204551696777344, + "logits/rejected": -3.0303120613098145, + "logps/chosen": -272.8629150390625, + "logps/rejected": -1252.6962890625, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6648895740509033, + "rewards/margins": 3.9440019130706787, + "rewards/rejected": -3.2791123390197754, + "step": 325 + }, + { + "epoch": 0.42, + "learning_rate": 9.197101125346657e-08, + "logits/chosen": -3.313735008239746, + "logits/rejected": -3.128878593444824, + "logps/chosen": -242.91046142578125, + "logps/rejected": -493.592529296875, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6176986694335938, + "rewards/margins": 2.221174716949463, + "rewards/rejected": -1.6034760475158691, + "step": 326 + }, + { + "epoch": 0.42, + "learning_rate": 9.191475710933614e-08, + "logits/chosen": -3.2060930728912354, + "logits/rejected": -3.004615306854248, + "logps/chosen": -294.5144348144531, + "logps/rejected": -475.55767822265625, + "loss": 0.3613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7048355340957642, + "rewards/margins": 2.186345100402832, + "rewards/rejected": -1.4815094470977783, + "step": 327 + }, + { + "epoch": 0.42, + "learning_rate": 9.185832391312642e-08, + "logits/chosen": -3.290862560272217, + "logits/rejected": -3.1256179809570312, + "logps/chosen": -295.43035888671875, + "logps/rejected": -808.88623046875, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6060501337051392, + "rewards/margins": 3.3082289695739746, + "rewards/rejected": -2.702178955078125, + "step": 328 + }, + { + "epoch": 0.42, + "learning_rate": 9.18017119059096e-08, + "logits/chosen": -3.187858819961548, + "logits/rejected": -3.0963521003723145, + "logps/chosen": -257.903564453125, + "logps/rejected": -988.97314453125, + "loss": 0.3632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6895378232002258, + "rewards/margins": 3.2418479919433594, + "rewards/rejected": -2.5523102283477783, + "step": 329 + }, + { + "epoch": 0.42, + "learning_rate": 9.174492132952165e-08, + "logits/chosen": -3.292555809020996, + "logits/rejected": -3.1582894325256348, + "logps/chosen": -236.30984497070312, + "logps/rejected": -640.6674194335938, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7765556573867798, + "rewards/margins": 2.9344000816345215, + "rewards/rejected": -2.1578445434570312, + "step": 330 + }, + { + "epoch": 0.42, + "learning_rate": 9.168795242656142e-08, + "logits/chosen": -3.2495319843292236, + "logits/rejected": -3.1690921783447266, + "logps/chosen": -251.51736450195312, + "logps/rejected": -556.037841796875, + "loss": 0.3135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6311584711074829, + "rewards/margins": 2.2638885974884033, + "rewards/rejected": -1.6327301263809204, + "step": 331 + }, + { + "epoch": 0.42, + "learning_rate": 9.163080544038952e-08, + "logits/chosen": -3.2200801372528076, + "logits/rejected": -2.8881630897521973, + "logps/chosen": -285.9815673828125, + "logps/rejected": -1556.1121826171875, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7121796011924744, + "rewards/margins": 4.9703521728515625, + "rewards/rejected": -4.258172512054443, + "step": 332 + }, + { + "epoch": 0.42, + "learning_rate": 9.157348061512727e-08, + "logits/chosen": -3.2644591331481934, + "logits/rejected": -3.129502296447754, + "logps/chosen": -264.4681396484375, + "logps/rejected": -486.45684814453125, + "loss": 0.351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7562118768692017, + "rewards/margins": 2.311265468597412, + "rewards/rejected": -1.5550537109375, + "step": 333 + }, + { + "epoch": 0.43, + "learning_rate": 9.15159781956557e-08, + "logits/chosen": -3.1723554134368896, + "logits/rejected": -3.079892635345459, + "logps/chosen": -232.65567016601562, + "logps/rejected": -952.301025390625, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6707184314727783, + "rewards/margins": 3.711395502090454, + "rewards/rejected": -3.040677070617676, + "step": 334 + }, + { + "epoch": 0.43, + "learning_rate": 9.145829842761452e-08, + "logits/chosen": -3.2603774070739746, + "logits/rejected": -3.0826663970947266, + "logps/chosen": -267.7445068359375, + "logps/rejected": -510.6075744628906, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6396118402481079, + "rewards/margins": 2.240884304046631, + "rewards/rejected": -1.6012725830078125, + "step": 335 + }, + { + "epoch": 0.43, + "learning_rate": 9.1400441557401e-08, + "logits/chosen": -3.2824697494506836, + "logits/rejected": -3.2019383907318115, + "logps/chosen": -251.39735412597656, + "logps/rejected": -610.2445068359375, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6602951288223267, + "rewards/margins": 3.0845627784729004, + "rewards/rejected": -2.424267530441284, + "step": 336 + }, + { + "epoch": 0.43, + "learning_rate": 9.134240783216898e-08, + "logits/chosen": -3.2278733253479004, + "logits/rejected": -3.130556344985962, + "logps/chosen": -267.3499755859375, + "logps/rejected": -564.5137329101562, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5237693786621094, + "rewards/margins": 2.3777153491973877, + "rewards/rejected": -1.8539459705352783, + "step": 337 + }, + { + "epoch": 0.43, + "learning_rate": 9.128419749982779e-08, + "logits/chosen": -3.2375214099884033, + "logits/rejected": -3.154594898223877, + "logps/chosen": -226.1439971923828, + "logps/rejected": -908.9793701171875, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6783767938613892, + "rewards/margins": 3.631735324859619, + "rewards/rejected": -2.9533586502075195, + "step": 338 + }, + { + "epoch": 0.43, + "learning_rate": 9.122581080904118e-08, + "logits/chosen": -3.208662986755371, + "logits/rejected": -3.163454055786133, + "logps/chosen": -283.7572021484375, + "logps/rejected": -425.8714904785156, + "loss": 0.3471, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5703506469726562, + "rewards/margins": 2.0210251808166504, + "rewards/rejected": -1.4506745338439941, + "step": 339 + }, + { + "epoch": 0.43, + "learning_rate": 9.116724800922628e-08, + "logits/chosen": -3.250164031982422, + "logits/rejected": -3.134016275405884, + "logps/chosen": -250.85186767578125, + "logps/rejected": -479.07574462890625, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6625908017158508, + "rewards/margins": 2.095714569091797, + "rewards/rejected": -1.4331238269805908, + "step": 340 + }, + { + "epoch": 0.43, + "learning_rate": 9.110850935055253e-08, + "logits/chosen": -3.221963405609131, + "logits/rejected": -3.1459970474243164, + "logps/chosen": -242.90928649902344, + "logps/rejected": -384.2160949707031, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7053314447402954, + "rewards/margins": 1.9731217622756958, + "rewards/rejected": -1.2677903175354004, + "step": 341 + }, + { + "epoch": 0.44, + "learning_rate": 9.10495950839406e-08, + "logits/chosen": -3.2121095657348633, + "logits/rejected": -3.1709351539611816, + "logps/chosen": -255.7861328125, + "logps/rejected": -577.662353515625, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6986839771270752, + "rewards/margins": 2.8892693519592285, + "rewards/rejected": -2.1905853748321533, + "step": 342 + }, + { + "epoch": 0.44, + "learning_rate": 9.099050546106133e-08, + "logits/chosen": -3.161777973175049, + "logits/rejected": -3.037576913833618, + "logps/chosen": -238.6705322265625, + "logps/rejected": -426.99212646484375, + "loss": 0.3677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6396888494491577, + "rewards/margins": 2.3370871543884277, + "rewards/rejected": -1.6973984241485596, + "step": 343 + }, + { + "epoch": 0.44, + "learning_rate": 9.093124073433462e-08, + "logits/chosen": -3.2438836097717285, + "logits/rejected": -3.1393826007843018, + "logps/chosen": -274.57843017578125, + "logps/rejected": -435.6207275390625, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7083526849746704, + "rewards/margins": 2.2576234340667725, + "rewards/rejected": -1.5492706298828125, + "step": 344 + }, + { + "epoch": 0.44, + "learning_rate": 9.087180115692843e-08, + "logits/chosen": -3.228778839111328, + "logits/rejected": -3.128049373626709, + "logps/chosen": -297.9307556152344, + "logps/rejected": -1289.423095703125, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5892654657363892, + "rewards/margins": 5.333732604980469, + "rewards/rejected": -4.744467258453369, + "step": 345 + }, + { + "epoch": 0.44, + "learning_rate": 9.081218698275762e-08, + "logits/chosen": -3.2750043869018555, + "logits/rejected": -3.140364170074463, + "logps/chosen": -272.1776428222656, + "logps/rejected": -589.4583740234375, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8061073422431946, + "rewards/margins": 2.8562402725219727, + "rewards/rejected": -2.0501327514648438, + "step": 346 + }, + { + "epoch": 0.44, + "learning_rate": 9.07523984664829e-08, + "logits/chosen": -3.1881537437438965, + "logits/rejected": -3.0828776359558105, + "logps/chosen": -273.57904052734375, + "logps/rejected": -886.2266235351562, + "loss": 0.3681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8570129871368408, + "rewards/margins": 3.8034210205078125, + "rewards/rejected": -2.946408271789551, + "step": 347 + }, + { + "epoch": 0.44, + "learning_rate": 9.069243586350974e-08, + "logits/chosen": -3.233619213104248, + "logits/rejected": -3.1463499069213867, + "logps/chosen": -268.69873046875, + "logps/rejected": -1057.912353515625, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8652664422988892, + "rewards/margins": 4.388008117675781, + "rewards/rejected": -3.5227417945861816, + "step": 348 + }, + { + "epoch": 0.44, + "learning_rate": 9.063229942998728e-08, + "logits/chosen": -3.201773166656494, + "logits/rejected": -3.0517044067382812, + "logps/chosen": -280.0558166503906, + "logps/rejected": -1184.35107421875, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7944732904434204, + "rewards/margins": 4.156172275543213, + "rewards/rejected": -3.361698865890503, + "step": 349 + }, + { + "epoch": 0.45, + "learning_rate": 9.057198942280721e-08, + "logits/chosen": -3.1946678161621094, + "logits/rejected": -3.1007118225097656, + "logps/chosen": -279.819580078125, + "logps/rejected": -624.7042236328125, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7872222661972046, + "rewards/margins": 2.9654359817504883, + "rewards/rejected": -2.178213596343994, + "step": 350 + }, + { + "epoch": 0.45, + "learning_rate": 9.051150609960271e-08, + "logits/chosen": -3.306736469268799, + "logits/rejected": -3.205411672592163, + "logps/chosen": -250.21063232421875, + "logps/rejected": -486.6370849609375, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7640335559844971, + "rewards/margins": 2.6880812644958496, + "rewards/rejected": -1.9240479469299316, + "step": 351 + }, + { + "epoch": 0.45, + "learning_rate": 9.045084971874737e-08, + "logits/chosen": -3.254366159439087, + "logits/rejected": -3.172635078430176, + "logps/chosen": -274.08880615234375, + "logps/rejected": -561.548095703125, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7648025751113892, + "rewards/margins": 2.6105880737304688, + "rewards/rejected": -1.8457856178283691, + "step": 352 + }, + { + "epoch": 0.45, + "learning_rate": 9.039002053935397e-08, + "logits/chosen": -3.2500243186950684, + "logits/rejected": -3.161827564239502, + "logps/chosen": -252.07200622558594, + "logps/rejected": -584.634521484375, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7880523800849915, + "rewards/margins": 2.8441529273986816, + "rewards/rejected": -2.056100368499756, + "step": 353 + }, + { + "epoch": 0.45, + "learning_rate": 9.032901882127352e-08, + "logits/chosen": -3.2823591232299805, + "logits/rejected": -3.089869976043701, + "logps/chosen": -272.52197265625, + "logps/rejected": -571.5909423828125, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.722364068031311, + "rewards/margins": 2.591040849685669, + "rewards/rejected": -1.8686769008636475, + "step": 354 + }, + { + "epoch": 0.45, + "learning_rate": 9.026784482509407e-08, + "logits/chosen": -3.283113956451416, + "logits/rejected": -3.150090217590332, + "logps/chosen": -274.990966796875, + "logps/rejected": -678.2208862304688, + "loss": 0.3169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.714006781578064, + "rewards/margins": 3.372347354888916, + "rewards/rejected": -2.6583404541015625, + "step": 355 + }, + { + "epoch": 0.45, + "learning_rate": 9.020649881213957e-08, + "logits/chosen": -3.202235698699951, + "logits/rejected": -3.0628156661987305, + "logps/chosen": -265.34429931640625, + "logps/rejected": -1168.0623779296875, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5895141363143921, + "rewards/margins": 3.55438232421875, + "rewards/rejected": -2.9648680686950684, + "step": 356 + }, + { + "epoch": 0.46, + "learning_rate": 9.014498104446886e-08, + "logits/chosen": -3.253669261932373, + "logits/rejected": -3.0914199352264404, + "logps/chosen": -228.3657989501953, + "logps/rejected": -785.8195190429688, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5857002139091492, + "rewards/margins": 2.9124932289123535, + "rewards/rejected": -2.3267929553985596, + "step": 357 + }, + { + "epoch": 0.46, + "learning_rate": 9.008329178487441e-08, + "logits/chosen": -3.2415831089019775, + "logits/rejected": -3.168215751647949, + "logps/chosen": -241.8489227294922, + "logps/rejected": -837.0526733398438, + "loss": 0.3129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7465484738349915, + "rewards/margins": 3.5373566150665283, + "rewards/rejected": -2.7908082008361816, + "step": 358 + }, + { + "epoch": 0.46, + "learning_rate": 9.002143129688137e-08, + "logits/chosen": -3.258683919906616, + "logits/rejected": -3.1265830993652344, + "logps/chosen": -275.4127197265625, + "logps/rejected": -673.54150390625, + "loss": 0.3805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5652145147323608, + "rewards/margins": 3.046818733215332, + "rewards/rejected": -2.4816040992736816, + "step": 359 + }, + { + "epoch": 0.46, + "learning_rate": 8.995939984474623e-08, + "logits/chosen": -3.170380115509033, + "logits/rejected": -3.122373104095459, + "logps/chosen": -271.25830078125, + "logps/rejected": -543.86279296875, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7795417904853821, + "rewards/margins": 2.4630424976348877, + "rewards/rejected": -1.6835006475448608, + "step": 360 + }, + { + "epoch": 0.46, + "learning_rate": 8.98971976934559e-08, + "logits/chosen": -3.2561697959899902, + "logits/rejected": -3.1546802520751953, + "logps/chosen": -216.69970703125, + "logps/rejected": -967.0401000976562, + "loss": 0.3302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6322014331817627, + "rewards/margins": 4.536375999450684, + "rewards/rejected": -3.9041748046875, + "step": 361 + }, + { + "epoch": 0.46, + "learning_rate": 8.983482510872644e-08, + "logits/chosen": -3.311954975128174, + "logits/rejected": -3.159938097000122, + "logps/chosen": -258.8069152832031, + "logps/rejected": -383.44647216796875, + "loss": 0.4061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5689567923545837, + "rewards/margins": 2.1611838340759277, + "rewards/rejected": -1.5922272205352783, + "step": 362 + }, + { + "epoch": 0.46, + "learning_rate": 8.977228235700196e-08, + "logits/chosen": -3.2919466495513916, + "logits/rejected": -3.1638612747192383, + "logps/chosen": -259.2259216308594, + "logps/rejected": -701.90771484375, + "loss": 0.3262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7513107061386108, + "rewards/margins": 3.834373712539673, + "rewards/rejected": -3.0830628871917725, + "step": 363 + }, + { + "epoch": 0.46, + "learning_rate": 8.970956970545355e-08, + "logits/chosen": -3.316131591796875, + "logits/rejected": -3.1254191398620605, + "logps/chosen": -258.08892822265625, + "logps/rejected": -693.6861572265625, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8119560480117798, + "rewards/margins": 2.9017724990844727, + "rewards/rejected": -2.0898163318634033, + "step": 364 + }, + { + "epoch": 0.47, + "learning_rate": 8.964668742197801e-08, + "logits/chosen": -3.2973670959472656, + "logits/rejected": -3.1541295051574707, + "logps/chosen": -283.81378173828125, + "logps/rejected": -715.1702880859375, + "loss": 0.3282, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6549209356307983, + "rewards/margins": 3.7252488136291504, + "rewards/rejected": -3.0703277587890625, + "step": 365 + }, + { + "epoch": 0.47, + "learning_rate": 8.958363577519683e-08, + "logits/chosen": -3.175367832183838, + "logits/rejected": -3.1230340003967285, + "logps/chosen": -242.11114501953125, + "logps/rejected": -366.45068359375, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6403770446777344, + "rewards/margins": 1.740139126777649, + "rewards/rejected": -1.099761962890625, + "step": 366 + }, + { + "epoch": 0.47, + "learning_rate": 8.952041503445495e-08, + "logits/chosen": -3.3067853450775146, + "logits/rejected": -3.185628652572632, + "logps/chosen": -260.015869140625, + "logps/rejected": -429.3157958984375, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7146240472793579, + "rewards/margins": 2.226742744445801, + "rewards/rejected": -1.5121185779571533, + "step": 367 + }, + { + "epoch": 0.47, + "learning_rate": 8.945702546981968e-08, + "logits/chosen": -3.2697033882141113, + "logits/rejected": -3.04641056060791, + "logps/chosen": -261.62322998046875, + "logps/rejected": -475.36614990234375, + "loss": 0.3443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6866836547851562, + "rewards/margins": 2.1325149536132812, + "rewards/rejected": -1.445831298828125, + "step": 368 + }, + { + "epoch": 0.47, + "learning_rate": 8.939346735207949e-08, + "logits/chosen": -3.2470741271972656, + "logits/rejected": -3.1911964416503906, + "logps/chosen": -252.5916748046875, + "logps/rejected": -800.461181640625, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6090583801269531, + "rewards/margins": 3.3670663833618164, + "rewards/rejected": -2.7580080032348633, + "step": 369 + }, + { + "epoch": 0.47, + "learning_rate": 8.932974095274289e-08, + "logits/chosen": -3.2645468711853027, + "logits/rejected": -3.0798678398132324, + "logps/chosen": -248.63168334960938, + "logps/rejected": -257.1418151855469, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9021705389022827, + "rewards/margins": 1.678198218345642, + "rewards/rejected": -0.7760276794433594, + "step": 370 + }, + { + "epoch": 0.47, + "learning_rate": 8.926584654403724e-08, + "logits/chosen": -3.2356948852539062, + "logits/rejected": -3.1394052505493164, + "logps/chosen": -285.9565124511719, + "logps/rejected": -389.1326904296875, + "loss": 0.3614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6310707330703735, + "rewards/margins": 1.823662519454956, + "rewards/rejected": -1.192591905593872, + "step": 371 + }, + { + "epoch": 0.47, + "learning_rate": 8.920178439890764e-08, + "logits/chosen": -3.2264113426208496, + "logits/rejected": -2.973222255706787, + "logps/chosen": -299.836669921875, + "logps/rejected": -313.46435546875, + "loss": 0.3956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.698742687702179, + "rewards/margins": 1.6632156372070312, + "rewards/rejected": -0.9644730091094971, + "step": 372 + }, + { + "epoch": 0.48, + "learning_rate": 8.913755479101572e-08, + "logits/chosen": -3.2552874088287354, + "logits/rejected": -2.9966814517974854, + "logps/chosen": -247.4773712158203, + "logps/rejected": -1204.779296875, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7345443964004517, + "rewards/margins": 3.8878679275512695, + "rewards/rejected": -3.1533234119415283, + "step": 373 + }, + { + "epoch": 0.48, + "learning_rate": 8.907315799473844e-08, + "logits/chosen": -3.1904778480529785, + "logits/rejected": -3.0696866512298584, + "logps/chosen": -258.1582946777344, + "logps/rejected": -363.6434631347656, + "loss": 0.3577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6831604242324829, + "rewards/margins": 1.7633682489395142, + "rewards/rejected": -1.0802078247070312, + "step": 374 + }, + { + "epoch": 0.48, + "learning_rate": 8.9008594285167e-08, + "logits/chosen": -3.2329232692718506, + "logits/rejected": -3.141707420349121, + "logps/chosen": -290.6908874511719, + "logps/rejected": -539.234130859375, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4645484983921051, + "rewards/margins": 2.643012285232544, + "rewards/rejected": -2.1784636974334717, + "step": 375 + }, + { + "epoch": 0.48, + "learning_rate": 8.894386393810562e-08, + "logits/chosen": -3.233431816101074, + "logits/rejected": -3.1377789974212646, + "logps/chosen": -302.5478515625, + "logps/rejected": -762.236328125, + "loss": 0.3402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6519393920898438, + "rewards/margins": 3.4178879261016846, + "rewards/rejected": -2.765948534011841, + "step": 376 + }, + { + "epoch": 0.48, + "learning_rate": 8.887896723007033e-08, + "logits/chosen": -3.2595200538635254, + "logits/rejected": -3.0760498046875, + "logps/chosen": -282.3601379394531, + "logps/rejected": -899.5616455078125, + "loss": 0.4069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9290809631347656, + "rewards/margins": 3.590940237045288, + "rewards/rejected": -2.6618592739105225, + "step": 377 + }, + { + "epoch": 0.48, + "learning_rate": 8.881390443828787e-08, + "logits/chosen": -3.2369799613952637, + "logits/rejected": -3.116346836090088, + "logps/chosen": -289.80364990234375, + "logps/rejected": -695.4540405273438, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9589188098907471, + "rewards/margins": 3.6912734508514404, + "rewards/rejected": -2.7323546409606934, + "step": 378 + }, + { + "epoch": 0.48, + "learning_rate": 8.874867584069441e-08, + "logits/chosen": -3.1951563358306885, + "logits/rejected": -3.151726007461548, + "logps/chosen": -257.6458435058594, + "logps/rejected": -1006.23095703125, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7394927740097046, + "rewards/margins": 4.912075996398926, + "rewards/rejected": -4.172583103179932, + "step": 379 + }, + { + "epoch": 0.48, + "learning_rate": 8.868328171593447e-08, + "logits/chosen": -3.2690272331237793, + "logits/rejected": -3.088817596435547, + "logps/chosen": -282.78021240234375, + "logps/rejected": -312.19378662109375, + "loss": 0.4177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4816536009311676, + "rewards/margins": 1.1630561351776123, + "rewards/rejected": -0.6814025640487671, + "step": 380 + }, + { + "epoch": 0.49, + "learning_rate": 8.861772234335962e-08, + "logits/chosen": -3.2217984199523926, + "logits/rejected": -3.057983875274658, + "logps/chosen": -287.53875732421875, + "logps/rejected": -893.9835205078125, + "loss": 0.3343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6464775204658508, + "rewards/margins": 3.4407951831817627, + "rewards/rejected": -2.7943177223205566, + "step": 381 + }, + { + "epoch": 0.49, + "learning_rate": 8.855199800302735e-08, + "logits/chosen": -3.193746566772461, + "logits/rejected": -3.1469926834106445, + "logps/chosen": -277.6173095703125, + "logps/rejected": -671.5110473632812, + "loss": 0.376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.615155041217804, + "rewards/margins": 3.3981597423553467, + "rewards/rejected": -2.7830047607421875, + "step": 382 + }, + { + "epoch": 0.49, + "learning_rate": 8.84861089756999e-08, + "logits/chosen": -3.293480634689331, + "logits/rejected": -3.110339403152466, + "logps/chosen": -262.4740905761719, + "logps/rejected": -649.0152587890625, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7656593322753906, + "rewards/margins": 3.0081918239593506, + "rewards/rejected": -2.242532253265381, + "step": 383 + }, + { + "epoch": 0.49, + "learning_rate": 8.842005554284295e-08, + "logits/chosen": -3.263068675994873, + "logits/rejected": -3.1245908737182617, + "logps/chosen": -277.92352294921875, + "logps/rejected": -450.91259765625, + "loss": 0.378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7800827026367188, + "rewards/margins": 2.460897922515869, + "rewards/rejected": -1.6808151006698608, + "step": 384 + }, + { + "epoch": 0.49, + "learning_rate": 8.835383798662456e-08, + "logits/chosen": -3.172696113586426, + "logits/rejected": -3.149498701095581, + "logps/chosen": -238.232177734375, + "logps/rejected": -1149.947021484375, + "loss": 0.3543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7311950922012329, + "rewards/margins": 4.70806884765625, + "rewards/rejected": -3.9768738746643066, + "step": 385 + }, + { + "epoch": 0.49, + "learning_rate": 8.828745658991386e-08, + "logits/chosen": -3.2159175872802734, + "logits/rejected": -2.934990644454956, + "logps/chosen": -271.5195007324219, + "logps/rejected": -1335.650390625, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5548553466796875, + "rewards/margins": 4.450497627258301, + "rewards/rejected": -3.895642042160034, + "step": 386 + }, + { + "epoch": 0.49, + "learning_rate": 8.822091163627987e-08, + "logits/chosen": -3.2037734985351562, + "logits/rejected": -3.1010403633117676, + "logps/chosen": -277.73260498046875, + "logps/rejected": -671.900634765625, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7989227771759033, + "rewards/margins": 3.2533812522888184, + "rewards/rejected": -2.454458713531494, + "step": 387 + }, + { + "epoch": 0.49, + "learning_rate": 8.815420340999033e-08, + "logits/chosen": -3.2585811614990234, + "logits/rejected": -3.2448368072509766, + "logps/chosen": -234.76406860351562, + "logps/rejected": -635.4046630859375, + "loss": 0.321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7009170651435852, + "rewards/margins": 3.1820449829101562, + "rewards/rejected": -2.481127977371216, + "step": 388 + }, + { + "epoch": 0.5, + "learning_rate": 8.80873321960104e-08, + "logits/chosen": -3.238550901412964, + "logits/rejected": -3.1430583000183105, + "logps/chosen": -261.35150146484375, + "logps/rejected": -595.8668212890625, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7972900867462158, + "rewards/margins": 2.975978136062622, + "rewards/rejected": -2.1786880493164062, + "step": 389 + }, + { + "epoch": 0.5, + "learning_rate": 8.802029828000155e-08, + "logits/chosen": -3.2676141262054443, + "logits/rejected": -3.0394110679626465, + "logps/chosen": -255.61744689941406, + "logps/rejected": -431.64898681640625, + "loss": 0.3363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7835479974746704, + "rewards/margins": 2.409100294113159, + "rewards/rejected": -1.6255524158477783, + "step": 390 + }, + { + "epoch": 0.5, + "learning_rate": 8.795310194832022e-08, + "logits/chosen": -3.1951351165771484, + "logits/rejected": -3.157233238220215, + "logps/chosen": -256.45703125, + "logps/rejected": -4088.7607421875, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7195777893066406, + "rewards/margins": 5.816611289978027, + "rewards/rejected": -5.097033500671387, + "step": 391 + }, + { + "epoch": 0.5, + "learning_rate": 8.788574348801674e-08, + "logits/chosen": -3.2021045684814453, + "logits/rejected": -3.1109941005706787, + "logps/chosen": -270.5849914550781, + "logps/rejected": -662.908203125, + "loss": 0.3536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.679155707359314, + "rewards/margins": 3.3490257263183594, + "rewards/rejected": -2.669869899749756, + "step": 392 + }, + { + "epoch": 0.5, + "learning_rate": 8.781822318683392e-08, + "logits/chosen": -3.223463535308838, + "logits/rejected": -3.159191846847534, + "logps/chosen": -250.5804443359375, + "logps/rejected": -807.8071899414062, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.778912365436554, + "rewards/margins": 3.6663331985473633, + "rewards/rejected": -2.887420654296875, + "step": 393 + }, + { + "epoch": 0.5, + "learning_rate": 8.775054133320602e-08, + "logits/chosen": -3.260546922683716, + "logits/rejected": -3.1004228591918945, + "logps/chosen": -295.9278564453125, + "logps/rejected": -306.4621887207031, + "loss": 0.3794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7646774053573608, + "rewards/margins": 1.7197647094726562, + "rewards/rejected": -0.9550873041152954, + "step": 394 + }, + { + "epoch": 0.5, + "learning_rate": 8.76826982162574e-08, + "logits/chosen": -3.2675275802612305, + "logits/rejected": -3.1566643714904785, + "logps/chosen": -277.8262634277344, + "logps/rejected": -616.3759155273438, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7742385864257812, + "rewards/margins": 3.1138110160827637, + "rewards/rejected": -2.3395721912384033, + "step": 395 + }, + { + "epoch": 0.5, + "learning_rate": 8.761469412580124e-08, + "logits/chosen": -3.203915596008301, + "logits/rejected": -3.101022958755493, + "logps/chosen": -268.7257080078125, + "logps/rejected": -527.9718017578125, + "loss": 0.3851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7460479736328125, + "rewards/margins": 2.727832078933716, + "rewards/rejected": -1.9817841053009033, + "step": 396 + }, + { + "epoch": 0.51, + "learning_rate": 8.754652935233844e-08, + "logits/chosen": -3.227212905883789, + "logits/rejected": -3.1370558738708496, + "logps/chosen": -270.0761413574219, + "logps/rejected": -548.3507080078125, + "loss": 0.3239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7508560419082642, + "rewards/margins": 2.684720039367676, + "rewards/rejected": -1.933863878250122, + "step": 397 + }, + { + "epoch": 0.51, + "learning_rate": 8.74782041870563e-08, + "logits/chosen": -3.2344765663146973, + "logits/rejected": -3.1797146797180176, + "logps/chosen": -266.8138122558594, + "logps/rejected": -517.9188842773438, + "loss": 0.3407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7181457281112671, + "rewards/margins": 2.8009369373321533, + "rewards/rejected": -2.0827910900115967, + "step": 398 + }, + { + "epoch": 0.51, + "learning_rate": 8.740971892182726e-08, + "logits/chosen": -3.241455078125, + "logits/rejected": -3.0925493240356445, + "logps/chosen": -289.90264892578125, + "logps/rejected": -419.2547302246094, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7414230108261108, + "rewards/margins": 2.5156166553497314, + "rewards/rejected": -1.7741936445236206, + "step": 399 + }, + { + "epoch": 0.51, + "learning_rate": 8.734107384920769e-08, + "logits/chosen": -3.2137584686279297, + "logits/rejected": -3.07206130027771, + "logps/chosen": -269.9283142089844, + "logps/rejected": -857.8602905273438, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8433784246444702, + "rewards/margins": 3.9186806678771973, + "rewards/rejected": -3.0753021240234375, + "step": 400 + }, + { + "epoch": 0.51, + "learning_rate": 8.727226926243663e-08, + "logits/chosen": -3.2742819786071777, + "logits/rejected": -3.1214187145233154, + "logps/chosen": -267.8173522949219, + "logps/rejected": -416.2884521484375, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.82080078125, + "rewards/margins": 2.200404405593872, + "rewards/rejected": -1.379603624343872, + "step": 401 + }, + { + "epoch": 0.51, + "learning_rate": 8.720330545543453e-08, + "logits/chosen": -3.2363996505737305, + "logits/rejected": -3.1871256828308105, + "logps/chosen": -280.8053283691406, + "logps/rejected": -716.689453125, + "loss": 0.3871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6497681140899658, + "rewards/margins": 3.356872797012329, + "rewards/rejected": -2.7071046829223633, + "step": 402 + }, + { + "epoch": 0.51, + "learning_rate": 8.7134182722802e-08, + "logits/chosen": -3.166591167449951, + "logits/rejected": -3.0851545333862305, + "logps/chosen": -297.1426696777344, + "logps/rejected": -469.15509033203125, + "loss": 0.3756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0726372003555298, + "rewards/margins": 2.7956888675689697, + "rewards/rejected": -1.7230515480041504, + "step": 403 + }, + { + "epoch": 0.51, + "learning_rate": 8.706490135981854e-08, + "logits/chosen": -3.194702625274658, + "logits/rejected": -3.046095848083496, + "logps/chosen": -269.4964904785156, + "logps/rejected": -196.99508666992188, + "loss": 0.4329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8028373718261719, + "rewards/margins": 1.2806214094161987, + "rewards/rejected": -0.4777839779853821, + "step": 404 + }, + { + "epoch": 0.52, + "learning_rate": 8.699546166244132e-08, + "logits/chosen": -3.206688165664673, + "logits/rejected": -3.1852996349334717, + "logps/chosen": -263.84613037109375, + "logps/rejected": -408.62249755859375, + "loss": 0.3563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7198097705841064, + "rewards/margins": 2.4317498207092285, + "rewards/rejected": -1.711940050125122, + "step": 405 + }, + { + "epoch": 0.52, + "learning_rate": 8.692586392730385e-08, + "logits/chosen": -3.266043186187744, + "logits/rejected": -3.1609995365142822, + "logps/chosen": -266.19720458984375, + "logps/rejected": -401.39105224609375, + "loss": 0.3585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7991256713867188, + "rewards/margins": 2.271409511566162, + "rewards/rejected": -1.472283959388733, + "step": 406 + }, + { + "epoch": 0.52, + "learning_rate": 8.685610845171478e-08, + "logits/chosen": -3.2206578254699707, + "logits/rejected": -3.131000518798828, + "logps/chosen": -284.8380126953125, + "logps/rejected": -855.0966796875, + "loss": 0.3213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7897232174873352, + "rewards/margins": 3.8959274291992188, + "rewards/rejected": -3.1062042713165283, + "step": 407 + }, + { + "epoch": 0.52, + "learning_rate": 8.678619553365658e-08, + "logits/chosen": -3.2543959617614746, + "logits/rejected": -3.08050537109375, + "logps/chosen": -270.50128173828125, + "logps/rejected": -411.5853271484375, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8454971313476562, + "rewards/margins": 2.3753814697265625, + "rewards/rejected": -1.5298843383789062, + "step": 408 + }, + { + "epoch": 0.52, + "learning_rate": 8.671612547178427e-08, + "logits/chosen": -3.2957241535186768, + "logits/rejected": -3.1010823249816895, + "logps/chosen": -264.9224548339844, + "logps/rejected": -495.974853515625, + "loss": 0.3635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9228675365447998, + "rewards/margins": 2.7172446250915527, + "rewards/rejected": -1.794377088546753, + "step": 409 + }, + { + "epoch": 0.52, + "learning_rate": 8.664589856542419e-08, + "logits/chosen": -3.19264554977417, + "logits/rejected": -3.0796408653259277, + "logps/chosen": -301.4252624511719, + "logps/rejected": -863.4136352539062, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.80950927734375, + "rewards/margins": 4.042944431304932, + "rewards/rejected": -3.2334351539611816, + "step": 410 + }, + { + "epoch": 0.52, + "learning_rate": 8.657551511457269e-08, + "logits/chosen": -3.2479875087738037, + "logits/rejected": -3.117178440093994, + "logps/chosen": -302.50244140625, + "logps/rejected": -1199.9393310546875, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7922073602676392, + "rewards/margins": 5.001304626464844, + "rewards/rejected": -4.209097385406494, + "step": 411 + }, + { + "epoch": 0.53, + "learning_rate": 8.650497541989481e-08, + "logits/chosen": -3.2609214782714844, + "logits/rejected": -3.132622241973877, + "logps/chosen": -288.5090637207031, + "logps/rejected": -390.28680419921875, + "loss": 0.4131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6833893060684204, + "rewards/margins": 2.046048164367676, + "rewards/rejected": -1.3626587390899658, + "step": 412 + }, + { + "epoch": 0.53, + "learning_rate": 8.643427978272307e-08, + "logits/chosen": -3.205841064453125, + "logits/rejected": -3.047661781311035, + "logps/chosen": -256.7186584472656, + "logps/rejected": -363.6689453125, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7278534173965454, + "rewards/margins": 1.9034538269042969, + "rewards/rejected": -1.1756004095077515, + "step": 413 + }, + { + "epoch": 0.53, + "learning_rate": 8.636342850505615e-08, + "logits/chosen": -3.2487196922302246, + "logits/rejected": -3.0837626457214355, + "logps/chosen": -255.2008056640625, + "logps/rejected": -999.067138671875, + "loss": 0.3655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9413070678710938, + "rewards/margins": 3.689776659011841, + "rewards/rejected": -2.748469591140747, + "step": 414 + }, + { + "epoch": 0.53, + "learning_rate": 8.629242188955757e-08, + "logits/chosen": -3.2341248989105225, + "logits/rejected": -3.0866165161132812, + "logps/chosen": -243.77304077148438, + "logps/rejected": -554.971923828125, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8338203430175781, + "rewards/margins": 3.057842254638672, + "rewards/rejected": -2.2240219116210938, + "step": 415 + }, + { + "epoch": 0.53, + "learning_rate": 8.622126023955445e-08, + "logits/chosen": -3.238546371459961, + "logits/rejected": -3.091050624847412, + "logps/chosen": -236.2622528076172, + "logps/rejected": -467.99566650390625, + "loss": 0.3225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7882606983184814, + "rewards/margins": 2.3601479530334473, + "rewards/rejected": -1.5718872547149658, + "step": 416 + }, + { + "epoch": 0.53, + "learning_rate": 8.614994385903616e-08, + "logits/chosen": -3.2050018310546875, + "logits/rejected": -3.0946083068847656, + "logps/chosen": -305.7296142578125, + "logps/rejected": -901.577392578125, + "loss": 0.3304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7755997180938721, + "rewards/margins": 3.6424193382263184, + "rewards/rejected": -2.8668198585510254, + "step": 417 + }, + { + "epoch": 0.53, + "learning_rate": 8.60784730526531e-08, + "logits/chosen": -3.238826274871826, + "logits/rejected": -3.168844223022461, + "logps/chosen": -284.062744140625, + "logps/rejected": -604.8909912109375, + "loss": 0.3629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7170898914337158, + "rewards/margins": 3.128391981124878, + "rewards/rejected": -2.411302089691162, + "step": 418 + }, + { + "epoch": 0.53, + "learning_rate": 8.600684812571531e-08, + "logits/chosen": -3.2228927612304688, + "logits/rejected": -3.0462446212768555, + "logps/chosen": -247.1304931640625, + "logps/rejected": -1464.03173828125, + "loss": 0.3466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8536087274551392, + "rewards/margins": 4.803727626800537, + "rewards/rejected": -3.9501190185546875, + "step": 419 + }, + { + "epoch": 0.54, + "learning_rate": 8.593506938419119e-08, + "logits/chosen": -3.1613121032714844, + "logits/rejected": -3.073786497116089, + "logps/chosen": -249.16342163085938, + "logps/rejected": -573.100830078125, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8730926513671875, + "rewards/margins": 2.980360507965088, + "rewards/rejected": -2.1072678565979004, + "step": 420 + }, + { + "epoch": 0.54, + "learning_rate": 8.586313713470626e-08, + "logits/chosen": -3.252687931060791, + "logits/rejected": -3.1467041969299316, + "logps/chosen": -259.2896423339844, + "logps/rejected": -660.0703735351562, + "loss": 0.3247, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9007790088653564, + "rewards/margins": 3.8071603775024414, + "rewards/rejected": -2.906381368637085, + "step": 421 + }, + { + "epoch": 0.54, + "learning_rate": 8.579105168454172e-08, + "logits/chosen": -3.294645309448242, + "logits/rejected": -3.127978801727295, + "logps/chosen": -287.02484130859375, + "logps/rejected": -655.2437744140625, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5608078241348267, + "rewards/margins": 2.878159999847412, + "rewards/rejected": -2.317352294921875, + "step": 422 + }, + { + "epoch": 0.54, + "learning_rate": 8.571881334163332e-08, + "logits/chosen": -3.227839469909668, + "logits/rejected": -3.1543753147125244, + "logps/chosen": -232.31195068359375, + "logps/rejected": -724.9199829101562, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6227134466171265, + "rewards/margins": 3.212893009185791, + "rewards/rejected": -2.590179443359375, + "step": 423 + }, + { + "epoch": 0.54, + "learning_rate": 8.564642241456985e-08, + "logits/chosen": -3.254117012023926, + "logits/rejected": -3.0684051513671875, + "logps/chosen": -270.421875, + "logps/rejected": -514.4871826171875, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8487594723701477, + "rewards/margins": 2.5126266479492188, + "rewards/rejected": -1.6638672351837158, + "step": 424 + }, + { + "epoch": 0.54, + "learning_rate": 8.557387921259195e-08, + "logits/chosen": -3.196629524230957, + "logits/rejected": -3.104896068572998, + "logps/chosen": -263.36181640625, + "logps/rejected": -868.3273315429688, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7430130243301392, + "rewards/margins": 3.576608180999756, + "rewards/rejected": -2.8335952758789062, + "step": 425 + }, + { + "epoch": 0.54, + "learning_rate": 8.550118404559074e-08, + "logits/chosen": -3.1540639400482178, + "logits/rejected": -3.09812593460083, + "logps/chosen": -280.1803894042969, + "logps/rejected": -572.5153198242188, + "loss": 0.3261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8260093927383423, + "rewards/margins": 3.010540008544922, + "rewards/rejected": -2.184530735015869, + "step": 426 + }, + { + "epoch": 0.54, + "learning_rate": 8.542833722410649e-08, + "logits/chosen": -3.285837411880493, + "logits/rejected": -3.1072425842285156, + "logps/chosen": -274.2689208984375, + "logps/rejected": -897.6898803710938, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8566345572471619, + "rewards/margins": 3.90101957321167, + "rewards/rejected": -3.0443849563598633, + "step": 427 + }, + { + "epoch": 0.55, + "learning_rate": 8.535533905932736e-08, + "logits/chosen": -3.206382989883423, + "logits/rejected": -3.1535534858703613, + "logps/chosen": -296.6357727050781, + "logps/rejected": -632.74169921875, + "loss": 0.4031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8454002737998962, + "rewards/margins": 3.2725729942321777, + "rewards/rejected": -2.427172899246216, + "step": 428 + }, + { + "epoch": 0.55, + "learning_rate": 8.5282189863088e-08, + "logits/chosen": -3.1793198585510254, + "logits/rejected": -3.129317283630371, + "logps/chosen": -244.85064697265625, + "logps/rejected": -706.5179443359375, + "loss": 0.2798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7417747378349304, + "rewards/margins": 3.760221004486084, + "rewards/rejected": -3.018446445465088, + "step": 429 + }, + { + "epoch": 0.55, + "learning_rate": 8.52088899478682e-08, + "logits/chosen": -3.2473902702331543, + "logits/rejected": -3.1077284812927246, + "logps/chosen": -259.7874450683594, + "logps/rejected": -604.6925048828125, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9414176940917969, + "rewards/margins": 2.93373966217041, + "rewards/rejected": -1.9923218488693237, + "step": 430 + }, + { + "epoch": 0.55, + "learning_rate": 8.513543962679161e-08, + "logits/chosen": -3.197272539138794, + "logits/rejected": -3.1023566722869873, + "logps/chosen": -283.84478759765625, + "logps/rejected": -1101.611572265625, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8700027465820312, + "rewards/margins": 4.090762138366699, + "rewards/rejected": -3.220759630203247, + "step": 431 + }, + { + "epoch": 0.55, + "learning_rate": 8.506183921362442e-08, + "logits/chosen": -3.1974639892578125, + "logits/rejected": -3.0022056102752686, + "logps/chosen": -253.10504150390625, + "logps/rejected": -322.11474609375, + "loss": 0.3449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8072960376739502, + "rewards/margins": 1.849646806716919, + "rewards/rejected": -1.0423507690429688, + "step": 432 + }, + { + "epoch": 0.55, + "learning_rate": 8.498808902277397e-08, + "logits/chosen": -3.2275032997131348, + "logits/rejected": -3.1213772296905518, + "logps/chosen": -254.1715087890625, + "logps/rejected": -511.87322998046875, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9334220886230469, + "rewards/margins": 2.9389641284942627, + "rewards/rejected": -2.005542039871216, + "step": 433 + }, + { + "epoch": 0.55, + "learning_rate": 8.491418936928741e-08, + "logits/chosen": -3.2292819023132324, + "logits/rejected": -2.919489860534668, + "logps/chosen": -261.2859191894531, + "logps/rejected": -947.7154541015625, + "loss": 0.3656, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8181701898574829, + "rewards/margins": 3.444366455078125, + "rewards/rejected": -2.6261963844299316, + "step": 434 + }, + { + "epoch": 0.55, + "learning_rate": 8.484014056885038e-08, + "logits/chosen": -3.2828025817871094, + "logits/rejected": -3.099106550216675, + "logps/chosen": -261.2425842285156, + "logps/rejected": -1636.424560546875, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6553581357002258, + "rewards/margins": 6.446419715881348, + "rewards/rejected": -5.7910614013671875, + "step": 435 + }, + { + "epoch": 0.56, + "learning_rate": 8.47659429377856e-08, + "logits/chosen": -3.2147057056427, + "logits/rejected": -3.070646286010742, + "logps/chosen": -269.6820068359375, + "logps/rejected": -480.5264892578125, + "loss": 0.3678, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9617859125137329, + "rewards/margins": 2.738739013671875, + "rewards/rejected": -1.776953101158142, + "step": 436 + }, + { + "epoch": 0.56, + "learning_rate": 8.469159679305166e-08, + "logits/chosen": -3.1902236938476562, + "logits/rejected": -3.058621883392334, + "logps/chosen": -313.3646240234375, + "logps/rejected": -1541.30810546875, + "loss": 0.3596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6860015988349915, + "rewards/margins": 5.736645698547363, + "rewards/rejected": -5.0506439208984375, + "step": 437 + }, + { + "epoch": 0.56, + "learning_rate": 8.461710245224147e-08, + "logits/chosen": -3.2143163681030273, + "logits/rejected": -3.1018097400665283, + "logps/chosen": -282.0649108886719, + "logps/rejected": -563.9544677734375, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.87744140625, + "rewards/margins": 3.1633238792419434, + "rewards/rejected": -2.2858827114105225, + "step": 438 + }, + { + "epoch": 0.56, + "learning_rate": 8.454246023358112e-08, + "logits/chosen": -3.2391562461853027, + "logits/rejected": -3.1246042251586914, + "logps/chosen": -298.03753662109375, + "logps/rejected": -813.7003173828125, + "loss": 0.3456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8310104608535767, + "rewards/margins": 4.024688720703125, + "rewards/rejected": -3.193678379058838, + "step": 439 + }, + { + "epoch": 0.56, + "learning_rate": 8.446767045592829e-08, + "logits/chosen": -3.219405174255371, + "logits/rejected": -3.1657209396362305, + "logps/chosen": -293.6539611816406, + "logps/rejected": -582.5520629882812, + "loss": 0.3532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7351264953613281, + "rewards/margins": 2.773043155670166, + "rewards/rejected": -2.037916660308838, + "step": 440 + }, + { + "epoch": 0.56, + "learning_rate": 8.439273343877108e-08, + "logits/chosen": -3.2989501953125, + "logits/rejected": -3.0345401763916016, + "logps/chosen": -258.2314147949219, + "logps/rejected": -1288.0616455078125, + "loss": 0.3293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8184105157852173, + "rewards/margins": 5.289821624755859, + "rewards/rejected": -4.471411228179932, + "step": 441 + }, + { + "epoch": 0.56, + "learning_rate": 8.431764950222655e-08, + "logits/chosen": -3.2187578678131104, + "logits/rejected": -3.132005453109741, + "logps/chosen": -255.35928344726562, + "logps/rejected": -824.1069946289062, + "loss": 0.3185, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7944900989532471, + "rewards/margins": 3.626194715499878, + "rewards/rejected": -2.831704616546631, + "step": 442 + }, + { + "epoch": 0.56, + "learning_rate": 8.424241896703935e-08, + "logits/chosen": -3.2637572288513184, + "logits/rejected": -3.08433198928833, + "logps/chosen": -249.485595703125, + "logps/rejected": -642.3795166015625, + "loss": 0.3406, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6987549066543579, + "rewards/margins": 3.5568253993988037, + "rewards/rejected": -2.8580703735351562, + "step": 443 + }, + { + "epoch": 0.57, + "learning_rate": 8.416704215458041e-08, + "logits/chosen": -3.2099385261535645, + "logits/rejected": -3.082271099090576, + "logps/chosen": -303.0394592285156, + "logps/rejected": -408.7459411621094, + "loss": 0.342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8746589422225952, + "rewards/margins": 2.4815988540649414, + "rewards/rejected": -1.6069397926330566, + "step": 444 + }, + { + "epoch": 0.57, + "learning_rate": 8.409151938684553e-08, + "logits/chosen": -3.2044286727905273, + "logits/rejected": -3.0380234718322754, + "logps/chosen": -249.4583282470703, + "logps/rejected": -1257.105712890625, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8386322259902954, + "rewards/margins": 4.7194013595581055, + "rewards/rejected": -3.8807692527770996, + "step": 445 + }, + { + "epoch": 0.57, + "learning_rate": 8.401585098645395e-08, + "logits/chosen": -3.2411084175109863, + "logits/rejected": -3.116609573364258, + "logps/chosen": -263.5846862792969, + "logps/rejected": -549.5791015625, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8765121698379517, + "rewards/margins": 3.583491563796997, + "rewards/rejected": -2.706979274749756, + "step": 446 + }, + { + "epoch": 0.57, + "learning_rate": 8.394003727664709e-08, + "logits/chosen": -3.293605327606201, + "logits/rejected": -3.137969970703125, + "logps/chosen": -275.9134826660156, + "logps/rejected": -689.2149047851562, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8251174688339233, + "rewards/margins": 3.58331298828125, + "rewards/rejected": -2.758195400238037, + "step": 447 + }, + { + "epoch": 0.57, + "learning_rate": 8.386407858128706e-08, + "logits/chosen": -3.2034220695495605, + "logits/rejected": -3.1753761768341064, + "logps/chosen": -270.89300537109375, + "logps/rejected": -577.076416015625, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.206414818763733, + "rewards/margins": 3.332437038421631, + "rewards/rejected": -2.1260223388671875, + "step": 448 + }, + { + "epoch": 0.57, + "learning_rate": 8.378797522485532e-08, + "logits/chosen": -3.2648541927337646, + "logits/rejected": -3.135232448577881, + "logps/chosen": -243.2174072265625, + "logps/rejected": -603.101318359375, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7300262451171875, + "rewards/margins": 2.933821201324463, + "rewards/rejected": -2.2037949562072754, + "step": 449 + }, + { + "epoch": 0.57, + "learning_rate": 8.371172753245137e-08, + "logits/chosen": -3.222536563873291, + "logits/rejected": -2.970794200897217, + "logps/chosen": -253.36312866210938, + "logps/rejected": -1204.4268798828125, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.57611083984375, + "rewards/margins": 4.244023323059082, + "rewards/rejected": -3.667912244796753, + "step": 450 + }, + { + "epoch": 0.57, + "learning_rate": 8.363533582979117e-08, + "logits/chosen": -3.167879581451416, + "logits/rejected": -3.018566370010376, + "logps/chosen": -285.8478698730469, + "logps/rejected": -1378.07763671875, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7847213745117188, + "rewards/margins": 4.811253547668457, + "rewards/rejected": -4.026532173156738, + "step": 451 + }, + { + "epoch": 0.58, + "learning_rate": 8.355880044320598e-08, + "logits/chosen": -3.196105480194092, + "logits/rejected": -3.1379575729370117, + "logps/chosen": -286.457275390625, + "logps/rejected": -769.9691162109375, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9460388422012329, + "rewards/margins": 4.262341499328613, + "rewards/rejected": -3.316302537918091, + "step": 452 + }, + { + "epoch": 0.58, + "learning_rate": 8.348212169964075e-08, + "logits/chosen": -3.1923580169677734, + "logits/rejected": -3.119267463684082, + "logps/chosen": -287.0309753417969, + "logps/rejected": -438.1377868652344, + "loss": 0.3306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7889373898506165, + "rewards/margins": 2.7345948219299316, + "rewards/rejected": -1.94565749168396, + "step": 453 + }, + { + "epoch": 0.58, + "learning_rate": 8.340529992665288e-08, + "logits/chosen": -3.2198643684387207, + "logits/rejected": -3.071834087371826, + "logps/chosen": -269.8547668457031, + "logps/rejected": -1442.5081787109375, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8464142084121704, + "rewards/margins": 5.412161350250244, + "rewards/rejected": -4.565747261047363, + "step": 454 + }, + { + "epoch": 0.58, + "learning_rate": 8.332833545241078e-08, + "logits/chosen": -3.204014539718628, + "logits/rejected": -3.027287483215332, + "logps/chosen": -243.5648651123047, + "logps/rejected": -636.8330078125, + "loss": 0.353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8403542041778564, + "rewards/margins": 3.3460776805877686, + "rewards/rejected": -2.505723476409912, + "step": 455 + }, + { + "epoch": 0.58, + "learning_rate": 8.32512286056924e-08, + "logits/chosen": -3.2582318782806396, + "logits/rejected": -3.0865116119384766, + "logps/chosen": -256.03558349609375, + "logps/rejected": -335.97198486328125, + "loss": 0.3139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.977630615234375, + "rewards/margins": 2.2482941150665283, + "rewards/rejected": -1.2706634998321533, + "step": 456 + }, + { + "epoch": 0.58, + "learning_rate": 8.317397971588394e-08, + "logits/chosen": -3.1544711589813232, + "logits/rejected": -3.0553297996520996, + "logps/chosen": -267.984619140625, + "logps/rejected": -4538.857421875, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7690101861953735, + "rewards/margins": 6.857004165649414, + "rewards/rejected": -6.087994575500488, + "step": 457 + }, + { + "epoch": 0.58, + "learning_rate": 8.309658911297832e-08, + "logits/chosen": -3.2238638401031494, + "logits/rejected": -3.0274300575256348, + "logps/chosen": -290.748046875, + "logps/rejected": -953.1544189453125, + "loss": 0.3437, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8593581914901733, + "rewards/margins": 3.9426591396331787, + "rewards/rejected": -3.083300828933716, + "step": 458 + }, + { + "epoch": 0.59, + "learning_rate": 8.301905712757389e-08, + "logits/chosen": -3.2502427101135254, + "logits/rejected": -3.156615734100342, + "logps/chosen": -256.81341552734375, + "logps/rejected": -597.954345703125, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7308555841445923, + "rewards/margins": 2.931398868560791, + "rewards/rejected": -2.200543165206909, + "step": 459 + }, + { + "epoch": 0.59, + "learning_rate": 8.294138409087289e-08, + "logits/chosen": -3.1654038429260254, + "logits/rejected": -3.1063411235809326, + "logps/chosen": -273.8494567871094, + "logps/rejected": -411.19866943359375, + "loss": 0.3502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6654342412948608, + "rewards/margins": 2.2789306640625, + "rewards/rejected": -1.6134964227676392, + "step": 460 + }, + { + "epoch": 0.59, + "learning_rate": 8.286357033468018e-08, + "logits/chosen": -3.239157199859619, + "logits/rejected": -3.122438430786133, + "logps/chosen": -294.9073486328125, + "logps/rejected": -570.1322021484375, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8439834713935852, + "rewards/margins": 3.110246181488037, + "rewards/rejected": -2.2662627696990967, + "step": 461 + }, + { + "epoch": 0.59, + "learning_rate": 8.278561619140171e-08, + "logits/chosen": -3.3047595024108887, + "logits/rejected": -3.2164769172668457, + "logps/chosen": -273.0039978027344, + "logps/rejected": -1108.64794921875, + "loss": 0.3139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9684463739395142, + "rewards/margins": 5.910676956176758, + "rewards/rejected": -4.942230224609375, + "step": 462 + }, + { + "epoch": 0.59, + "learning_rate": 8.270752199404315e-08, + "logits/chosen": -3.2319936752319336, + "logits/rejected": -3.108454465866089, + "logps/chosen": -278.43011474609375, + "logps/rejected": -732.1000366210938, + "loss": 0.3322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9008781313896179, + "rewards/margins": 4.602609634399414, + "rewards/rejected": -3.7017319202423096, + "step": 463 + }, + { + "epoch": 0.59, + "learning_rate": 8.262928807620843e-08, + "logits/chosen": -3.193385124206543, + "logits/rejected": -3.076955556869507, + "logps/chosen": -313.0581970214844, + "logps/rejected": -808.719970703125, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9106689691543579, + "rewards/margins": 3.4831771850585938, + "rewards/rejected": -2.5725083351135254, + "step": 464 + }, + { + "epoch": 0.59, + "learning_rate": 8.255091477209835e-08, + "logits/chosen": -3.21062970161438, + "logits/rejected": -3.128424644470215, + "logps/chosen": -274.0123596191406, + "logps/rejected": -1581.61865234375, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1071442365646362, + "rewards/margins": 6.115814208984375, + "rewards/rejected": -5.008669853210449, + "step": 465 + }, + { + "epoch": 0.59, + "learning_rate": 8.247240241650917e-08, + "logits/chosen": -3.285097122192383, + "logits/rejected": -3.1463565826416016, + "logps/chosen": -247.94085693359375, + "logps/rejected": -473.7142333984375, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7603409290313721, + "rewards/margins": 2.522885322570801, + "rewards/rejected": -1.7625442743301392, + "step": 466 + }, + { + "epoch": 0.6, + "learning_rate": 8.239375134483114e-08, + "logits/chosen": -3.2274584770202637, + "logits/rejected": -3.086172342300415, + "logps/chosen": -260.783203125, + "logps/rejected": -673.7109375, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8220055103302002, + "rewards/margins": 3.541088104248047, + "rewards/rejected": -2.7190825939178467, + "step": 467 + }, + { + "epoch": 0.6, + "learning_rate": 8.231496189304703e-08, + "logits/chosen": -3.2889819145202637, + "logits/rejected": -3.211007595062256, + "logps/chosen": -261.2234191894531, + "logps/rejected": -1084.532958984375, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9469841122627258, + "rewards/margins": 4.951415061950684, + "rewards/rejected": -4.004431247711182, + "step": 468 + }, + { + "epoch": 0.6, + "learning_rate": 8.223603439773083e-08, + "logits/chosen": -3.252103805541992, + "logits/rejected": -3.191023349761963, + "logps/chosen": -293.1502685546875, + "logps/rejected": -700.7526245117188, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9240814447402954, + "rewards/margins": 3.9154510498046875, + "rewards/rejected": -2.9913697242736816, + "step": 469 + }, + { + "epoch": 0.6, + "learning_rate": 8.215696919604617e-08, + "logits/chosen": -3.241913080215454, + "logits/rejected": -3.137037992477417, + "logps/chosen": -273.1730041503906, + "logps/rejected": -520.4462280273438, + "loss": 0.3295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9573044180870056, + "rewards/margins": 3.204525947570801, + "rewards/rejected": -2.2472214698791504, + "step": 470 + }, + { + "epoch": 0.6, + "learning_rate": 8.207776662574493e-08, + "logits/chosen": -3.2164626121520996, + "logits/rejected": -3.1888022422790527, + "logps/chosen": -294.4500732421875, + "logps/rejected": -761.2125244140625, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0259690284729004, + "rewards/margins": 4.041487216949463, + "rewards/rejected": -3.0155181884765625, + "step": 471 + }, + { + "epoch": 0.6, + "learning_rate": 8.199842702516583e-08, + "logits/chosen": -3.2477505207061768, + "logits/rejected": -3.04158878326416, + "logps/chosen": -305.4393310546875, + "logps/rejected": -873.6239013671875, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8159821033477783, + "rewards/margins": 3.7831695079803467, + "rewards/rejected": -2.9671874046325684, + "step": 472 + }, + { + "epoch": 0.6, + "learning_rate": 8.191895073323294e-08, + "logits/chosen": -3.2811999320983887, + "logits/rejected": -3.0701277256011963, + "logps/chosen": -236.37631225585938, + "logps/rejected": -369.90777587890625, + "loss": 0.3217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8392982482910156, + "rewards/margins": 2.0900368690490723, + "rewards/rejected": -1.2507386207580566, + "step": 473 + }, + { + "epoch": 0.6, + "learning_rate": 8.18393380894543e-08, + "logits/chosen": -3.2601847648620605, + "logits/rejected": -3.128382682800293, + "logps/chosen": -282.0128173828125, + "logps/rejected": -1145.208251953125, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9818969964981079, + "rewards/margins": 5.056524753570557, + "rewards/rejected": -4.074627876281738, + "step": 474 + }, + { + "epoch": 0.61, + "learning_rate": 8.175958943392032e-08, + "logits/chosen": -3.209251880645752, + "logits/rejected": -3.058342218399048, + "logps/chosen": -247.351806640625, + "logps/rejected": -584.42236328125, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8321807980537415, + "rewards/margins": 2.7255706787109375, + "rewards/rejected": -1.8933899402618408, + "step": 475 + }, + { + "epoch": 0.61, + "learning_rate": 8.167970510730252e-08, + "logits/chosen": -3.1772780418395996, + "logits/rejected": -3.1269190311431885, + "logps/chosen": -265.93212890625, + "logps/rejected": -311.333251953125, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1616013050079346, + "rewards/margins": 2.2335357666015625, + "rewards/rejected": -1.071934461593628, + "step": 476 + }, + { + "epoch": 0.61, + "learning_rate": 8.159968545085195e-08, + "logits/chosen": -3.2071313858032227, + "logits/rejected": -3.10189151763916, + "logps/chosen": -288.17022705078125, + "logps/rejected": -556.959716796875, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9114974737167358, + "rewards/margins": 3.4832839965820312, + "rewards/rejected": -2.571786642074585, + "step": 477 + }, + { + "epoch": 0.61, + "learning_rate": 8.151953080639775e-08, + "logits/chosen": -3.268465995788574, + "logits/rejected": -3.1517961025238037, + "logps/chosen": -231.3397674560547, + "logps/rejected": -491.50323486328125, + "loss": 0.3365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8494201898574829, + "rewards/margins": 2.885180711746216, + "rewards/rejected": -2.0357606410980225, + "step": 478 + }, + { + "epoch": 0.61, + "learning_rate": 8.143924151634572e-08, + "logits/chosen": -3.270580768585205, + "logits/rejected": -3.102890729904175, + "logps/chosen": -259.47613525390625, + "logps/rejected": -246.3270721435547, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0151214599609375, + "rewards/margins": 1.8615150451660156, + "rewards/rejected": -0.8463935852050781, + "step": 479 + }, + { + "epoch": 0.61, + "learning_rate": 8.135881792367685e-08, + "logits/chosen": -3.2480430603027344, + "logits/rejected": -3.15874981880188, + "logps/chosen": -293.80010986328125, + "logps/rejected": -491.9605712890625, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8417556881904602, + "rewards/margins": 2.6379408836364746, + "rewards/rejected": -1.7961852550506592, + "step": 480 + }, + { + "epoch": 0.61, + "learning_rate": 8.127826037194581e-08, + "logits/chosen": -3.1967482566833496, + "logits/rejected": -3.044292688369751, + "logps/chosen": -274.60052490234375, + "logps/rejected": -867.2493286132812, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6944778561592102, + "rewards/margins": 3.6914825439453125, + "rewards/rejected": -2.997004747390747, + "step": 481 + }, + { + "epoch": 0.61, + "learning_rate": 8.119756920527954e-08, + "logits/chosen": -3.203083038330078, + "logits/rejected": -3.081141948699951, + "logps/chosen": -290.3846435546875, + "logps/rejected": -801.4775390625, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8721588253974915, + "rewards/margins": 3.928524971008301, + "rewards/rejected": -3.056365966796875, + "step": 482 + }, + { + "epoch": 0.62, + "learning_rate": 8.11167447683758e-08, + "logits/chosen": -3.2252793312072754, + "logits/rejected": -3.0807223320007324, + "logps/chosen": -248.05531311035156, + "logps/rejected": -846.721435546875, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9259941577911377, + "rewards/margins": 4.812294960021973, + "rewards/rejected": -3.886300802230835, + "step": 483 + }, + { + "epoch": 0.62, + "learning_rate": 8.103578740650156e-08, + "logits/chosen": -3.305634021759033, + "logits/rejected": -3.166006088256836, + "logps/chosen": -247.18142700195312, + "logps/rejected": -458.03533935546875, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9866645336151123, + "rewards/margins": 2.5511162281036377, + "rewards/rejected": -1.5644516944885254, + "step": 484 + }, + { + "epoch": 0.62, + "learning_rate": 8.09546974654917e-08, + "logits/chosen": -3.2296323776245117, + "logits/rejected": -3.137997627258301, + "logps/chosen": -259.03668212890625, + "logps/rejected": -399.5750732421875, + "loss": 0.329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.785937488079071, + "rewards/margins": 2.2554473876953125, + "rewards/rejected": -1.4695098400115967, + "step": 485 + }, + { + "epoch": 0.62, + "learning_rate": 8.087347529174742e-08, + "logits/chosen": -3.102102041244507, + "logits/rejected": -3.0444111824035645, + "logps/chosen": -279.82513427734375, + "logps/rejected": -462.0140686035156, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7409866452217102, + "rewards/margins": 2.271193027496338, + "rewards/rejected": -1.530206322669983, + "step": 486 + }, + { + "epoch": 0.62, + "learning_rate": 8.079212123223485e-08, + "logits/chosen": -3.169666290283203, + "logits/rejected": -3.0671205520629883, + "logps/chosen": -275.1385192871094, + "logps/rejected": -791.0269775390625, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8326294422149658, + "rewards/margins": 4.057623386383057, + "rewards/rejected": -3.224993944168091, + "step": 487 + }, + { + "epoch": 0.62, + "learning_rate": 8.07106356344834e-08, + "logits/chosen": -3.2315640449523926, + "logits/rejected": -3.186962127685547, + "logps/chosen": -287.77398681640625, + "logps/rejected": -615.0077514648438, + "loss": 0.3307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9552154541015625, + "rewards/margins": 3.293942451477051, + "rewards/rejected": -2.3387269973754883, + "step": 488 + }, + { + "epoch": 0.62, + "learning_rate": 8.062901884658447e-08, + "logits/chosen": -3.2639987468719482, + "logits/rejected": -3.1851909160614014, + "logps/chosen": -267.9469299316406, + "logps/rejected": -1045.946533203125, + "loss": 0.3434, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.075709581375122, + "rewards/margins": 5.401869297027588, + "rewards/rejected": -4.326159954071045, + "step": 489 + }, + { + "epoch": 0.62, + "learning_rate": 8.054727121718987e-08, + "logits/chosen": -3.2084200382232666, + "logits/rejected": -2.933948516845703, + "logps/chosen": -299.1805114746094, + "logps/rejected": -1513.9356689453125, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0042266845703125, + "rewards/margins": 5.984256267547607, + "rewards/rejected": -4.980029582977295, + "step": 490 + }, + { + "epoch": 0.63, + "learning_rate": 8.046539309551035e-08, + "logits/chosen": -3.253354787826538, + "logits/rejected": -3.167513847351074, + "logps/chosen": -250.2384796142578, + "logps/rejected": -458.48211669921875, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.091555118560791, + "rewards/margins": 2.934211015701294, + "rewards/rejected": -1.842655897140503, + "step": 491 + }, + { + "epoch": 0.63, + "learning_rate": 8.038338483131406e-08, + "logits/chosen": -3.269975423812866, + "logits/rejected": -3.0754189491271973, + "logps/chosen": -270.8665466308594, + "logps/rejected": -718.1097412109375, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9355705976486206, + "rewards/margins": 4.564894676208496, + "rewards/rejected": -3.629324436187744, + "step": 492 + }, + { + "epoch": 0.63, + "learning_rate": 8.030124677492512e-08, + "logits/chosen": -3.230534553527832, + "logits/rejected": -3.133211851119995, + "logps/chosen": -256.1295166015625, + "logps/rejected": -747.050537109375, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9535477161407471, + "rewards/margins": 4.078138828277588, + "rewards/rejected": -3.124591112136841, + "step": 493 + }, + { + "epoch": 0.63, + "learning_rate": 8.021897927722208e-08, + "logits/chosen": -3.249177932739258, + "logits/rejected": -3.1046226024627686, + "logps/chosen": -242.00831604003906, + "logps/rejected": -414.6741943359375, + "loss": 0.318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8704559803009033, + "rewards/margins": 2.4647645950317383, + "rewards/rejected": -1.5943084955215454, + "step": 494 + }, + { + "epoch": 0.63, + "learning_rate": 8.013658268963648e-08, + "logits/chosen": -3.285334825515747, + "logits/rejected": -3.1017045974731445, + "logps/chosen": -252.1298065185547, + "logps/rejected": -468.2322692871094, + "loss": 0.3246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8938530683517456, + "rewards/margins": 2.6449639797210693, + "rewards/rejected": -1.7511109113693237, + "step": 495 + }, + { + "epoch": 0.63, + "learning_rate": 8.005405736415125e-08, + "logits/chosen": -3.177626132965088, + "logits/rejected": -3.0918681621551514, + "logps/chosen": -311.8412170410156, + "logps/rejected": -456.81072998046875, + "loss": 0.3249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9600433707237244, + "rewards/margins": 2.432858467102051, + "rewards/rejected": -1.4728150367736816, + "step": 496 + }, + { + "epoch": 0.63, + "learning_rate": 7.997140365329933e-08, + "logits/chosen": -3.2233431339263916, + "logits/rejected": -3.0488181114196777, + "logps/chosen": -245.9797821044922, + "logps/rejected": -983.743408203125, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9913345575332642, + "rewards/margins": 4.792069911956787, + "rewards/rejected": -3.8007354736328125, + "step": 497 + }, + { + "epoch": 0.63, + "learning_rate": 7.988862191016203e-08, + "logits/chosen": -3.1880879402160645, + "logits/rejected": -3.0477395057678223, + "logps/chosen": -286.5135498046875, + "logps/rejected": -1163.4364013671875, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9509323239326477, + "rewards/margins": 5.073513507843018, + "rewards/rejected": -4.122581481933594, + "step": 498 + }, + { + "epoch": 0.64, + "learning_rate": 7.980571248836767e-08, + "logits/chosen": -3.2033843994140625, + "logits/rejected": -3.062028408050537, + "logps/chosen": -295.5080261230469, + "logps/rejected": -347.59735107421875, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.935192883014679, + "rewards/margins": 2.4147400856018066, + "rewards/rejected": -1.479547142982483, + "step": 499 + }, + { + "epoch": 0.64, + "learning_rate": 7.97226757420899e-08, + "logits/chosen": -3.2122724056243896, + "logits/rejected": -3.0662803649902344, + "logps/chosen": -256.3802185058594, + "logps/rejected": -875.139892578125, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9409592151641846, + "rewards/margins": 3.972665548324585, + "rewards/rejected": -3.0317063331604004, + "step": 500 + }, + { + "epoch": 0.64, + "learning_rate": 7.963951202604632e-08, + "logits/chosen": -3.17441725730896, + "logits/rejected": -3.032059669494629, + "logps/chosen": -290.2332458496094, + "logps/rejected": -708.6346435546875, + "loss": 0.3223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8342208862304688, + "rewards/margins": 3.0124192237854004, + "rewards/rejected": -2.1781983375549316, + "step": 501 + }, + { + "epoch": 0.64, + "learning_rate": 7.955622169549696e-08, + "logits/chosen": -3.2692856788635254, + "logits/rejected": -3.126535654067993, + "logps/chosen": -287.5715637207031, + "logps/rejected": -629.563720703125, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9720200300216675, + "rewards/margins": 3.402387857437134, + "rewards/rejected": -2.430367946624756, + "step": 502 + }, + { + "epoch": 0.64, + "learning_rate": 7.947280510624267e-08, + "logits/chosen": -3.2869300842285156, + "logits/rejected": -3.112861156463623, + "logps/chosen": -258.0238037109375, + "logps/rejected": -564.7628784179688, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8686996698379517, + "rewards/margins": 3.3582839965820312, + "rewards/rejected": -2.489584445953369, + "step": 503 + }, + { + "epoch": 0.64, + "learning_rate": 7.938926261462366e-08, + "logits/chosen": -3.2854108810424805, + "logits/rejected": -3.130479335784912, + "logps/chosen": -284.789794921875, + "logps/rejected": -521.0792236328125, + "loss": 0.3565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7680023312568665, + "rewards/margins": 2.812640428543091, + "rewards/rejected": -2.044638156890869, + "step": 504 + }, + { + "epoch": 0.64, + "learning_rate": 7.930559457751797e-08, + "logits/chosen": -3.1595396995544434, + "logits/rejected": -3.147921085357666, + "logps/chosen": -339.26385498046875, + "logps/rejected": -807.2064208984375, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8873291015625, + "rewards/margins": 4.492535591125488, + "rewards/rejected": -3.6052064895629883, + "step": 505 + }, + { + "epoch": 0.64, + "learning_rate": 7.922180135233999e-08, + "logits/chosen": -3.202681064605713, + "logits/rejected": -3.144829750061035, + "logps/chosen": -265.39520263671875, + "logps/rejected": -417.9725646972656, + "loss": 0.3525, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.923352062702179, + "rewards/margins": 2.1943695545196533, + "rewards/rejected": -1.2710175514221191, + "step": 506 + }, + { + "epoch": 0.65, + "learning_rate": 7.913788329703883e-08, + "logits/chosen": -3.2522664070129395, + "logits/rejected": -3.1345367431640625, + "logps/chosen": -266.73651123046875, + "logps/rejected": -837.6697998046875, + "loss": 0.3151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8037551641464233, + "rewards/margins": 3.883000135421753, + "rewards/rejected": -3.079245090484619, + "step": 507 + }, + { + "epoch": 0.65, + "learning_rate": 7.905384077009691e-08, + "logits/chosen": -3.195955514907837, + "logits/rejected": -2.9255895614624023, + "logps/chosen": -292.22125244140625, + "logps/rejected": -1653.4771728515625, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8337119817733765, + "rewards/margins": 6.201058387756348, + "rewards/rejected": -5.367346286773682, + "step": 508 + }, + { + "epoch": 0.65, + "learning_rate": 7.896967413052831e-08, + "logits/chosen": -3.214755058288574, + "logits/rejected": -3.0654759407043457, + "logps/chosen": -263.3607177734375, + "logps/rejected": -394.0655212402344, + "loss": 0.3308, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9713028073310852, + "rewards/margins": 2.222695827484131, + "rewards/rejected": -1.2513930797576904, + "step": 509 + }, + { + "epoch": 0.65, + "learning_rate": 7.888538373787734e-08, + "logits/chosen": -3.262011766433716, + "logits/rejected": -3.1478731632232666, + "logps/chosen": -252.99986267089844, + "logps/rejected": -507.463134765625, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.921093761920929, + "rewards/margins": 3.0025742053985596, + "rewards/rejected": -2.0814805030822754, + "step": 510 + }, + { + "epoch": 0.65, + "learning_rate": 7.880096995221694e-08, + "logits/chosen": -3.262287139892578, + "logits/rejected": -3.066721200942993, + "logps/chosen": -267.89013671875, + "logps/rejected": -1701.785400390625, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2037795782089233, + "rewards/margins": 7.255384922027588, + "rewards/rejected": -6.051605224609375, + "step": 511 + }, + { + "epoch": 0.65, + "learning_rate": 7.871643313414717e-08, + "logits/chosen": -3.2283334732055664, + "logits/rejected": -3.0314745903015137, + "logps/chosen": -254.7135009765625, + "logps/rejected": -348.5784912109375, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8020065426826477, + "rewards/margins": 2.0088088512420654, + "rewards/rejected": -1.2068023681640625, + "step": 512 + }, + { + "epoch": 0.65, + "learning_rate": 7.863177364479368e-08, + "logits/chosen": -3.287590980529785, + "logits/rejected": -3.1082167625427246, + "logps/chosen": -249.97314453125, + "logps/rejected": -1140.659423828125, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0283730030059814, + "rewards/margins": 4.728116989135742, + "rewards/rejected": -3.6997437477111816, + "step": 513 + }, + { + "epoch": 0.66, + "learning_rate": 7.854699184580609e-08, + "logits/chosen": -3.2080893516540527, + "logits/rejected": -3.07490873336792, + "logps/chosen": -276.00494384765625, + "logps/rejected": -616.3365478515625, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8010879755020142, + "rewards/margins": 3.564196825027466, + "rewards/rejected": -2.763108730316162, + "step": 514 + }, + { + "epoch": 0.66, + "learning_rate": 7.846208809935657e-08, + "logits/chosen": -3.2451539039611816, + "logits/rejected": -3.1256484985351562, + "logps/chosen": -305.7849426269531, + "logps/rejected": -1551.864501953125, + "loss": 0.3157, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8533821105957031, + "rewards/margins": 6.63797664642334, + "rewards/rejected": -5.784594535827637, + "step": 515 + }, + { + "epoch": 0.66, + "learning_rate": 7.837706276813818e-08, + "logits/chosen": -3.2355170249938965, + "logits/rejected": -3.139461040496826, + "logps/chosen": -276.65191650390625, + "logps/rejected": -745.6324462890625, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7643287777900696, + "rewards/margins": 3.7776222229003906, + "rewards/rejected": -3.013293504714966, + "step": 516 + }, + { + "epoch": 0.66, + "learning_rate": 7.829191621536342e-08, + "logits/chosen": -3.239579916000366, + "logits/rejected": -3.0733895301818848, + "logps/chosen": -275.88330078125, + "logps/rejected": -903.9667358398438, + "loss": 0.3648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8528610467910767, + "rewards/margins": 4.1765031814575195, + "rewards/rejected": -3.3236420154571533, + "step": 517 + }, + { + "epoch": 0.66, + "learning_rate": 7.820664880476255e-08, + "logits/chosen": -3.2847442626953125, + "logits/rejected": -3.188959836959839, + "logps/chosen": -243.78622436523438, + "logps/rejected": -834.29736328125, + "loss": 0.2655, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9403396844863892, + "rewards/margins": 4.5684099197387695, + "rewards/rejected": -3.628070116043091, + "step": 518 + }, + { + "epoch": 0.66, + "learning_rate": 7.812126090058219e-08, + "logits/chosen": -3.247957229614258, + "logits/rejected": -3.1304707527160645, + "logps/chosen": -282.64630126953125, + "logps/rejected": -806.189453125, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8832901120185852, + "rewards/margins": 4.2848615646362305, + "rewards/rejected": -3.401571750640869, + "step": 519 + }, + { + "epoch": 0.66, + "learning_rate": 7.803575286758363e-08, + "logits/chosen": -3.2145047187805176, + "logits/rejected": -3.037783145904541, + "logps/chosen": -286.93316650390625, + "logps/rejected": -583.8309326171875, + "loss": 0.3286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8776290416717529, + "rewards/margins": 3.027740478515625, + "rewards/rejected": -2.150111436843872, + "step": 520 + }, + { + "epoch": 0.66, + "learning_rate": 7.795012507104138e-08, + "logits/chosen": -3.256953239440918, + "logits/rejected": -3.012065887451172, + "logps/chosen": -306.9139404296875, + "logps/rejected": -1149.723876953125, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6737213134765625, + "rewards/margins": 4.532641410827637, + "rewards/rejected": -3.858920097351074, + "step": 521 + }, + { + "epoch": 0.67, + "learning_rate": 7.786437787674148e-08, + "logits/chosen": -3.2733168601989746, + "logits/rejected": -3.120448112487793, + "logps/chosen": -239.06793212890625, + "logps/rejected": -434.75396728515625, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2086349725723267, + "rewards/margins": 3.079336643218994, + "rewards/rejected": -1.870701551437378, + "step": 522 + }, + { + "epoch": 0.67, + "learning_rate": 7.77785116509801e-08, + "logits/chosen": -3.2625668048858643, + "logits/rejected": -3.1292028427124023, + "logps/chosen": -267.37451171875, + "logps/rejected": -728.0638427734375, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8828476071357727, + "rewards/margins": 3.6070876121520996, + "rewards/rejected": -2.724240303039551, + "step": 523 + }, + { + "epoch": 0.67, + "learning_rate": 7.769252676056186e-08, + "logits/chosen": -3.234973430633545, + "logits/rejected": -3.181455612182617, + "logps/chosen": -259.1497802734375, + "logps/rejected": -623.27392578125, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9709548950195312, + "rewards/margins": 3.7027177810668945, + "rewards/rejected": -2.731762647628784, + "step": 524 + }, + { + "epoch": 0.67, + "learning_rate": 7.760642357279825e-08, + "logits/chosen": -3.3221044540405273, + "logits/rejected": -3.204723358154297, + "logps/chosen": -264.5735778808594, + "logps/rejected": -664.690185546875, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0730102062225342, + "rewards/margins": 4.3547821044921875, + "rewards/rejected": -3.2817718982696533, + "step": 525 + }, + { + "epoch": 0.67, + "learning_rate": 7.752020245550617e-08, + "logits/chosen": -3.224801778793335, + "logits/rejected": -3.1399426460266113, + "logps/chosen": -278.9126281738281, + "logps/rejected": -680.254638671875, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9002716541290283, + "rewards/margins": 3.909503221511841, + "rewards/rejected": -3.0092315673828125, + "step": 526 + }, + { + "epoch": 0.67, + "learning_rate": 7.743386377700624e-08, + "logits/chosen": -3.230410575866699, + "logits/rejected": -3.116095542907715, + "logps/chosen": -253.297607421875, + "logps/rejected": -793.6036987304688, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9630340337753296, + "rewards/margins": 4.092770576477051, + "rewards/rejected": -3.1297364234924316, + "step": 527 + }, + { + "epoch": 0.67, + "learning_rate": 7.734740790612135e-08, + "logits/chosen": -3.223742961883545, + "logits/rejected": -2.9614508152008057, + "logps/chosen": -249.21334838867188, + "logps/rejected": -590.9315185546875, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0051895380020142, + "rewards/margins": 3.152940273284912, + "rewards/rejected": -2.1477508544921875, + "step": 528 + }, + { + "epoch": 0.67, + "learning_rate": 7.72608352121749e-08, + "logits/chosen": -3.2182412147521973, + "logits/rejected": -3.107832908630371, + "logps/chosen": -290.255615234375, + "logps/rejected": -371.3076171875, + "loss": 0.3362, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8200042843818665, + "rewards/margins": 2.224134922027588, + "rewards/rejected": -1.4041305780410767, + "step": 529 + }, + { + "epoch": 0.68, + "learning_rate": 7.717414606498946e-08, + "logits/chosen": -3.2205896377563477, + "logits/rejected": -3.0913562774658203, + "logps/chosen": -240.5312957763672, + "logps/rejected": -351.2569885253906, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8355430364608765, + "rewards/margins": 2.493551731109619, + "rewards/rejected": -1.6580085754394531, + "step": 530 + }, + { + "epoch": 0.68, + "learning_rate": 7.7087340834885e-08, + "logits/chosen": -3.176675796508789, + "logits/rejected": -3.017183780670166, + "logps/chosen": -281.84832763671875, + "logps/rejected": -361.05255126953125, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8578758239746094, + "rewards/margins": 2.004880428314209, + "rewards/rejected": -1.1470047235488892, + "step": 531 + }, + { + "epoch": 0.68, + "learning_rate": 7.700041989267735e-08, + "logits/chosen": -3.213366985321045, + "logits/rejected": -3.1395394802093506, + "logps/chosen": -278.75750732421875, + "logps/rejected": -657.2445068359375, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0544830560684204, + "rewards/margins": 3.957211494445801, + "rewards/rejected": -2.902728319168091, + "step": 532 + }, + { + "epoch": 0.68, + "learning_rate": 7.691338360967672e-08, + "logits/chosen": -3.2496109008789062, + "logits/rejected": -3.138824462890625, + "logps/chosen": -283.82757568359375, + "logps/rejected": -955.5682373046875, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0083786249160767, + "rewards/margins": 4.567265510559082, + "rewards/rejected": -3.558886766433716, + "step": 533 + }, + { + "epoch": 0.68, + "learning_rate": 7.682623235768597e-08, + "logits/chosen": -3.2388906478881836, + "logits/rejected": -3.1142959594726562, + "logps/chosen": -275.38861083984375, + "logps/rejected": -745.701171875, + "loss": 0.3365, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8948440551757812, + "rewards/margins": 4.276542663574219, + "rewards/rejected": -3.3816986083984375, + "step": 534 + }, + { + "epoch": 0.68, + "learning_rate": 7.673896650899911e-08, + "logits/chosen": -3.226477861404419, + "logits/rejected": -3.1375856399536133, + "logps/chosen": -254.49365234375, + "logps/rejected": -758.1466064453125, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9639724493026733, + "rewards/margins": 4.456911087036133, + "rewards/rejected": -3.492938280105591, + "step": 535 + }, + { + "epoch": 0.68, + "learning_rate": 7.665158643639968e-08, + "logits/chosen": -3.2455337047576904, + "logits/rejected": -3.2158942222595215, + "logps/chosen": -232.1070556640625, + "logps/rejected": -515.5385131835938, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8571532964706421, + "rewards/margins": 3.4924514293670654, + "rewards/rejected": -2.635298252105713, + "step": 536 + }, + { + "epoch": 0.68, + "learning_rate": 7.656409251315919e-08, + "logits/chosen": -3.283883571624756, + "logits/rejected": -3.1694986820220947, + "logps/chosen": -245.04071044921875, + "logps/rejected": -1066.221923828125, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8421295881271362, + "rewards/margins": 5.6372222900390625, + "rewards/rejected": -4.795092582702637, + "step": 537 + }, + { + "epoch": 0.69, + "learning_rate": 7.647648511303544e-08, + "logits/chosen": -3.206747055053711, + "logits/rejected": -3.0920236110687256, + "logps/chosen": -255.77711486816406, + "logps/rejected": -309.36444091796875, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9793335199356079, + "rewards/margins": 2.116485595703125, + "rewards/rejected": -1.137152075767517, + "step": 538 + }, + { + "epoch": 0.69, + "learning_rate": 7.638876461027104e-08, + "logits/chosen": -3.2299280166625977, + "logits/rejected": -3.133495807647705, + "logps/chosen": -264.3487854003906, + "logps/rejected": -637.8765258789062, + "loss": 0.3024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.873291015625, + "rewards/margins": 3.58868408203125, + "rewards/rejected": -2.71539306640625, + "step": 539 + }, + { + "epoch": 0.69, + "learning_rate": 7.63009313795917e-08, + "logits/chosen": -3.2871601581573486, + "logits/rejected": -3.2158679962158203, + "logps/chosen": -259.71417236328125, + "logps/rejected": -628.6317749023438, + "loss": 0.3205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7782173156738281, + "rewards/margins": 3.605863094329834, + "rewards/rejected": -2.827645778656006, + "step": 540 + }, + { + "epoch": 0.69, + "learning_rate": 7.621298579620477e-08, + "logits/chosen": -3.288991928100586, + "logits/rejected": -3.159212112426758, + "logps/chosen": -261.38177490234375, + "logps/rejected": -484.53778076171875, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0078362226486206, + "rewards/margins": 2.878875255584717, + "rewards/rejected": -1.8710389137268066, + "step": 541 + }, + { + "epoch": 0.69, + "learning_rate": 7.612492823579743e-08, + "logits/chosen": -3.239182472229004, + "logits/rejected": -3.038348913192749, + "logps/chosen": -249.31329345703125, + "logps/rejected": -446.888671875, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.023870825767517, + "rewards/margins": 2.784407138824463, + "rewards/rejected": -1.7605363130569458, + "step": 542 + }, + { + "epoch": 0.69, + "learning_rate": 7.603675907453533e-08, + "logits/chosen": -3.278347969055176, + "logits/rejected": -3.0552518367767334, + "logps/chosen": -239.4309844970703, + "logps/rejected": -478.0412902832031, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.108465552330017, + "rewards/margins": 2.827536106109619, + "rewards/rejected": -1.7190704345703125, + "step": 543 + }, + { + "epoch": 0.69, + "learning_rate": 7.594847868906076e-08, + "logits/chosen": -3.2673611640930176, + "logits/rejected": -3.161482334136963, + "logps/chosen": -250.8300018310547, + "logps/rejected": -595.26806640625, + "loss": 0.2848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0041427612304688, + "rewards/margins": 3.8504714965820312, + "rewards/rejected": -2.8463287353515625, + "step": 544 + }, + { + "epoch": 0.69, + "learning_rate": 7.586008745649118e-08, + "logits/chosen": -3.19612193107605, + "logits/rejected": -3.116224765777588, + "logps/chosen": -277.82025146484375, + "logps/rejected": -758.5462646484375, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.930310070514679, + "rewards/margins": 4.18342924118042, + "rewards/rejected": -3.2531189918518066, + "step": 545 + }, + { + "epoch": 0.7, + "learning_rate": 7.577158575441756e-08, + "logits/chosen": -3.2588765621185303, + "logits/rejected": -3.064006805419922, + "logps/chosen": -263.23321533203125, + "logps/rejected": -587.2333984375, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0284607410430908, + "rewards/margins": 3.5324065685272217, + "rewards/rejected": -2.503945827484131, + "step": 546 + }, + { + "epoch": 0.7, + "learning_rate": 7.56829739609028e-08, + "logits/chosen": -3.259798288345337, + "logits/rejected": -3.18719482421875, + "logps/chosen": -280.9007568359375, + "logps/rejected": -4309.5244140625, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9346923828125, + "rewards/margins": 4.1327056884765625, + "rewards/rejected": -3.1980133056640625, + "step": 547 + }, + { + "epoch": 0.7, + "learning_rate": 7.559425245448005e-08, + "logits/chosen": -3.1783008575439453, + "logits/rejected": -3.059762477874756, + "logps/chosen": -254.94435119628906, + "logps/rejected": -454.454345703125, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1022682189941406, + "rewards/margins": 3.0397775173187256, + "rewards/rejected": -1.937509298324585, + "step": 548 + }, + { + "epoch": 0.7, + "learning_rate": 7.550542161415117e-08, + "logits/chosen": -3.2539758682250977, + "logits/rejected": -3.0788626670837402, + "logps/chosen": -271.46820068359375, + "logps/rejected": -1112.750732421875, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8723891973495483, + "rewards/margins": 5.6296234130859375, + "rewards/rejected": -4.7572340965271, + "step": 549 + }, + { + "epoch": 0.7, + "learning_rate": 7.541648181938503e-08, + "logits/chosen": -3.239428997039795, + "logits/rejected": -3.178981304168701, + "logps/chosen": -266.95220947265625, + "logps/rejected": -773.0162353515625, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9986381530761719, + "rewards/margins": 4.601447582244873, + "rewards/rejected": -3.602809429168701, + "step": 550 + }, + { + "epoch": 0.7, + "learning_rate": 7.5327433450116e-08, + "logits/chosen": -3.2342119216918945, + "logits/rejected": -3.1784262657165527, + "logps/chosen": -262.12017822265625, + "logps/rejected": -1438.175537109375, + "loss": 0.3168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.826741099357605, + "rewards/margins": 6.397596836090088, + "rewards/rejected": -5.570856094360352, + "step": 551 + }, + { + "epoch": 0.7, + "learning_rate": 7.523827688674219e-08, + "logits/chosen": -3.2129855155944824, + "logits/rejected": -3.13611102104187, + "logps/chosen": -245.77462768554688, + "logps/rejected": -328.15576171875, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8962639570236206, + "rewards/margins": 2.572795867919922, + "rewards/rejected": -1.6765320301055908, + "step": 552 + }, + { + "epoch": 0.7, + "learning_rate": 7.514901251012394e-08, + "logits/chosen": -3.2307188510894775, + "logits/rejected": -3.119929313659668, + "logps/chosen": -254.70489501953125, + "logps/rejected": -583.8771362304688, + "loss": 0.316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9599281549453735, + "rewards/margins": 3.712261199951172, + "rewards/rejected": -2.752333164215088, + "step": 553 + }, + { + "epoch": 0.71, + "learning_rate": 7.505964070158213e-08, + "logits/chosen": -3.2871689796447754, + "logits/rejected": -3.0932729244232178, + "logps/chosen": -270.1034851074219, + "logps/rejected": -1310.4599609375, + "loss": 0.3342, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2587639093399048, + "rewards/margins": 6.464178085327148, + "rewards/rejected": -5.205413818359375, + "step": 554 + }, + { + "epoch": 0.71, + "learning_rate": 7.497016184289655e-08, + "logits/chosen": -3.2409658432006836, + "logits/rejected": -3.1526379585266113, + "logps/chosen": -299.20428466796875, + "logps/rejected": -503.13861083984375, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5971634387969971, + "rewards/margins": 2.8685646057128906, + "rewards/rejected": -2.2714011669158936, + "step": 555 + }, + { + "epoch": 0.71, + "learning_rate": 7.488057631630437e-08, + "logits/chosen": -3.2440457344055176, + "logits/rejected": -3.058347702026367, + "logps/chosen": -317.0685119628906, + "logps/rejected": -549.3489990234375, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9542999267578125, + "rewards/margins": 3.0746474266052246, + "rewards/rejected": -2.120347499847412, + "step": 556 + }, + { + "epoch": 0.71, + "learning_rate": 7.47908845044983e-08, + "logits/chosen": -3.2667548656463623, + "logits/rejected": -3.1356656551361084, + "logps/chosen": -258.975830078125, + "logps/rejected": -553.007568359375, + "loss": 0.329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9200332760810852, + "rewards/margins": 3.3022384643554688, + "rewards/rejected": -2.3822052478790283, + "step": 557 + }, + { + "epoch": 0.71, + "learning_rate": 7.47010867906252e-08, + "logits/chosen": -3.2383198738098145, + "logits/rejected": -3.109431028366089, + "logps/chosen": -292.2282409667969, + "logps/rejected": -657.6223754882812, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7256149053573608, + "rewards/margins": 3.901085138320923, + "rewards/rejected": -3.1754701137542725, + "step": 558 + }, + { + "epoch": 0.71, + "learning_rate": 7.461118355828424e-08, + "logits/chosen": -3.244302749633789, + "logits/rejected": -3.135364294052124, + "logps/chosen": -244.8573455810547, + "logps/rejected": -935.609130859375, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8264442682266235, + "rewards/margins": 4.255320072174072, + "rewards/rejected": -3.4288759231567383, + "step": 559 + }, + { + "epoch": 0.71, + "learning_rate": 7.452117519152541e-08, + "logits/chosen": -3.2582993507385254, + "logits/rejected": -3.0757462978363037, + "logps/chosen": -271.85418701171875, + "logps/rejected": -382.9568786621094, + "loss": 0.3285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9241706728935242, + "rewards/margins": 2.665747880935669, + "rewards/rejected": -1.7415771484375, + "step": 560 + }, + { + "epoch": 0.72, + "learning_rate": 7.443106207484775e-08, + "logits/chosen": -3.2471673488616943, + "logits/rejected": -3.166207790374756, + "logps/chosen": -267.0723876953125, + "logps/rejected": -655.58203125, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8119583129882812, + "rewards/margins": 4.501498222351074, + "rewards/rejected": -3.689539909362793, + "step": 561 + }, + { + "epoch": 0.72, + "learning_rate": 7.434084459319781e-08, + "logits/chosen": -3.244582176208496, + "logits/rejected": -3.1418118476867676, + "logps/chosen": -272.6268310546875, + "logps/rejected": -685.0859985351562, + "loss": 0.3225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8363860845565796, + "rewards/margins": 3.95896315574646, + "rewards/rejected": -3.122576951980591, + "step": 562 + }, + { + "epoch": 0.72, + "learning_rate": 7.425052313196798e-08, + "logits/chosen": -3.261486053466797, + "logits/rejected": -3.1395087242126465, + "logps/chosen": -285.8348083496094, + "logps/rejected": -849.5155029296875, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8778716921806335, + "rewards/margins": 4.803436279296875, + "rewards/rejected": -3.925564765930176, + "step": 563 + }, + { + "epoch": 0.72, + "learning_rate": 7.41600980769948e-08, + "logits/chosen": -3.226691484451294, + "logits/rejected": -3.1995387077331543, + "logps/chosen": -289.5733642578125, + "logps/rejected": -846.9449462890625, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8912338614463806, + "rewards/margins": 4.8650665283203125, + "rewards/rejected": -3.973832845687866, + "step": 564 + }, + { + "epoch": 0.72, + "learning_rate": 7.406956981455736e-08, + "logits/chosen": -3.2394824028015137, + "logits/rejected": -3.071074962615967, + "logps/chosen": -223.1629638671875, + "logps/rejected": -381.79962158203125, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0198570489883423, + "rewards/margins": 2.4093804359436035, + "rewards/rejected": -1.3895232677459717, + "step": 565 + }, + { + "epoch": 0.72, + "learning_rate": 7.397893873137563e-08, + "logits/chosen": -3.220445156097412, + "logits/rejected": -3.1116747856140137, + "logps/chosen": -259.60736083984375, + "logps/rejected": -1365.7388916015625, + "loss": 0.288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9649902582168579, + "rewards/margins": 6.447527885437012, + "rewards/rejected": -5.482538223266602, + "step": 566 + }, + { + "epoch": 0.72, + "learning_rate": 7.38882052146088e-08, + "logits/chosen": -3.272402286529541, + "logits/rejected": -3.0550789833068848, + "logps/chosen": -250.29086303710938, + "logps/rejected": -1127.1846923828125, + "loss": 0.3067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9509223699569702, + "rewards/margins": 5.884130001068115, + "rewards/rejected": -4.9332075119018555, + "step": 567 + }, + { + "epoch": 0.72, + "learning_rate": 7.379736965185368e-08, + "logits/chosen": -3.1742987632751465, + "logits/rejected": -3.206357002258301, + "logps/chosen": -320.2076721191406, + "logps/rejected": -802.1028442382812, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7085403203964233, + "rewards/margins": 4.7036638259887695, + "rewards/rejected": -3.9951233863830566, + "step": 568 + }, + { + "epoch": 0.73, + "learning_rate": 7.370643243114294e-08, + "logits/chosen": -3.2539849281311035, + "logits/rejected": -3.120335340499878, + "logps/chosen": -269.009765625, + "logps/rejected": -562.8438720703125, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9987289309501648, + "rewards/margins": 3.6984682083129883, + "rewards/rejected": -2.6997392177581787, + "step": 569 + }, + { + "epoch": 0.73, + "learning_rate": 7.361539394094355e-08, + "logits/chosen": -3.229133367538452, + "logits/rejected": -3.108680486679077, + "logps/chosen": -225.53549194335938, + "logps/rejected": -404.02520751953125, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8450263738632202, + "rewards/margins": 2.3409385681152344, + "rewards/rejected": -1.4959121942520142, + "step": 570 + }, + { + "epoch": 0.73, + "learning_rate": 7.352425457015508e-08, + "logits/chosen": -3.180424213409424, + "logits/rejected": -3.1646041870117188, + "logps/chosen": -273.06597900390625, + "logps/rejected": -799.8272705078125, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1086068153381348, + "rewards/margins": 4.988165855407715, + "rewards/rejected": -3.8795595169067383, + "step": 571 + }, + { + "epoch": 0.73, + "learning_rate": 7.343301470810807e-08, + "logits/chosen": -3.222696304321289, + "logits/rejected": -3.0970664024353027, + "logps/chosen": -255.37274169921875, + "logps/rejected": -3740.154296875, + "loss": 0.2702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8663269281387329, + "rewards/margins": 6.693624973297119, + "rewards/rejected": -5.827298164367676, + "step": 572 + }, + { + "epoch": 0.73, + "learning_rate": 7.334167474456227e-08, + "logits/chosen": -3.2171292304992676, + "logits/rejected": -3.1634645462036133, + "logps/chosen": -244.93177795410156, + "logps/rejected": -796.8452758789062, + "loss": 0.2867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9363784790039062, + "rewards/margins": 4.7151594161987305, + "rewards/rejected": -3.7787811756134033, + "step": 573 + }, + { + "epoch": 0.73, + "learning_rate": 7.325023506970511e-08, + "logits/chosen": -3.205413818359375, + "logits/rejected": -3.0805439949035645, + "logps/chosen": -269.09320068359375, + "logps/rejected": -1290.1744384765625, + "loss": 0.3287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1033439636230469, + "rewards/margins": 6.440108299255371, + "rewards/rejected": -5.336764335632324, + "step": 574 + }, + { + "epoch": 0.73, + "learning_rate": 7.315869607414992e-08, + "logits/chosen": -3.223808526992798, + "logits/rejected": -3.134195327758789, + "logps/chosen": -295.7995910644531, + "logps/rejected": -697.267333984375, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8658767938613892, + "rewards/margins": 3.5332136154174805, + "rewards/rejected": -2.667336940765381, + "step": 575 + }, + { + "epoch": 0.73, + "learning_rate": 7.306705814893439e-08, + "logits/chosen": -3.18438458442688, + "logits/rejected": -3.205685615539551, + "logps/chosen": -261.6709899902344, + "logps/rejected": -891.722900390625, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7725235223770142, + "rewards/margins": 5.4653825759887695, + "rewards/rejected": -4.692858695983887, + "step": 576 + }, + { + "epoch": 0.74, + "learning_rate": 7.297532168551871e-08, + "logits/chosen": -3.2606468200683594, + "logits/rejected": -3.076374053955078, + "logps/chosen": -261.97589111328125, + "logps/rejected": -586.0733642578125, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7658416628837585, + "rewards/margins": 3.4239258766174316, + "rewards/rejected": -2.6580841541290283, + "step": 577 + }, + { + "epoch": 0.74, + "learning_rate": 7.288348707578408e-08, + "logits/chosen": -3.287463665008545, + "logits/rejected": -3.1117029190063477, + "logps/chosen": -287.5086975097656, + "logps/rejected": -730.4693603515625, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9130302667617798, + "rewards/margins": 4.72672176361084, + "rewards/rejected": -3.8136916160583496, + "step": 578 + }, + { + "epoch": 0.74, + "learning_rate": 7.279155471203095e-08, + "logits/chosen": -3.2053678035736084, + "logits/rejected": -3.1365084648132324, + "logps/chosen": -246.35557556152344, + "logps/rejected": -645.244140625, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5647087097167969, + "rewards/margins": 3.900554656982422, + "rewards/rejected": -3.335845947265625, + "step": 579 + }, + { + "epoch": 0.74, + "learning_rate": 7.269952498697734e-08, + "logits/chosen": -3.213503360748291, + "logits/rejected": -3.047102451324463, + "logps/chosen": -229.6963348388672, + "logps/rejected": -824.8508911132812, + "loss": 0.2569, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0614440441131592, + "rewards/margins": 4.47882080078125, + "rewards/rejected": -3.4173765182495117, + "step": 580 + }, + { + "epoch": 0.74, + "learning_rate": 7.260739829375719e-08, + "logits/chosen": -3.2178454399108887, + "logits/rejected": -3.172919273376465, + "logps/chosen": -250.12709045410156, + "logps/rejected": -696.6937255859375, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8512458801269531, + "rewards/margins": 4.6778483390808105, + "rewards/rejected": -3.8266024589538574, + "step": 581 + }, + { + "epoch": 0.74, + "learning_rate": 7.251517502591869e-08, + "logits/chosen": -3.286540985107422, + "logits/rejected": -3.1085662841796875, + "logps/chosen": -270.69293212890625, + "logps/rejected": -979.2383422851562, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0621674060821533, + "rewards/margins": 5.0521087646484375, + "rewards/rejected": -3.9899415969848633, + "step": 582 + }, + { + "epoch": 0.74, + "learning_rate": 7.242285557742254e-08, + "logits/chosen": -3.2587881088256836, + "logits/rejected": -3.179022789001465, + "logps/chosen": -240.91409301757812, + "logps/rejected": -695.6497802734375, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7323830127716064, + "rewards/margins": 4.471442222595215, + "rewards/rejected": -3.7390594482421875, + "step": 583 + }, + { + "epoch": 0.74, + "learning_rate": 7.233044034264033e-08, + "logits/chosen": -3.224533796310425, + "logits/rejected": -3.1273012161254883, + "logps/chosen": -310.4874267578125, + "logps/rejected": -626.1445922851562, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0259751081466675, + "rewards/margins": 3.7854599952697754, + "rewards/rejected": -2.7594850063323975, + "step": 584 + }, + { + "epoch": 0.75, + "learning_rate": 7.223792971635282e-08, + "logits/chosen": -3.200220823287964, + "logits/rejected": -3.062971591949463, + "logps/chosen": -236.58901977539062, + "logps/rejected": -541.1466064453125, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9527702331542969, + "rewards/margins": 3.126537322998047, + "rewards/rejected": -2.17376708984375, + "step": 585 + }, + { + "epoch": 0.75, + "learning_rate": 7.214532409374828e-08, + "logits/chosen": -3.214277982711792, + "logits/rejected": -3.218873977661133, + "logps/chosen": -257.7005920410156, + "logps/rejected": -980.273681640625, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0260215997695923, + "rewards/margins": 6.05336856842041, + "rewards/rejected": -5.027346611022949, + "step": 586 + }, + { + "epoch": 0.75, + "learning_rate": 7.205262387042078e-08, + "logits/chosen": -3.2160706520080566, + "logits/rejected": -3.1762681007385254, + "logps/chosen": -274.85791015625, + "logps/rejected": -693.7561645507812, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7792274951934814, + "rewards/margins": 3.4999260902404785, + "rewards/rejected": -2.720698595046997, + "step": 587 + }, + { + "epoch": 0.75, + "learning_rate": 7.195982944236851e-08, + "logits/chosen": -3.280170440673828, + "logits/rejected": -3.187361478805542, + "logps/chosen": -262.7057189941406, + "logps/rejected": -854.3911743164062, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8415611982345581, + "rewards/margins": 4.764021873474121, + "rewards/rejected": -3.9224610328674316, + "step": 588 + }, + { + "epoch": 0.75, + "learning_rate": 7.186694120599208e-08, + "logits/chosen": -3.239792823791504, + "logits/rejected": -3.0875296592712402, + "logps/chosen": -298.57275390625, + "logps/rejected": -351.06402587890625, + "loss": 0.3313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9123398065567017, + "rewards/margins": 2.076491594314575, + "rewards/rejected": -1.1641517877578735, + "step": 589 + }, + { + "epoch": 0.75, + "learning_rate": 7.17739595580928e-08, + "logits/chosen": -3.265353202819824, + "logits/rejected": -3.079379081726074, + "logps/chosen": -261.0533752441406, + "logps/rejected": -655.3694458007812, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.995015025138855, + "rewards/margins": 3.8897385597229004, + "rewards/rejected": -2.894723415374756, + "step": 590 + }, + { + "epoch": 0.75, + "learning_rate": 7.16808848958711e-08, + "logits/chosen": -3.2736306190490723, + "logits/rejected": -3.1220710277557373, + "logps/chosen": -305.14874267578125, + "logps/rejected": -534.415771484375, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1659424304962158, + "rewards/margins": 3.5129776000976562, + "rewards/rejected": -2.3470351696014404, + "step": 591 + }, + { + "epoch": 0.75, + "learning_rate": 7.158771761692464e-08, + "logits/chosen": -3.2637410163879395, + "logits/rejected": -3.125340223312378, + "logps/chosen": -258.11962890625, + "logps/rejected": -864.3178100585938, + "loss": 0.279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7761329412460327, + "rewards/margins": 4.767758846282959, + "rewards/rejected": -3.991626024246216, + "step": 592 + }, + { + "epoch": 0.76, + "learning_rate": 7.149445811924683e-08, + "logits/chosen": -3.2110092639923096, + "logits/rejected": -3.0344882011413574, + "logps/chosen": -258.7489929199219, + "logps/rejected": -510.6515197753906, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0471770763397217, + "rewards/margins": 2.9412474632263184, + "rewards/rejected": -1.8940703868865967, + "step": 593 + }, + { + "epoch": 0.76, + "learning_rate": 7.140110680122495e-08, + "logits/chosen": -3.2582991123199463, + "logits/rejected": -3.1372828483581543, + "logps/chosen": -262.79132080078125, + "logps/rejected": -629.2630615234375, + "loss": 0.3202, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.03734290599823, + "rewards/margins": 4.251989841461182, + "rewards/rejected": -3.214647054672241, + "step": 594 + }, + { + "epoch": 0.76, + "learning_rate": 7.130766406163855e-08, + "logits/chosen": -3.1890523433685303, + "logits/rejected": -3.1476643085479736, + "logps/chosen": -253.9185791015625, + "logps/rejected": -447.8146057128906, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6388603448867798, + "rewards/margins": 2.8200767040252686, + "rewards/rejected": -2.1812164783477783, + "step": 595 + }, + { + "epoch": 0.76, + "learning_rate": 7.121413029965768e-08, + "logits/chosen": -3.3165512084960938, + "logits/rejected": -3.1387786865234375, + "logps/chosen": -224.49395751953125, + "logps/rejected": -328.468505859375, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9167922735214233, + "rewards/margins": 2.3120408058166504, + "rewards/rejected": -1.3952484130859375, + "step": 596 + }, + { + "epoch": 0.76, + "learning_rate": 7.112050591484129e-08, + "logits/chosen": -3.261037826538086, + "logits/rejected": -2.964733123779297, + "logps/chosen": -267.63055419921875, + "logps/rejected": -1564.934326171875, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1881431341171265, + "rewards/margins": 6.636519432067871, + "rewards/rejected": -5.448376655578613, + "step": 597 + }, + { + "epoch": 0.76, + "learning_rate": 7.102679130713537e-08, + "logits/chosen": -3.223822832107544, + "logits/rejected": -3.1310384273529053, + "logps/chosen": -243.99282836914062, + "logps/rejected": -982.2159423828125, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9472618103027344, + "rewards/margins": 5.428786754608154, + "rewards/rejected": -4.48152494430542, + "step": 598 + }, + { + "epoch": 0.76, + "learning_rate": 7.093298687687141e-08, + "logits/chosen": -3.2493557929992676, + "logits/rejected": -3.1215977668762207, + "logps/chosen": -284.9884948730469, + "logps/rejected": -442.31103515625, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0769813060760498, + "rewards/margins": 2.8159494400024414, + "rewards/rejected": -1.7389678955078125, + "step": 599 + }, + { + "epoch": 0.76, + "learning_rate": 7.083909302476451e-08, + "logits/chosen": -3.1779065132141113, + "logits/rejected": -3.098649024963379, + "logps/chosen": -236.6397705078125, + "logps/rejected": -406.64422607421875, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.174471378326416, + "rewards/margins": 2.7964425086975098, + "rewards/rejected": -1.6219711303710938, + "step": 600 + }, + { + "epoch": 0.77, + "learning_rate": 7.074511015191187e-08, + "logits/chosen": -3.2432847023010254, + "logits/rejected": -3.113938331604004, + "logps/chosen": -241.98699951171875, + "logps/rejected": -723.487060546875, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0948158502578735, + "rewards/margins": 4.600130558013916, + "rewards/rejected": -3.505314826965332, + "step": 601 + }, + { + "epoch": 0.77, + "learning_rate": 7.065103865979087e-08, + "logits/chosen": -3.266310214996338, + "logits/rejected": -3.1745550632476807, + "logps/chosen": -240.62008666992188, + "logps/rejected": -1061.3353271484375, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7976447939872742, + "rewards/margins": 6.200362205505371, + "rewards/rejected": -5.402717590332031, + "step": 602 + }, + { + "epoch": 0.77, + "learning_rate": 7.055687895025755e-08, + "logits/chosen": -3.1722707748413086, + "logits/rejected": -3.1752963066101074, + "logps/chosen": -266.89453125, + "logps/rejected": -630.9573974609375, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.073034644126892, + "rewards/margins": 4.180633544921875, + "rewards/rejected": -3.1075987815856934, + "step": 603 + }, + { + "epoch": 0.77, + "learning_rate": 7.046263142554469e-08, + "logits/chosen": -3.2284371852874756, + "logits/rejected": -3.092564105987549, + "logps/chosen": -295.15374755859375, + "logps/rejected": -535.1867065429688, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9328933954238892, + "rewards/margins": 3.586744785308838, + "rewards/rejected": -2.6538515090942383, + "step": 604 + }, + { + "epoch": 0.77, + "learning_rate": 7.036829648826032e-08, + "logits/chosen": -3.2096123695373535, + "logits/rejected": -3.1269803047180176, + "logps/chosen": -285.2486572265625, + "logps/rejected": -786.1339721679688, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2445160150527954, + "rewards/margins": 4.520504951477051, + "rewards/rejected": -3.275988817214966, + "step": 605 + }, + { + "epoch": 0.77, + "learning_rate": 7.027387454138578e-08, + "logits/chosen": -3.3013057708740234, + "logits/rejected": -3.158067464828491, + "logps/chosen": -263.56011962890625, + "logps/rejected": -312.0362548828125, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9222580194473267, + "rewards/margins": 2.0930771827697754, + "rewards/rejected": -1.1708190441131592, + "step": 606 + }, + { + "epoch": 0.77, + "learning_rate": 7.017936598827415e-08, + "logits/chosen": -3.236349105834961, + "logits/rejected": -3.132878303527832, + "logps/chosen": -287.46337890625, + "logps/rejected": -865.60302734375, + "loss": 0.3197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1919952630996704, + "rewards/margins": 5.109402656555176, + "rewards/rejected": -3.917407512664795, + "step": 607 + }, + { + "epoch": 0.78, + "learning_rate": 7.008477123264848e-08, + "logits/chosen": -3.195064067840576, + "logits/rejected": -2.998166561126709, + "logps/chosen": -265.8143310546875, + "logps/rejected": -1197.5416259765625, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0611282587051392, + "rewards/margins": 5.51237678527832, + "rewards/rejected": -4.4512481689453125, + "step": 608 + }, + { + "epoch": 0.78, + "learning_rate": 6.999009067860001e-08, + "logits/chosen": -3.277787208557129, + "logits/rejected": -3.1659440994262695, + "logps/chosen": -281.638916015625, + "logps/rejected": -646.485595703125, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.006556749343872, + "rewards/margins": 3.185063123703003, + "rewards/rejected": -2.17850661277771, + "step": 609 + }, + { + "epoch": 0.78, + "learning_rate": 6.989532473058657e-08, + "logits/chosen": -3.2436485290527344, + "logits/rejected": -3.0960240364074707, + "logps/chosen": -296.03448486328125, + "logps/rejected": -1084.05224609375, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9947860836982727, + "rewards/margins": 5.115156650543213, + "rewards/rejected": -4.120370864868164, + "step": 610 + }, + { + "epoch": 0.78, + "learning_rate": 6.98004737934307e-08, + "logits/chosen": -3.20511531829834, + "logits/rejected": -3.0409116744995117, + "logps/chosen": -278.213623046875, + "logps/rejected": -1632.8734130859375, + "loss": 0.3182, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1178252696990967, + "rewards/margins": 8.354697227478027, + "rewards/rejected": -7.236871719360352, + "step": 611 + }, + { + "epoch": 0.78, + "learning_rate": 6.970553827231808e-08, + "logits/chosen": -3.2272939682006836, + "logits/rejected": -3.0440545082092285, + "logps/chosen": -259.5163879394531, + "logps/rejected": -322.5784606933594, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0726699829101562, + "rewards/margins": 2.1513123512268066, + "rewards/rejected": -1.0786423683166504, + "step": 612 + }, + { + "epoch": 0.78, + "learning_rate": 6.961051857279564e-08, + "logits/chosen": -3.216578483581543, + "logits/rejected": -3.0863046646118164, + "logps/chosen": -280.2833557128906, + "logps/rejected": -496.58868408203125, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.97314453125, + "rewards/margins": 3.357011556625366, + "rewards/rejected": -2.383866786956787, + "step": 613 + }, + { + "epoch": 0.78, + "learning_rate": 6.951541510076994e-08, + "logits/chosen": -3.139549970626831, + "logits/rejected": -3.0547337532043457, + "logps/chosen": -317.1626892089844, + "logps/rejected": -725.947265625, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9915282726287842, + "rewards/margins": 4.023150444030762, + "rewards/rejected": -3.0316224098205566, + "step": 614 + }, + { + "epoch": 0.78, + "learning_rate": 6.942022826250543e-08, + "logits/chosen": -3.269258975982666, + "logits/rejected": -3.094508171081543, + "logps/chosen": -271.33221435546875, + "logps/rejected": -464.17791748046875, + "loss": 0.3099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0615631341934204, + "rewards/margins": 3.013458251953125, + "rewards/rejected": -1.9518951177597046, + "step": 615 + }, + { + "epoch": 0.79, + "learning_rate": 6.932495846462261e-08, + "logits/chosen": -3.225459575653076, + "logits/rejected": -3.050647497177124, + "logps/chosen": -233.33091735839844, + "logps/rejected": -343.45263671875, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9184738397598267, + "rewards/margins": 2.471754550933838, + "rewards/rejected": -1.5532805919647217, + "step": 616 + }, + { + "epoch": 0.79, + "learning_rate": 6.922960611409643e-08, + "logits/chosen": -3.228908061981201, + "logits/rejected": -3.0634069442749023, + "logps/chosen": -275.214599609375, + "logps/rejected": -1331.5845947265625, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.115410566329956, + "rewards/margins": 6.295782089233398, + "rewards/rejected": -5.180371284484863, + "step": 617 + }, + { + "epoch": 0.79, + "learning_rate": 6.913417161825448e-08, + "logits/chosen": -3.2381019592285156, + "logits/rejected": -3.088548183441162, + "logps/chosen": -266.2345886230469, + "logps/rejected": -1328.414794921875, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9860031604766846, + "rewards/margins": 6.029878616333008, + "rewards/rejected": -5.043875217437744, + "step": 618 + }, + { + "epoch": 0.79, + "learning_rate": 6.903865538477526e-08, + "logits/chosen": -3.2728798389434814, + "logits/rejected": -3.1775293350219727, + "logps/chosen": -285.69500732421875, + "logps/rejected": -401.2793273925781, + "loss": 0.3423, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1208794116973877, + "rewards/margins": 3.3015360832214355, + "rewards/rejected": -2.1806564331054688, + "step": 619 + }, + { + "epoch": 0.79, + "learning_rate": 6.894305782168638e-08, + "logits/chosen": -3.224571704864502, + "logits/rejected": -3.1327457427978516, + "logps/chosen": -283.7574768066406, + "logps/rejected": -818.4495239257812, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8474701046943665, + "rewards/margins": 5.032431125640869, + "rewards/rejected": -4.184960842132568, + "step": 620 + }, + { + "epoch": 0.79, + "learning_rate": 6.884737933736297e-08, + "logits/chosen": -3.3256936073303223, + "logits/rejected": -3.236123561859131, + "logps/chosen": -264.15972900390625, + "logps/rejected": -594.8782958984375, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.057489037513733, + "rewards/margins": 4.481839179992676, + "rewards/rejected": -3.4243500232696533, + "step": 621 + }, + { + "epoch": 0.79, + "learning_rate": 6.875162034052578e-08, + "logits/chosen": -3.251702308654785, + "logits/rejected": -3.22036075592041, + "logps/chosen": -273.0546875, + "logps/rejected": -755.664794921875, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9843490123748779, + "rewards/margins": 4.297715663909912, + "rewards/rejected": -3.3133668899536133, + "step": 622 + }, + { + "epoch": 0.79, + "learning_rate": 6.865578124023951e-08, + "logits/chosen": -3.2713143825531006, + "logits/rejected": -3.2024383544921875, + "logps/chosen": -249.31451416015625, + "logps/rejected": -1101.035888671875, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8694428205490112, + "rewards/margins": 5.779019355773926, + "rewards/rejected": -4.909576416015625, + "step": 623 + }, + { + "epoch": 0.8, + "learning_rate": 6.855986244591103e-08, + "logits/chosen": -3.2321414947509766, + "logits/rejected": -3.1150474548339844, + "logps/chosen": -292.8340148925781, + "logps/rejected": -542.078369140625, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0546127557754517, + "rewards/margins": 3.5861618518829346, + "rewards/rejected": -2.5315492153167725, + "step": 624 + }, + { + "epoch": 0.8, + "learning_rate": 6.846386436728771e-08, + "logits/chosen": -3.240878105163574, + "logits/rejected": -3.080671787261963, + "logps/chosen": -245.03878784179688, + "logps/rejected": -396.94207763671875, + "loss": 0.3397, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1157280206680298, + "rewards/margins": 2.6267471313476562, + "rewards/rejected": -1.511019229888916, + "step": 625 + }, + { + "epoch": 0.8, + "learning_rate": 6.836778741445549e-08, + "logits/chosen": -3.1966052055358887, + "logits/rejected": -3.1615121364593506, + "logps/chosen": -266.1714782714844, + "logps/rejected": -692.537841796875, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0822906494140625, + "rewards/margins": 4.427861213684082, + "rewards/rejected": -3.3455705642700195, + "step": 626 + }, + { + "epoch": 0.8, + "learning_rate": 6.827163199783739e-08, + "logits/chosen": -3.1559183597564697, + "logits/rejected": -3.199906349182129, + "logps/chosen": -281.5062561035156, + "logps/rejected": -783.4947509765625, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2268959283828735, + "rewards/margins": 4.66637659072876, + "rewards/rejected": -3.439480781555176, + "step": 627 + }, + { + "epoch": 0.8, + "learning_rate": 6.817539852819148e-08, + "logits/chosen": -3.265791416168213, + "logits/rejected": -3.1110944747924805, + "logps/chosen": -312.884765625, + "logps/rejected": -910.9104614257812, + "loss": 0.3403, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.275538682937622, + "rewards/margins": 4.988102912902832, + "rewards/rejected": -3.712563991546631, + "step": 628 + }, + { + "epoch": 0.8, + "learning_rate": 6.807908741660938e-08, + "logits/chosen": -3.2958054542541504, + "logits/rejected": -3.187185287475586, + "logps/chosen": -263.21173095703125, + "logps/rejected": -578.08203125, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.781585693359375, + "rewards/margins": 3.4872331619262695, + "rewards/rejected": -2.7056474685668945, + "step": 629 + }, + { + "epoch": 0.8, + "learning_rate": 6.798269907451427e-08, + "logits/chosen": -3.2476069927215576, + "logits/rejected": -3.1101737022399902, + "logps/chosen": -257.3448486328125, + "logps/rejected": -346.5738830566406, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.031672716140747, + "rewards/margins": 2.364201545715332, + "rewards/rejected": -1.3325287103652954, + "step": 630 + }, + { + "epoch": 0.8, + "learning_rate": 6.788623391365932e-08, + "logits/chosen": -3.266244411468506, + "logits/rejected": -3.1044979095458984, + "logps/chosen": -329.3221435546875, + "logps/rejected": -505.4480895996094, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0845839977264404, + "rewards/margins": 2.9441146850585938, + "rewards/rejected": -1.8595306873321533, + "step": 631 + }, + { + "epoch": 0.81, + "learning_rate": 6.778969234612582e-08, + "logits/chosen": -3.2298102378845215, + "logits/rejected": -3.0549821853637695, + "logps/chosen": -293.5399475097656, + "logps/rejected": -571.260498046875, + "loss": 0.3694, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.200524091720581, + "rewards/margins": 3.830416202545166, + "rewards/rejected": -2.629891872406006, + "step": 632 + }, + { + "epoch": 0.81, + "learning_rate": 6.769307478432149e-08, + "logits/chosen": -3.235677719116211, + "logits/rejected": -3.1012868881225586, + "logps/chosen": -261.2768859863281, + "logps/rejected": -861.76318359375, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.982733964920044, + "rewards/margins": 5.362702369689941, + "rewards/rejected": -4.379968643188477, + "step": 633 + }, + { + "epoch": 0.81, + "learning_rate": 6.759638164097861e-08, + "logits/chosen": -3.209372043609619, + "logits/rejected": -3.0525801181793213, + "logps/chosen": -254.92202758789062, + "logps/rejected": -293.87884521484375, + "loss": 0.3247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1337075233459473, + "rewards/margins": 2.264665365219116, + "rewards/rejected": -1.130957841873169, + "step": 634 + }, + { + "epoch": 0.81, + "learning_rate": 6.749961332915241e-08, + "logits/chosen": -3.2575650215148926, + "logits/rejected": -3.1438727378845215, + "logps/chosen": -320.60498046875, + "logps/rejected": -812.3399658203125, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0762649774551392, + "rewards/margins": 4.965822219848633, + "rewards/rejected": -3.889556884765625, + "step": 635 + }, + { + "epoch": 0.81, + "learning_rate": 6.740277026221922e-08, + "logits/chosen": -3.204127311706543, + "logits/rejected": -3.1459975242614746, + "logps/chosen": -284.52960205078125, + "logps/rejected": -709.6529541015625, + "loss": 0.3576, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9799057245254517, + "rewards/margins": 4.42161750793457, + "rewards/rejected": -3.44171142578125, + "step": 636 + }, + { + "epoch": 0.81, + "learning_rate": 6.730585285387464e-08, + "logits/chosen": -3.250396966934204, + "logits/rejected": -3.1122241020202637, + "logps/chosen": -251.3563690185547, + "logps/rejected": -609.86376953125, + "loss": 0.3213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8787628412246704, + "rewards/margins": 4.0643463134765625, + "rewards/rejected": -3.1855835914611816, + "step": 637 + }, + { + "epoch": 0.81, + "learning_rate": 6.720886151813194e-08, + "logits/chosen": -3.2234911918640137, + "logits/rejected": -3.11915922164917, + "logps/chosen": -264.0916442871094, + "logps/rejected": -776.652587890625, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.147718906402588, + "rewards/margins": 4.977932929992676, + "rewards/rejected": -3.830214023590088, + "step": 638 + }, + { + "epoch": 0.81, + "learning_rate": 6.711179666932007e-08, + "logits/chosen": -3.234419584274292, + "logits/rejected": -3.120140552520752, + "logps/chosen": -267.8446350097656, + "logps/rejected": -496.87353515625, + "loss": 0.332, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0048507452011108, + "rewards/margins": 3.0367140769958496, + "rewards/rejected": -2.0318634510040283, + "step": 639 + }, + { + "epoch": 0.82, + "learning_rate": 6.701465872208215e-08, + "logits/chosen": -3.281862735748291, + "logits/rejected": -3.1928014755249023, + "logps/chosen": -245.78506469726562, + "logps/rejected": -810.86474609375, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.10369873046875, + "rewards/margins": 6.153329849243164, + "rewards/rejected": -5.049631118774414, + "step": 640 + }, + { + "epoch": 0.82, + "learning_rate": 6.691744809137347e-08, + "logits/chosen": -3.2574939727783203, + "logits/rejected": -3.083463668823242, + "logps/chosen": -258.482421875, + "logps/rejected": -381.8408508300781, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0227097272872925, + "rewards/margins": 2.8199875354766846, + "rewards/rejected": -1.7972779273986816, + "step": 641 + }, + { + "epoch": 0.82, + "learning_rate": 6.682016519245985e-08, + "logits/chosen": -3.2101845741271973, + "logits/rejected": -3.100653648376465, + "logps/chosen": -257.865478515625, + "logps/rejected": -1081.8653564453125, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.983141303062439, + "rewards/margins": 5.697175979614258, + "rewards/rejected": -4.7140350341796875, + "step": 642 + }, + { + "epoch": 0.82, + "learning_rate": 6.672281044091582e-08, + "logits/chosen": -3.1955673694610596, + "logits/rejected": -3.0994720458984375, + "logps/chosen": -268.1143493652344, + "logps/rejected": -543.6380615234375, + "loss": 0.2878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1773948669433594, + "rewards/margins": 3.691765785217285, + "rewards/rejected": -2.5143706798553467, + "step": 643 + }, + { + "epoch": 0.82, + "learning_rate": 6.662538425262284e-08, + "logits/chosen": -3.2392971515655518, + "logits/rejected": -3.104764938354492, + "logps/chosen": -289.302001953125, + "logps/rejected": -622.265625, + "loss": 0.3243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8296486139297485, + "rewards/margins": 3.782756805419922, + "rewards/rejected": -2.953108310699463, + "step": 644 + }, + { + "epoch": 0.82, + "learning_rate": 6.652788704376753e-08, + "logits/chosen": -3.2066736221313477, + "logits/rejected": -3.0650393962860107, + "logps/chosen": -238.0009002685547, + "logps/rejected": -903.4182739257812, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.018640160560608, + "rewards/margins": 4.849260330200195, + "rewards/rejected": -3.8306198120117188, + "step": 645 + }, + { + "epoch": 0.82, + "learning_rate": 6.643031923083994e-08, + "logits/chosen": -3.2809929847717285, + "logits/rejected": -3.133486747741699, + "logps/chosen": -274.993896484375, + "logps/rejected": -1851.325927734375, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9754173159599304, + "rewards/margins": 9.153264999389648, + "rewards/rejected": -8.177847862243652, + "step": 646 + }, + { + "epoch": 0.82, + "learning_rate": 6.633268123063168e-08, + "logits/chosen": -3.173837184906006, + "logits/rejected": -3.1701903343200684, + "logps/chosen": -268.07293701171875, + "logps/rejected": -884.62841796875, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8989151120185852, + "rewards/margins": 5.166470527648926, + "rewards/rejected": -4.267555236816406, + "step": 647 + }, + { + "epoch": 0.83, + "learning_rate": 6.623497346023418e-08, + "logits/chosen": -3.297623634338379, + "logits/rejected": -3.1436362266540527, + "logps/chosen": -261.3677978515625, + "logps/rejected": -1137.9376220703125, + "loss": 0.3207, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0334625244140625, + "rewards/margins": 6.033010959625244, + "rewards/rejected": -4.999548435211182, + "step": 648 + }, + { + "epoch": 0.83, + "learning_rate": 6.613719633703697e-08, + "logits/chosen": -3.2448368072509766, + "logits/rejected": -3.123293876647949, + "logps/chosen": -263.0838317871094, + "logps/rejected": -796.3887939453125, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0263817310333252, + "rewards/margins": 5.158782005310059, + "rewards/rejected": -4.1324005126953125, + "step": 649 + }, + { + "epoch": 0.83, + "learning_rate": 6.603935027872579e-08, + "logits/chosen": -3.233506202697754, + "logits/rejected": -3.169320583343506, + "logps/chosen": -276.4696960449219, + "logps/rejected": -956.7083740234375, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0019172430038452, + "rewards/margins": 5.420722007751465, + "rewards/rejected": -4.418805122375488, + "step": 650 + }, + { + "epoch": 0.83, + "learning_rate": 6.59414357032809e-08, + "logits/chosen": -3.2513160705566406, + "logits/rejected": -3.052734851837158, + "logps/chosen": -277.35211181640625, + "logps/rejected": -961.7281494140625, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0931724309921265, + "rewards/margins": 4.262937545776367, + "rewards/rejected": -3.169764995574951, + "step": 651 + }, + { + "epoch": 0.83, + "learning_rate": 6.584345302897522e-08, + "logits/chosen": -3.3227427005767822, + "logits/rejected": -3.1273107528686523, + "logps/chosen": -254.2098846435547, + "logps/rejected": -1062.238525390625, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1418274641036987, + "rewards/margins": 6.091583251953125, + "rewards/rejected": -4.949755668640137, + "step": 652 + }, + { + "epoch": 0.83, + "learning_rate": 6.574540267437259e-08, + "logits/chosen": -3.221045970916748, + "logits/rejected": -3.076141357421875, + "logps/chosen": -251.0912628173828, + "logps/rejected": -1332.3421630859375, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.137017846107483, + "rewards/margins": 6.007678508758545, + "rewards/rejected": -4.870660781860352, + "step": 653 + }, + { + "epoch": 0.83, + "learning_rate": 6.564728505832595e-08, + "logits/chosen": -3.24699330329895, + "logits/rejected": -3.1446967124938965, + "logps/chosen": -264.85028076171875, + "logps/rejected": -394.4859619140625, + "loss": 0.3104, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.024906873703003, + "rewards/margins": 3.0863983631134033, + "rewards/rejected": -2.0614914894104004, + "step": 654 + }, + { + "epoch": 0.83, + "learning_rate": 6.554910059997561e-08, + "logits/chosen": -3.235657215118408, + "logits/rejected": -3.071475028991699, + "logps/chosen": -282.7298278808594, + "logps/rejected": -611.5126953125, + "loss": 0.3271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9654037952423096, + "rewards/margins": 3.5037002563476562, + "rewards/rejected": -2.538296699523926, + "step": 655 + }, + { + "epoch": 0.84, + "learning_rate": 6.545084971874738e-08, + "logits/chosen": -3.211343765258789, + "logits/rejected": -3.1153154373168945, + "logps/chosen": -256.5293273925781, + "logps/rejected": -444.79156494140625, + "loss": 0.2888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9512420892715454, + "rewards/margins": 3.078991651535034, + "rewards/rejected": -2.1277496814727783, + "step": 656 + }, + { + "epoch": 0.84, + "learning_rate": 6.535253283435081e-08, + "logits/chosen": -3.2546184062957764, + "logits/rejected": -3.147806167602539, + "logps/chosen": -273.85943603515625, + "logps/rejected": -626.3049926757812, + "loss": 0.3237, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1051177978515625, + "rewards/margins": 4.198187828063965, + "rewards/rejected": -3.093069553375244, + "step": 657 + }, + { + "epoch": 0.84, + "learning_rate": 6.525415036677744e-08, + "logits/chosen": -3.202986001968384, + "logits/rejected": -3.089249849319458, + "logps/chosen": -239.606201171875, + "logps/rejected": -388.4881896972656, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1094681024551392, + "rewards/margins": 2.6606476306915283, + "rewards/rejected": -1.5511795282363892, + "step": 658 + }, + { + "epoch": 0.84, + "learning_rate": 6.515570273629896e-08, + "logits/chosen": -3.215503454208374, + "logits/rejected": -3.1276748180389404, + "logps/chosen": -243.036865234375, + "logps/rejected": -535.5747680664062, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9516884088516235, + "rewards/margins": 3.326282501220703, + "rewards/rejected": -2.374594211578369, + "step": 659 + }, + { + "epoch": 0.84, + "learning_rate": 6.505719036346537e-08, + "logits/chosen": -3.2353830337524414, + "logits/rejected": -3.153951644897461, + "logps/chosen": -272.19952392578125, + "logps/rejected": -823.4688110351562, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9637176990509033, + "rewards/margins": 5.243499755859375, + "rewards/rejected": -4.279782295227051, + "step": 660 + }, + { + "epoch": 0.84, + "learning_rate": 6.495861366910333e-08, + "logits/chosen": -3.2646002769470215, + "logits/rejected": -3.093869686126709, + "logps/chosen": -270.0985107421875, + "logps/rejected": -1111.5059814453125, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0263808965682983, + "rewards/margins": 5.181684970855713, + "rewards/rejected": -4.155303955078125, + "step": 661 + }, + { + "epoch": 0.84, + "learning_rate": 6.485997307431419e-08, + "logits/chosen": -3.2543797492980957, + "logits/rejected": -3.1336770057678223, + "logps/chosen": -256.4206237792969, + "logps/rejected": -339.0733337402344, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9556075930595398, + "rewards/margins": 2.2658286094665527, + "rewards/rejected": -1.3102210760116577, + "step": 662 + }, + { + "epoch": 0.85, + "learning_rate": 6.476126900047233e-08, + "logits/chosen": -3.1921422481536865, + "logits/rejected": -3.102653980255127, + "logps/chosen": -281.4469299316406, + "logps/rejected": -826.4521484375, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8775001764297485, + "rewards/margins": 5.613724708557129, + "rewards/rejected": -4.736224174499512, + "step": 663 + }, + { + "epoch": 0.85, + "learning_rate": 6.466250186922324e-08, + "logits/chosen": -3.268350601196289, + "logits/rejected": -3.162301540374756, + "logps/chosen": -278.08245849609375, + "logps/rejected": -831.8665161132812, + "loss": 0.3092, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2207717895507812, + "rewards/margins": 5.409049987792969, + "rewards/rejected": -4.1882781982421875, + "step": 664 + }, + { + "epoch": 0.85, + "learning_rate": 6.456367210248184e-08, + "logits/chosen": -3.2314090728759766, + "logits/rejected": -3.107562780380249, + "logps/chosen": -311.00018310546875, + "logps/rejected": -856.9881591796875, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.223555088043213, + "rewards/margins": 4.686534404754639, + "rewards/rejected": -3.462979316711426, + "step": 665 + }, + { + "epoch": 0.85, + "learning_rate": 6.446478012243055e-08, + "logits/chosen": -3.2630128860473633, + "logits/rejected": -3.1518702507019043, + "logps/chosen": -292.18408203125, + "logps/rejected": -722.9266357421875, + "loss": 0.3137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7320435047149658, + "rewards/margins": 4.458279609680176, + "rewards/rejected": -3.726235866546631, + "step": 666 + }, + { + "epoch": 0.85, + "learning_rate": 6.436582635151764e-08, + "logits/chosen": -3.2661538124084473, + "logits/rejected": -3.081404685974121, + "logps/chosen": -283.285888671875, + "logps/rejected": -539.0272216796875, + "loss": 0.323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8649413585662842, + "rewards/margins": 3.3580124378204346, + "rewards/rejected": -2.4930710792541504, + "step": 667 + }, + { + "epoch": 0.85, + "learning_rate": 6.426681121245526e-08, + "logits/chosen": -3.2931222915649414, + "logits/rejected": -3.0767946243286133, + "logps/chosen": -215.32333374023438, + "logps/rejected": -633.4215087890625, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2442436218261719, + "rewards/margins": 3.7142693996429443, + "rewards/rejected": -2.4700257778167725, + "step": 668 + }, + { + "epoch": 0.85, + "learning_rate": 6.416773512821777e-08, + "logits/chosen": -3.27756404876709, + "logits/rejected": -3.086406707763672, + "logps/chosen": -285.71649169921875, + "logps/rejected": -1007.381103515625, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9482269287109375, + "rewards/margins": 5.486135959625244, + "rewards/rejected": -4.537909030914307, + "step": 669 + }, + { + "epoch": 0.85, + "learning_rate": 6.406859852203981e-08, + "logits/chosen": -3.244642972946167, + "logits/rejected": -3.1691455841064453, + "logps/chosen": -247.45516967773438, + "logps/rejected": -673.4642944335938, + "loss": 0.3164, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9521156549453735, + "rewards/margins": 3.8862831592559814, + "rewards/rejected": -2.9341673851013184, + "step": 670 + }, + { + "epoch": 0.86, + "learning_rate": 6.396940181741466e-08, + "logits/chosen": -3.2002780437469482, + "logits/rejected": -3.093531370162964, + "logps/chosen": -301.4239196777344, + "logps/rejected": -508.07379150390625, + "loss": 0.3257, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0783157348632812, + "rewards/margins": 3.1952805519104004, + "rewards/rejected": -2.116964817047119, + "step": 671 + }, + { + "epoch": 0.86, + "learning_rate": 6.387014543809223e-08, + "logits/chosen": -3.206188201904297, + "logits/rejected": -3.1167521476745605, + "logps/chosen": -237.4105682373047, + "logps/rejected": -269.06524658203125, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.075382947921753, + "rewards/margins": 2.1166718006134033, + "rewards/rejected": -1.0412888526916504, + "step": 672 + }, + { + "epoch": 0.86, + "learning_rate": 6.37708298080774e-08, + "logits/chosen": -3.224548578262329, + "logits/rejected": -3.1462645530700684, + "logps/chosen": -254.38003540039062, + "logps/rejected": -485.0670471191406, + "loss": 0.3164, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0476486682891846, + "rewards/margins": 3.280552864074707, + "rewards/rejected": -2.2329039573669434, + "step": 673 + }, + { + "epoch": 0.86, + "learning_rate": 6.367145535162812e-08, + "logits/chosen": -3.2697644233703613, + "logits/rejected": -3.1457862854003906, + "logps/chosen": -259.843505859375, + "logps/rejected": -1492.3731689453125, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9752899408340454, + "rewards/margins": 6.273001194000244, + "rewards/rejected": -5.297711372375488, + "step": 674 + }, + { + "epoch": 0.86, + "learning_rate": 6.357202249325371e-08, + "logits/chosen": -3.254349708557129, + "logits/rejected": -3.08567214012146, + "logps/chosen": -257.71063232421875, + "logps/rejected": -806.790771484375, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9811248779296875, + "rewards/margins": 3.8902621269226074, + "rewards/rejected": -2.909137010574341, + "step": 675 + }, + { + "epoch": 0.86, + "learning_rate": 6.347253165771289e-08, + "logits/chosen": -3.2418317794799805, + "logits/rejected": -3.1960601806640625, + "logps/chosen": -289.9067077636719, + "logps/rejected": -1081.1053466796875, + "loss": 0.3012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9184387922286987, + "rewards/margins": 5.8831939697265625, + "rewards/rejected": -4.964755058288574, + "step": 676 + }, + { + "epoch": 0.86, + "learning_rate": 6.33729832700121e-08, + "logits/chosen": -3.238748550415039, + "logits/rejected": -3.2057712078094482, + "logps/chosen": -296.369384765625, + "logps/rejected": -829.7852172851562, + "loss": 0.3346, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1037262678146362, + "rewards/margins": 5.611212253570557, + "rewards/rejected": -4.507485866546631, + "step": 677 + }, + { + "epoch": 0.86, + "learning_rate": 6.327337775540361e-08, + "logits/chosen": -3.2186169624328613, + "logits/rejected": -3.118309497833252, + "logps/chosen": -271.2445373535156, + "logps/rejected": -1062.27099609375, + "loss": 0.3102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8960129022598267, + "rewards/margins": 4.967855930328369, + "rewards/rejected": -4.071843147277832, + "step": 678 + }, + { + "epoch": 0.87, + "learning_rate": 6.317371553938373e-08, + "logits/chosen": -3.1665377616882324, + "logits/rejected": -3.1083178520202637, + "logps/chosen": -255.70184326171875, + "logps/rejected": -700.8330688476562, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0384689569473267, + "rewards/margins": 3.9718825817108154, + "rewards/rejected": -2.9334137439727783, + "step": 679 + }, + { + "epoch": 0.87, + "learning_rate": 6.307399704769099e-08, + "logits/chosen": -3.2316365242004395, + "logits/rejected": -3.084895133972168, + "logps/chosen": -285.75469970703125, + "logps/rejected": -525.1240234375, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0568511486053467, + "rewards/margins": 3.689265489578247, + "rewards/rejected": -2.6324143409729004, + "step": 680 + }, + { + "epoch": 0.87, + "learning_rate": 6.29742227063043e-08, + "logits/chosen": -3.275193214416504, + "logits/rejected": -3.1321604251861572, + "logps/chosen": -280.646728515625, + "logps/rejected": -476.57415771484375, + "loss": 0.3581, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1733627319335938, + "rewards/margins": 3.4538896083831787, + "rewards/rejected": -2.280526876449585, + "step": 681 + }, + { + "epoch": 0.87, + "learning_rate": 6.287439294144119e-08, + "logits/chosen": -3.2935781478881836, + "logits/rejected": -3.1566224098205566, + "logps/chosen": -258.3626403808594, + "logps/rejected": -696.8289184570312, + "loss": 0.3158, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.218571424484253, + "rewards/margins": 4.267500400543213, + "rewards/rejected": -3.048928737640381, + "step": 682 + }, + { + "epoch": 0.87, + "learning_rate": 6.277450817955593e-08, + "logits/chosen": -3.2606704235076904, + "logits/rejected": -3.056795358657837, + "logps/chosen": -233.425048828125, + "logps/rejected": -1415.447509765625, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9058372378349304, + "rewards/margins": 6.224309921264648, + "rewards/rejected": -5.318472862243652, + "step": 683 + }, + { + "epoch": 0.87, + "learning_rate": 6.26745688473377e-08, + "logits/chosen": -3.250427484512329, + "logits/rejected": -3.1378793716430664, + "logps/chosen": -276.65045166015625, + "logps/rejected": -571.1107177734375, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0132430791854858, + "rewards/margins": 3.2232866287231445, + "rewards/rejected": -2.210043430328369, + "step": 684 + }, + { + "epoch": 0.87, + "learning_rate": 6.257457537170882e-08, + "logits/chosen": -3.2477023601531982, + "logits/rejected": -3.08233642578125, + "logps/chosen": -235.435791015625, + "logps/rejected": -465.41424560546875, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1138451099395752, + "rewards/margins": 3.0905203819274902, + "rewards/rejected": -1.9766755104064941, + "step": 685 + }, + { + "epoch": 0.87, + "learning_rate": 6.247452817982293e-08, + "logits/chosen": -3.266540050506592, + "logits/rejected": -3.2067172527313232, + "logps/chosen": -253.22119140625, + "logps/rejected": -552.4891357421875, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9716117978096008, + "rewards/margins": 3.7573068141937256, + "rewards/rejected": -2.7856950759887695, + "step": 686 + }, + { + "epoch": 0.88, + "learning_rate": 6.237442769906305e-08, + "logits/chosen": -3.2647573947906494, + "logits/rejected": -3.1113786697387695, + "logps/chosen": -223.5919189453125, + "logps/rejected": -681.461669921875, + "loss": 0.2841, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0853958129882812, + "rewards/margins": 3.840496778488159, + "rewards/rejected": -2.755100965499878, + "step": 687 + }, + { + "epoch": 0.88, + "learning_rate": 6.227427435703996e-08, + "logits/chosen": -3.298337697982788, + "logits/rejected": -3.1554107666015625, + "logps/chosen": -290.15350341796875, + "logps/rejected": -602.4044189453125, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1456924676895142, + "rewards/margins": 4.0145158767700195, + "rewards/rejected": -2.868823289871216, + "step": 688 + }, + { + "epoch": 0.88, + "learning_rate": 6.217406858159013e-08, + "logits/chosen": -3.222896099090576, + "logits/rejected": -3.1334362030029297, + "logps/chosen": -236.25514221191406, + "logps/rejected": -817.5414428710938, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0434967279434204, + "rewards/margins": 5.023843765258789, + "rewards/rejected": -3.9803466796875, + "step": 689 + }, + { + "epoch": 0.88, + "learning_rate": 6.20738108007741e-08, + "logits/chosen": -3.2628917694091797, + "logits/rejected": -3.2033472061157227, + "logps/chosen": -244.79969787597656, + "logps/rejected": -414.16961669921875, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0355545282363892, + "rewards/margins": 3.419774055480957, + "rewards/rejected": -2.3842194080352783, + "step": 690 + }, + { + "epoch": 0.88, + "learning_rate": 6.197350144287454e-08, + "logits/chosen": -3.2200393676757812, + "logits/rejected": -3.1198885440826416, + "logps/chosen": -248.10739135742188, + "logps/rejected": -846.185302734375, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9778434634208679, + "rewards/margins": 5.37062931060791, + "rewards/rejected": -4.392786026000977, + "step": 691 + }, + { + "epoch": 0.88, + "learning_rate": 6.187314093639443e-08, + "logits/chosen": -3.265612840652466, + "logits/rejected": -3.1977179050445557, + "logps/chosen": -274.9576416015625, + "logps/rejected": -822.56201171875, + "loss": 0.2932, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0110901594161987, + "rewards/margins": 5.435452461242676, + "rewards/rejected": -4.4243621826171875, + "step": 692 + }, + { + "epoch": 0.88, + "learning_rate": 6.177272971005528e-08, + "logits/chosen": -3.293440818786621, + "logits/rejected": -3.1273374557495117, + "logps/chosen": -270.0041809082031, + "logps/rejected": -605.3888549804688, + "loss": 0.3096, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0000473260879517, + "rewards/margins": 4.048913478851318, + "rewards/rejected": -3.0488662719726562, + "step": 693 + }, + { + "epoch": 0.88, + "learning_rate": 6.167226819279528e-08, + "logits/chosen": -3.269310712814331, + "logits/rejected": -2.9640181064605713, + "logps/chosen": -263.1588134765625, + "logps/rejected": -969.8955078125, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2010483741760254, + "rewards/margins": 4.994422912597656, + "rewards/rejected": -3.79337477684021, + "step": 694 + }, + { + "epoch": 0.89, + "learning_rate": 6.157175681376736e-08, + "logits/chosen": -3.2775378227233887, + "logits/rejected": -3.125418186187744, + "logps/chosen": -273.23388671875, + "logps/rejected": -748.03466796875, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0588881969451904, + "rewards/margins": 4.69192361831665, + "rewards/rejected": -3.633035182952881, + "step": 695 + }, + { + "epoch": 0.89, + "learning_rate": 6.147119600233758e-08, + "logits/chosen": -3.297919750213623, + "logits/rejected": -3.1779277324676514, + "logps/chosen": -255.63621520996094, + "logps/rejected": -485.6526184082031, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9213669300079346, + "rewards/margins": 3.6082887649536133, + "rewards/rejected": -2.6869218349456787, + "step": 696 + }, + { + "epoch": 0.89, + "learning_rate": 6.137058618808306e-08, + "logits/chosen": -3.2537970542907715, + "logits/rejected": -3.137833595275879, + "logps/chosen": -234.99984741210938, + "logps/rejected": -928.3157348632812, + "loss": 0.3052, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3386902809143066, + "rewards/margins": 5.104205131530762, + "rewards/rejected": -3.7655153274536133, + "step": 697 + }, + { + "epoch": 0.89, + "learning_rate": 6.126992780079031e-08, + "logits/chosen": -3.239086627960205, + "logits/rejected": -3.0881080627441406, + "logps/chosen": -247.05438232421875, + "logps/rejected": -700.8790283203125, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8943435549736023, + "rewards/margins": 4.286973476409912, + "rewards/rejected": -3.392630100250244, + "step": 698 + }, + { + "epoch": 0.89, + "learning_rate": 6.116922127045331e-08, + "logits/chosen": -3.1977365016937256, + "logits/rejected": -3.1023640632629395, + "logps/chosen": -234.45155334472656, + "logps/rejected": -525.4302978515625, + "loss": 0.264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.992652177810669, + "rewards/margins": 3.8459267616271973, + "rewards/rejected": -2.8532745838165283, + "step": 699 + }, + { + "epoch": 0.89, + "learning_rate": 6.106846702727172e-08, + "logits/chosen": -3.2286813259124756, + "logits/rejected": -3.2118163108825684, + "logps/chosen": -284.4957275390625, + "logps/rejected": -793.830810546875, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1614716053009033, + "rewards/margins": 4.984827041625977, + "rewards/rejected": -3.823355197906494, + "step": 700 + }, + { + "epoch": 0.89, + "learning_rate": 6.096766550164899e-08, + "logits/chosen": -3.246285915374756, + "logits/rejected": -3.096752166748047, + "logps/chosen": -257.68695068359375, + "logps/rejected": -820.2569580078125, + "loss": 0.3243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9743126034736633, + "rewards/margins": 4.3609185218811035, + "rewards/rejected": -3.386605978012085, + "step": 701 + }, + { + "epoch": 0.89, + "learning_rate": 6.086681712419058e-08, + "logits/chosen": -3.2727243900299072, + "logits/rejected": -3.1888551712036133, + "logps/chosen": -244.31552124023438, + "logps/rejected": -621.885986328125, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1097427606582642, + "rewards/margins": 4.7340593338012695, + "rewards/rejected": -3.6243162155151367, + "step": 702 + }, + { + "epoch": 0.9, + "learning_rate": 6.076592232570206e-08, + "logits/chosen": -3.238368511199951, + "logits/rejected": -3.173015594482422, + "logps/chosen": -245.08633422851562, + "logps/rejected": -681.9411010742188, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0285415649414062, + "rewards/margins": 4.473475933074951, + "rewards/rejected": -3.444934129714966, + "step": 703 + }, + { + "epoch": 0.9, + "learning_rate": 6.066498153718735e-08, + "logits/chosen": -3.223019599914551, + "logits/rejected": -3.0926578044891357, + "logps/chosen": -240.27955627441406, + "logps/rejected": -665.3905029296875, + "loss": 0.3023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0989395380020142, + "rewards/margins": 4.180383682250977, + "rewards/rejected": -3.0814437866210938, + "step": 704 + }, + { + "epoch": 0.9, + "learning_rate": 6.056399518984677e-08, + "logits/chosen": -3.2040252685546875, + "logits/rejected": -3.052750825881958, + "logps/chosen": -248.95379638671875, + "logps/rejected": -1470.603515625, + "loss": 0.2672, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0607597827911377, + "rewards/margins": 7.066631317138672, + "rewards/rejected": -6.005871772766113, + "step": 705 + }, + { + "epoch": 0.9, + "learning_rate": 6.046296371507533e-08, + "logits/chosen": -3.301731586456299, + "logits/rejected": -3.1088948249816895, + "logps/chosen": -275.873046875, + "logps/rejected": -412.80462646484375, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2693344354629517, + "rewards/margins": 2.964646816253662, + "rewards/rejected": -1.6953125, + "step": 706 + }, + { + "epoch": 0.9, + "learning_rate": 6.036188754446074e-08, + "logits/chosen": -3.2670936584472656, + "logits/rejected": -3.1703133583068848, + "logps/chosen": -307.5868835449219, + "logps/rejected": -945.349365234375, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0445373058319092, + "rewards/margins": 5.166238307952881, + "rewards/rejected": -4.121701240539551, + "step": 707 + }, + { + "epoch": 0.9, + "learning_rate": 6.02607671097817e-08, + "logits/chosen": -3.2343244552612305, + "logits/rejected": -3.215853214263916, + "logps/chosen": -277.88067626953125, + "logps/rejected": -832.0726318359375, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2029099464416504, + "rewards/margins": 5.456925868988037, + "rewards/rejected": -4.254015922546387, + "step": 708 + }, + { + "epoch": 0.9, + "learning_rate": 6.015960284300601e-08, + "logits/chosen": -3.2701187133789062, + "logits/rejected": -3.110706329345703, + "logps/chosen": -272.1822509765625, + "logps/rejected": -567.19970703125, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0131614208221436, + "rewards/margins": 3.971027374267578, + "rewards/rejected": -2.9578659534454346, + "step": 709 + }, + { + "epoch": 0.91, + "learning_rate": 6.005839517628861e-08, + "logits/chosen": -3.1920814514160156, + "logits/rejected": -3.0948352813720703, + "logps/chosen": -262.27130126953125, + "logps/rejected": -964.709716796875, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.244023084640503, + "rewards/margins": 5.627641677856445, + "rewards/rejected": -4.383618354797363, + "step": 710 + }, + { + "epoch": 0.91, + "learning_rate": 5.995714454196997e-08, + "logits/chosen": -3.27418851852417, + "logits/rejected": -3.105113983154297, + "logps/chosen": -287.9771728515625, + "logps/rejected": -1662.5594482421875, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.222163438796997, + "rewards/margins": 8.480171203613281, + "rewards/rejected": -7.258008003234863, + "step": 711 + }, + { + "epoch": 0.91, + "learning_rate": 5.985585137257401e-08, + "logits/chosen": -3.2755353450775146, + "logits/rejected": -3.199542760848999, + "logps/chosen": -280.1078186035156, + "logps/rejected": -1283.80810546875, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.301727294921875, + "rewards/margins": 6.929705619812012, + "rewards/rejected": -5.627978324890137, + "step": 712 + }, + { + "epoch": 0.91, + "learning_rate": 5.975451610080642e-08, + "logits/chosen": -3.1667184829711914, + "logits/rejected": -3.063683271408081, + "logps/chosen": -255.15316772460938, + "logps/rejected": -910.307373046875, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9468444585800171, + "rewards/margins": 5.170239448547363, + "rewards/rejected": -4.223394870758057, + "step": 713 + }, + { + "epoch": 0.91, + "learning_rate": 5.965313915955268e-08, + "logits/chosen": -3.2267775535583496, + "logits/rejected": -3.142263412475586, + "logps/chosen": -271.84808349609375, + "logps/rejected": -598.3282470703125, + "loss": 0.3012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2086349725723267, + "rewards/margins": 4.617340087890625, + "rewards/rejected": -3.408705234527588, + "step": 714 + }, + { + "epoch": 0.91, + "learning_rate": 5.955172098187632e-08, + "logits/chosen": -3.215301990509033, + "logits/rejected": -3.1431896686553955, + "logps/chosen": -264.39959716796875, + "logps/rejected": -675.44775390625, + "loss": 0.2991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9577186107635498, + "rewards/margins": 4.767767906188965, + "rewards/rejected": -3.810049533843994, + "step": 715 + }, + { + "epoch": 0.91, + "learning_rate": 5.945026200101701e-08, + "logits/chosen": -3.2901668548583984, + "logits/rejected": -3.0961556434631348, + "logps/chosen": -247.3822021484375, + "logps/rejected": -504.525146484375, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3761773109436035, + "rewards/margins": 3.9870553016662598, + "rewards/rejected": -2.6108779907226562, + "step": 716 + }, + { + "epoch": 0.91, + "learning_rate": 5.934876265038874e-08, + "logits/chosen": -3.233747959136963, + "logits/rejected": -3.184901237487793, + "logps/chosen": -283.09161376953125, + "logps/rejected": -650.0584106445312, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1601775884628296, + "rewards/margins": 4.1347808837890625, + "rewards/rejected": -2.9746031761169434, + "step": 717 + }, + { + "epoch": 0.92, + "learning_rate": 5.9247223363577924e-08, + "logits/chosen": -3.253308057785034, + "logits/rejected": -3.0565671920776367, + "logps/chosen": -257.21246337890625, + "logps/rejected": -1237.721923828125, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.98680579662323, + "rewards/margins": 5.654626369476318, + "rewards/rejected": -4.667820930480957, + "step": 718 + }, + { + "epoch": 0.92, + "learning_rate": 5.914564457434158e-08, + "logits/chosen": -3.2780308723449707, + "logits/rejected": -3.1503448486328125, + "logps/chosen": -271.92333984375, + "logps/rejected": -605.86279296875, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9355263113975525, + "rewards/margins": 3.8535356521606445, + "rewards/rejected": -2.9180092811584473, + "step": 719 + }, + { + "epoch": 0.92, + "learning_rate": 5.90440267166055e-08, + "logits/chosen": -3.2184066772460938, + "logits/rejected": -3.0817222595214844, + "logps/chosen": -280.2200622558594, + "logps/rejected": -755.8271484375, + "loss": 0.2983, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3027969598770142, + "rewards/margins": 5.102546691894531, + "rewards/rejected": -3.7997498512268066, + "step": 720 + }, + { + "epoch": 0.92, + "learning_rate": 5.894237022446233e-08, + "logits/chosen": -3.2015674114227295, + "logits/rejected": -3.141106128692627, + "logps/chosen": -288.0885009765625, + "logps/rejected": -685.884521484375, + "loss": 0.3226, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8763481378555298, + "rewards/margins": 4.834218978881836, + "rewards/rejected": -3.9578704833984375, + "step": 721 + }, + { + "epoch": 0.92, + "learning_rate": 5.8840675532169806e-08, + "logits/chosen": -3.202146530151367, + "logits/rejected": -3.0136590003967285, + "logps/chosen": -287.7388000488281, + "logps/rejected": -931.8311767578125, + "loss": 0.3235, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0592255592346191, + "rewards/margins": 4.573129653930664, + "rewards/rejected": -3.513903856277466, + "step": 722 + }, + { + "epoch": 0.92, + "learning_rate": 5.873894307414878e-08, + "logits/chosen": -3.2552390098571777, + "logits/rejected": -3.0850839614868164, + "logps/chosen": -263.7825622558594, + "logps/rejected": -717.0108032226562, + "loss": 0.3177, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0878784656524658, + "rewards/margins": 3.8551254272460938, + "rewards/rejected": -2.767246961593628, + "step": 723 + }, + { + "epoch": 0.92, + "learning_rate": 5.8637173284981525e-08, + "logits/chosen": -3.19887375831604, + "logits/rejected": -3.1219446659088135, + "logps/chosen": -232.374755859375, + "logps/rejected": -468.51092529296875, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0476996898651123, + "rewards/margins": 3.5660178661346436, + "rewards/rejected": -2.5183181762695312, + "step": 724 + }, + { + "epoch": 0.92, + "learning_rate": 5.853536659940967e-08, + "logits/chosen": -3.197094440460205, + "logits/rejected": -3.169013500213623, + "logps/chosen": -241.07797241210938, + "logps/rejected": -915.3023681640625, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9018630981445312, + "rewards/margins": 5.531498908996582, + "rewards/rejected": -4.629635810852051, + "step": 725 + }, + { + "epoch": 0.93, + "learning_rate": 5.843352345233257e-08, + "logits/chosen": -3.252014636993408, + "logits/rejected": -3.1484029293060303, + "logps/chosen": -299.244140625, + "logps/rejected": -592.1588134765625, + "loss": 0.3137, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.038323998451233, + "rewards/margins": 3.8650894165039062, + "rewards/rejected": -2.826765537261963, + "step": 726 + }, + { + "epoch": 0.93, + "learning_rate": 5.833164427880526e-08, + "logits/chosen": -3.3245320320129395, + "logits/rejected": -3.1357812881469727, + "logps/chosen": -247.10421752929688, + "logps/rejected": -632.0180053710938, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0089499950408936, + "rewards/margins": 3.725722551345825, + "rewards/rejected": -2.7167725563049316, + "step": 727 + }, + { + "epoch": 0.93, + "learning_rate": 5.8229729514036697e-08, + "logits/chosen": -3.2836713790893555, + "logits/rejected": -3.0047075748443604, + "logps/chosen": -268.4767150878906, + "logps/rejected": -1188.406494140625, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0271774530410767, + "rewards/margins": 5.9838972091674805, + "rewards/rejected": -4.956720352172852, + "step": 728 + }, + { + "epoch": 0.93, + "learning_rate": 5.812777959338791e-08, + "logits/chosen": -3.271798610687256, + "logits/rejected": -3.1075329780578613, + "logps/chosen": -246.19131469726562, + "logps/rejected": -667.22216796875, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9432587027549744, + "rewards/margins": 4.0882720947265625, + "rewards/rejected": -3.1450135707855225, + "step": 729 + }, + { + "epoch": 0.93, + "learning_rate": 5.802579495237003e-08, + "logits/chosen": -3.2036945819854736, + "logits/rejected": -3.097341537475586, + "logps/chosen": -301.4010009765625, + "logps/rejected": -786.6395263671875, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0702636241912842, + "rewards/margins": 5.311975002288818, + "rewards/rejected": -4.241711616516113, + "step": 730 + }, + { + "epoch": 0.93, + "learning_rate": 5.79237760266426e-08, + "logits/chosen": -3.2926690578460693, + "logits/rejected": -3.1697659492492676, + "logps/chosen": -239.3607177734375, + "logps/rejected": -670.8765258789062, + "loss": 0.2767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9564049243927002, + "rewards/margins": 4.500960826873779, + "rewards/rejected": -3.5445556640625, + "step": 731 + }, + { + "epoch": 0.93, + "learning_rate": 5.7821723252011546e-08, + "logits/chosen": -3.295822858810425, + "logits/rejected": -3.17606258392334, + "logps/chosen": -248.26300048828125, + "logps/rejected": -406.7499694824219, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.150781273841858, + "rewards/margins": 3.4346330165863037, + "rewards/rejected": -2.2838516235351562, + "step": 732 + }, + { + "epoch": 0.93, + "learning_rate": 5.771963706442744e-08, + "logits/chosen": -3.2515621185302734, + "logits/rejected": -3.2205169200897217, + "logps/chosen": -292.1411437988281, + "logps/rejected": -801.9161987304688, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.900299072265625, + "rewards/margins": 5.2326202392578125, + "rewards/rejected": -4.3323211669921875, + "step": 733 + }, + { + "epoch": 0.94, + "learning_rate": 5.7617517899983546e-08, + "logits/chosen": -3.191598892211914, + "logits/rejected": -3.084989547729492, + "logps/chosen": -282.37603759765625, + "logps/rejected": -547.4968872070312, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.054779052734375, + "rewards/margins": 3.706660509109497, + "rewards/rejected": -2.651881456375122, + "step": 734 + }, + { + "epoch": 0.94, + "learning_rate": 5.751536619491406e-08, + "logits/chosen": -3.2442469596862793, + "logits/rejected": -3.085822105407715, + "logps/chosen": -277.8446044921875, + "logps/rejected": -1020.4263916015625, + "loss": 0.3168, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0275894403457642, + "rewards/margins": 5.47873067855835, + "rewards/rejected": -4.451141357421875, + "step": 735 + }, + { + "epoch": 0.94, + "learning_rate": 5.741318238559209e-08, + "logits/chosen": -3.2448647022247314, + "logits/rejected": -3.1269125938415527, + "logps/chosen": -239.51846313476562, + "logps/rejected": -798.52978515625, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1812905073165894, + "rewards/margins": 5.04300594329834, + "rewards/rejected": -3.861715793609619, + "step": 736 + }, + { + "epoch": 0.94, + "learning_rate": 5.7310966908528e-08, + "logits/chosen": -3.1842758655548096, + "logits/rejected": -3.1270761489868164, + "logps/chosen": -256.83172607421875, + "logps/rejected": -547.211669921875, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0789649486541748, + "rewards/margins": 3.7404427528381348, + "rewards/rejected": -2.66147780418396, + "step": 737 + }, + { + "epoch": 0.94, + "learning_rate": 5.7208720200367334e-08, + "logits/chosen": -3.3010644912719727, + "logits/rejected": -3.2359862327575684, + "logps/chosen": -251.20028686523438, + "logps/rejected": -861.2357788085938, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0685722827911377, + "rewards/margins": 5.5270256996154785, + "rewards/rejected": -4.458453178405762, + "step": 738 + }, + { + "epoch": 0.94, + "learning_rate": 5.7106442697889124e-08, + "logits/chosen": -3.2712302207946777, + "logits/rejected": -3.1970319747924805, + "logps/chosen": -258.9510498046875, + "logps/rejected": -603.2684936523438, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3526413440704346, + "rewards/margins": 4.243293762207031, + "rewards/rejected": -2.8906524181365967, + "step": 739 + }, + { + "epoch": 0.94, + "learning_rate": 5.7004134838003895e-08, + "logits/chosen": -3.2226548194885254, + "logits/rejected": -3.0728461742401123, + "logps/chosen": -288.03973388671875, + "logps/rejected": -857.53125, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.079187035560608, + "rewards/margins": 4.4757537841796875, + "rewards/rejected": -3.396566867828369, + "step": 740 + }, + { + "epoch": 0.94, + "learning_rate": 5.690179705775189e-08, + "logits/chosen": -3.2236862182617188, + "logits/rejected": -3.013559579849243, + "logps/chosen": -276.9339599609375, + "logps/rejected": -271.763671875, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8982330560684204, + "rewards/margins": 1.5704574584960938, + "rewards/rejected": -0.6722244620323181, + "step": 741 + }, + { + "epoch": 0.95, + "learning_rate": 5.6799429794301135e-08, + "logits/chosen": -3.282285451889038, + "logits/rejected": -3.145700693130493, + "logps/chosen": -266.8681945800781, + "logps/rejected": -529.2003173828125, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1450165510177612, + "rewards/margins": 3.7651491165161133, + "rewards/rejected": -2.6201324462890625, + "step": 742 + }, + { + "epoch": 0.95, + "learning_rate": 5.6697033484945634e-08, + "logits/chosen": -3.2841672897338867, + "logits/rejected": -3.0975513458251953, + "logps/chosen": -245.37356567382812, + "logps/rejected": -554.064697265625, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0541107654571533, + "rewards/margins": 3.4572691917419434, + "rewards/rejected": -2.403158664703369, + "step": 743 + }, + { + "epoch": 0.95, + "learning_rate": 5.659460856710345e-08, + "logits/chosen": -3.1967427730560303, + "logits/rejected": -3.101440191268921, + "logps/chosen": -255.460693359375, + "logps/rejected": -620.1253051757812, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1361770629882812, + "rewards/margins": 3.6947860717773438, + "rewards/rejected": -2.5586090087890625, + "step": 744 + }, + { + "epoch": 0.95, + "learning_rate": 5.649215547831486e-08, + "logits/chosen": -3.1757850646972656, + "logits/rejected": -3.104977607727051, + "logps/chosen": -287.294677734375, + "logps/rejected": -1253.5250244140625, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1477553844451904, + "rewards/margins": 6.9392805099487305, + "rewards/rejected": -5.791525363922119, + "step": 745 + }, + { + "epoch": 0.95, + "learning_rate": 5.63896746562405e-08, + "logits/chosen": -3.2136192321777344, + "logits/rejected": -3.095236301422119, + "logps/chosen": -242.54583740234375, + "logps/rejected": -596.1800537109375, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0074493885040283, + "rewards/margins": 3.896589756011963, + "rewards/rejected": -2.8891403675079346, + "step": 746 + }, + { + "epoch": 0.95, + "learning_rate": 5.628716653865944e-08, + "logits/chosen": -3.1859097480773926, + "logits/rejected": -3.0623531341552734, + "logps/chosen": -297.6890869140625, + "logps/rejected": -303.160888671875, + "loss": 0.3223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9417297840118408, + "rewards/margins": 2.248762607574463, + "rewards/rejected": -1.307032823562622, + "step": 747 + }, + { + "epoch": 0.95, + "learning_rate": 5.618463156346739e-08, + "logits/chosen": -3.2423288822174072, + "logits/rejected": -3.0578413009643555, + "logps/chosen": -279.40325927734375, + "logps/rejected": -938.4310913085938, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1630370616912842, + "rewards/margins": 4.877734184265137, + "rewards/rejected": -3.7146973609924316, + "step": 748 + }, + { + "epoch": 0.95, + "learning_rate": 5.6082070168674754e-08, + "logits/chosen": -3.1883225440979004, + "logits/rejected": -3.0821897983551025, + "logps/chosen": -265.71331787109375, + "logps/rejected": -834.399169921875, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1263900995254517, + "rewards/margins": 4.385127067565918, + "rewards/rejected": -3.258737325668335, + "step": 749 + }, + { + "epoch": 0.96, + "learning_rate": 5.597948279240483e-08, + "logits/chosen": -3.315354347229004, + "logits/rejected": -3.1486456394195557, + "logps/chosen": -260.65191650390625, + "logps/rejected": -1503.023681640625, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0841546058654785, + "rewards/margins": 8.066305160522461, + "rewards/rejected": -6.982150077819824, + "step": 750 + }, + { + "epoch": 0.96, + "learning_rate": 5.587686987289188e-08, + "logits/chosen": -3.22005558013916, + "logits/rejected": -3.1170406341552734, + "logps/chosen": -258.9420471191406, + "logps/rejected": -587.601318359375, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1539299488067627, + "rewards/margins": 4.2950873374938965, + "rewards/rejected": -3.141157627105713, + "step": 751 + }, + { + "epoch": 0.96, + "learning_rate": 5.5774231848479313e-08, + "logits/chosen": -3.2204132080078125, + "logits/rejected": -3.123109817504883, + "logps/chosen": -261.6385192871094, + "logps/rejected": -526.366943359375, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1918511390686035, + "rewards/margins": 3.772674083709717, + "rewards/rejected": -2.5808229446411133, + "step": 752 + }, + { + "epoch": 0.96, + "learning_rate": 5.5671569157617726e-08, + "logits/chosen": -3.264150619506836, + "logits/rejected": -2.968067169189453, + "logps/chosen": -263.0998840332031, + "logps/rejected": -826.0394287109375, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1019043922424316, + "rewards/margins": 4.1242170333862305, + "rewards/rejected": -3.022313117980957, + "step": 753 + }, + { + "epoch": 0.96, + "learning_rate": 5.556888223886315e-08, + "logits/chosen": -3.2524828910827637, + "logits/rejected": -3.120105266571045, + "logps/chosen": -281.44879150390625, + "logps/rejected": -393.33624267578125, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9670486450195312, + "rewards/margins": 3.1805849075317383, + "rewards/rejected": -2.213536262512207, + "step": 754 + }, + { + "epoch": 0.96, + "learning_rate": 5.546617153087505e-08, + "logits/chosen": -3.208167552947998, + "logits/rejected": -3.184800148010254, + "logps/chosen": -230.54666137695312, + "logps/rejected": -648.0328979492188, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2760146856307983, + "rewards/margins": 4.617454528808594, + "rewards/rejected": -3.341439723968506, + "step": 755 + }, + { + "epoch": 0.96, + "learning_rate": 5.536343747241459e-08, + "logits/chosen": -3.25128173828125, + "logits/rejected": -3.111159324645996, + "logps/chosen": -276.23211669921875, + "logps/rejected": -461.3901672363281, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.160423994064331, + "rewards/margins": 3.453197479248047, + "rewards/rejected": -2.292773485183716, + "step": 756 + }, + { + "epoch": 0.96, + "learning_rate": 5.5260680502342595e-08, + "logits/chosen": -3.243509292602539, + "logits/rejected": -2.969472885131836, + "logps/chosen": -267.8327331542969, + "logps/rejected": -1558.3861083984375, + "loss": 0.3117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9958251714706421, + "rewards/margins": 7.36895751953125, + "rewards/rejected": -6.373132705688477, + "step": 757 + }, + { + "epoch": 0.97, + "learning_rate": 5.515790105961785e-08, + "logits/chosen": -3.27978515625, + "logits/rejected": -2.9798593521118164, + "logps/chosen": -284.7986145019531, + "logps/rejected": -424.1134338378906, + "loss": 0.3247, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.333592176437378, + "rewards/margins": 3.1149916648864746, + "rewards/rejected": -1.7813994884490967, + "step": 758 + }, + { + "epoch": 0.97, + "learning_rate": 5.505509958329507e-08, + "logits/chosen": -3.208517074584961, + "logits/rejected": -3.1833620071411133, + "logps/chosen": -271.3885192871094, + "logps/rejected": -1353.3671875, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2082030773162842, + "rewards/margins": 7.6175537109375, + "rewards/rejected": -6.409350395202637, + "step": 759 + }, + { + "epoch": 0.97, + "learning_rate": 5.495227651252314e-08, + "logits/chosen": -3.180421829223633, + "logits/rejected": -3.13511061668396, + "logps/chosen": -244.89373779296875, + "logps/rejected": -587.1878662109375, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1563949584960938, + "rewards/margins": 4.387312412261963, + "rewards/rejected": -3.230917453765869, + "step": 760 + }, + { + "epoch": 0.97, + "learning_rate": 5.484943228654315e-08, + "logits/chosen": -3.3344669342041016, + "logits/rejected": -3.1215968132019043, + "logps/chosen": -261.8267822265625, + "logps/rejected": -558.576904296875, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9958969354629517, + "rewards/margins": 3.7681076526641846, + "rewards/rejected": -2.7722105979919434, + "step": 761 + }, + { + "epoch": 0.97, + "learning_rate": 5.474656734468662e-08, + "logits/chosen": -3.1812727451324463, + "logits/rejected": -3.1634955406188965, + "logps/chosen": -271.1277770996094, + "logps/rejected": -718.273193359375, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2573425769805908, + "rewards/margins": 4.917109966278076, + "rewards/rejected": -3.6597671508789062, + "step": 762 + }, + { + "epoch": 0.97, + "learning_rate": 5.4643682126373505e-08, + "logits/chosen": -3.2680132389068604, + "logits/rejected": -3.1498470306396484, + "logps/chosen": -254.13365173339844, + "logps/rejected": -884.9483032226562, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.137298583984375, + "rewards/margins": 4.866662979125977, + "rewards/rejected": -3.7293639183044434, + "step": 763 + }, + { + "epoch": 0.97, + "learning_rate": 5.454077707111041e-08, + "logits/chosen": -3.2659311294555664, + "logits/rejected": -3.12626314163208, + "logps/chosen": -257.34710693359375, + "logps/rejected": -1192.430419921875, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1764588356018066, + "rewards/margins": 7.077667236328125, + "rewards/rejected": -5.901208877563477, + "step": 764 + }, + { + "epoch": 0.98, + "learning_rate": 5.443785261848867e-08, + "logits/chosen": -3.229379177093506, + "logits/rejected": -3.1970839500427246, + "logps/chosen": -277.41632080078125, + "logps/rejected": -799.2518920898438, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0178695917129517, + "rewards/margins": 4.8026018142700195, + "rewards/rejected": -3.784731864929199, + "step": 765 + }, + { + "epoch": 0.98, + "learning_rate": 5.433490920818249e-08, + "logits/chosen": -3.227325439453125, + "logits/rejected": -3.0599989891052246, + "logps/chosen": -265.53375244140625, + "logps/rejected": -352.52117919921875, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.212641954421997, + "rewards/margins": 2.86000919342041, + "rewards/rejected": -1.6473671197891235, + "step": 766 + }, + { + "epoch": 0.98, + "learning_rate": 5.4231947279947035e-08, + "logits/chosen": -3.2880187034606934, + "logits/rejected": -3.2187185287475586, + "logps/chosen": -297.38934326171875, + "logps/rejected": -977.475341796875, + "loss": 0.3299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9419876337051392, + "rewards/margins": 6.0042524337768555, + "rewards/rejected": -5.062265396118164, + "step": 767 + }, + { + "epoch": 0.98, + "learning_rate": 5.4128967273616623e-08, + "logits/chosen": -3.261833667755127, + "logits/rejected": -3.1563029289245605, + "logps/chosen": -257.5362548828125, + "logps/rejected": -357.6443786621094, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3554344177246094, + "rewards/margins": 2.8365097045898438, + "rewards/rejected": -1.4810752868652344, + "step": 768 + }, + { + "epoch": 0.98, + "learning_rate": 5.402596962910273e-08, + "logits/chosen": -3.2354838848114014, + "logits/rejected": -3.072735071182251, + "logps/chosen": -279.80364990234375, + "logps/rejected": -883.4815673828125, + "loss": 0.279, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.363551378250122, + "rewards/margins": 5.039271354675293, + "rewards/rejected": -3.67572021484375, + "step": 769 + }, + { + "epoch": 0.98, + "learning_rate": 5.392295478639225e-08, + "logits/chosen": -3.245725393295288, + "logits/rejected": -3.131135940551758, + "logps/chosen": -272.56683349609375, + "logps/rejected": -758.1331176757812, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1244338750839233, + "rewards/margins": 4.136362075805664, + "rewards/rejected": -3.011928081512451, + "step": 770 + }, + { + "epoch": 0.98, + "learning_rate": 5.381992318554549e-08, + "logits/chosen": -3.1686923503875732, + "logits/rejected": -3.037465810775757, + "logps/chosen": -302.29779052734375, + "logps/rejected": -545.5322875976562, + "loss": 0.3232, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3894882202148438, + "rewards/margins": 3.613028049468994, + "rewards/rejected": -2.2235398292541504, + "step": 771 + }, + { + "epoch": 0.98, + "learning_rate": 5.3716875266694385e-08, + "logits/chosen": -3.291574478149414, + "logits/rejected": -3.005234718322754, + "logps/chosen": -265.1171875, + "logps/rejected": -1486.538818359375, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2711502313613892, + "rewards/margins": 7.0985188484191895, + "rewards/rejected": -5.827368259429932, + "step": 772 + }, + { + "epoch": 0.99, + "learning_rate": 5.361381147004053e-08, + "logits/chosen": -3.25838041305542, + "logits/rejected": -3.149791717529297, + "logps/chosen": -230.39700317382812, + "logps/rejected": -749.2152099609375, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9927002191543579, + "rewards/margins": 5.569589614868164, + "rewards/rejected": -4.5768890380859375, + "step": 773 + }, + { + "epoch": 0.99, + "learning_rate": 5.351073223585341e-08, + "logits/chosen": -3.2781243324279785, + "logits/rejected": -3.171973943710327, + "logps/chosen": -288.91925048828125, + "logps/rejected": -658.1671752929688, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.183160424232483, + "rewards/margins": 4.873270034790039, + "rewards/rejected": -3.6901092529296875, + "step": 774 + }, + { + "epoch": 0.99, + "learning_rate": 5.340763800446839e-08, + "logits/chosen": -3.2563095092773438, + "logits/rejected": -3.0686566829681396, + "logps/chosen": -283.8312683105469, + "logps/rejected": -582.1956787109375, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9571434259414673, + "rewards/margins": 3.789544105529785, + "rewards/rejected": -2.8324005603790283, + "step": 775 + }, + { + "epoch": 0.99, + "learning_rate": 5.3304529216284965e-08, + "logits/chosen": -3.311347484588623, + "logits/rejected": -3.0950632095336914, + "logps/chosen": -291.96844482421875, + "logps/rejected": -364.05908203125, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9296325445175171, + "rewards/margins": 2.6252150535583496, + "rewards/rejected": -1.695582628250122, + "step": 776 + }, + { + "epoch": 0.99, + "learning_rate": 5.320140631176476e-08, + "logits/chosen": -3.2207064628601074, + "logits/rejected": -3.2979745864868164, + "logps/chosen": -239.831787109375, + "logps/rejected": -4217.13037109375, + "loss": 0.2618, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.211249589920044, + "rewards/margins": 5.813791751861572, + "rewards/rejected": -4.602541923522949, + "step": 777 + }, + { + "epoch": 0.99, + "learning_rate": 5.309826973142973e-08, + "logits/chosen": -3.0978736877441406, + "logits/rejected": -3.0987558364868164, + "logps/chosen": -281.70782470703125, + "logps/rejected": -1273.9140625, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2935981750488281, + "rewards/margins": 7.180365562438965, + "rewards/rejected": -5.886767387390137, + "step": 778 + }, + { + "epoch": 0.99, + "learning_rate": 5.299511991586023e-08, + "logits/chosen": -3.2803866863250732, + "logits/rejected": -3.179617404937744, + "logps/chosen": -274.7348937988281, + "logps/rejected": -632.1211547851562, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.159456729888916, + "rewards/margins": 4.313833236694336, + "rewards/rejected": -3.154376268386841, + "step": 779 + }, + { + "epoch": 0.99, + "learning_rate": 5.28919573056932e-08, + "logits/chosen": -3.224677324295044, + "logits/rejected": -3.0233426094055176, + "logps/chosen": -234.88059997558594, + "logps/rejected": -398.3298034667969, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3235458135604858, + "rewards/margins": 3.0013015270233154, + "rewards/rejected": -1.6777558326721191, + "step": 780 + }, + { + "epoch": 1.0, + "learning_rate": 5.2788782341620174e-08, + "logits/chosen": -3.2396512031555176, + "logits/rejected": -3.109309673309326, + "logps/chosen": -246.08132934570312, + "logps/rejected": -1089.2109375, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2802956104278564, + "rewards/margins": 5.526935577392578, + "rewards/rejected": -4.246640205383301, + "step": 781 + }, + { + "epoch": 1.0, + "learning_rate": 5.268559546438549e-08, + "logits/chosen": -3.215371608734131, + "logits/rejected": -3.079514741897583, + "logps/chosen": -295.7006530761719, + "logps/rejected": -518.8088989257812, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1283127069473267, + "rewards/margins": 3.6253724098205566, + "rewards/rejected": -2.4970598220825195, + "step": 782 + }, + { + "epoch": 1.0, + "learning_rate": 5.2582397114784404e-08, + "logits/chosen": -3.3069446086883545, + "logits/rejected": -3.1717209815979004, + "logps/chosen": -251.17718505859375, + "logps/rejected": -375.05694580078125, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3809601068496704, + "rewards/margins": 2.958331346511841, + "rewards/rejected": -1.5773712396621704, + "step": 783 + }, + { + "epoch": 1.0, + "learning_rate": 5.2479187733661114e-08, + "logits/chosen": -3.2391676902770996, + "logits/rejected": -3.150268077850342, + "logps/chosen": -249.37181091308594, + "logps/rejected": -632.6412353515625, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.17718505859375, + "rewards/margins": 4.088040351867676, + "rewards/rejected": -2.9108550548553467, + "step": 784 + }, + { + "epoch": 1.0, + "learning_rate": 5.2375967761907014e-08, + "logits/chosen": -3.2379212379455566, + "logits/rejected": -3.0989480018615723, + "logps/chosen": -211.80584716796875, + "logps/rejected": -388.78936767578125, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.065253496170044, + "rewards/margins": 2.8200950622558594, + "rewards/rejected": -1.7548415660858154, + "step": 785 + }, + { + "epoch": 1.0, + "learning_rate": 5.227273764045868e-08, + "logits/chosen": -3.2178990840911865, + "logits/rejected": -3.0952234268188477, + "logps/chosen": -253.03396606445312, + "logps/rejected": -884.6793212890625, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1348235607147217, + "rewards/margins": 5.522524833679199, + "rewards/rejected": -4.387701511383057, + "step": 786 + }, + { + "epoch": 1.0, + "learning_rate": 5.216949781029608e-08, + "logits/chosen": -3.241117477416992, + "logits/rejected": -3.099931001663208, + "logps/chosen": -259.8296813964844, + "logps/rejected": -511.6299133300781, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8885109424591064, + "rewards/margins": 3.3884758949279785, + "rewards/rejected": -2.499964952468872, + "step": 787 + }, + { + "epoch": 1.0, + "learning_rate": 5.2066248712440654e-08, + "logits/chosen": -3.2413992881774902, + "logits/rejected": -3.191455841064453, + "logps/chosen": -246.49688720703125, + "logps/rejected": -458.39373779296875, + "loss": 0.297, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0021110773086548, + "rewards/margins": 3.4100167751312256, + "rewards/rejected": -2.4079055786132812, + "step": 788 + }, + { + "epoch": 1.01, + "learning_rate": 5.196299078795343e-08, + "logits/chosen": -3.18338680267334, + "logits/rejected": -3.1471400260925293, + "logps/chosen": -287.5966796875, + "logps/rejected": -723.8621826171875, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0227211713790894, + "rewards/margins": 5.338837623596191, + "rewards/rejected": -4.3161163330078125, + "step": 789 + }, + { + "epoch": 1.01, + "learning_rate": 5.185972447793312e-08, + "logits/chosen": -3.218878746032715, + "logits/rejected": -3.177225351333618, + "logps/chosen": -288.15753173828125, + "logps/rejected": -446.03936767578125, + "loss": 0.3375, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1122009754180908, + "rewards/margins": 3.140301465988159, + "rewards/rejected": -2.0281004905700684, + "step": 790 + }, + { + "epoch": 1.01, + "learning_rate": 5.1756450223514295e-08, + "logits/chosen": -3.2581586837768555, + "logits/rejected": -3.062180995941162, + "logps/chosen": -256.7535400390625, + "logps/rejected": -467.3203125, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2454766035079956, + "rewards/margins": 3.4123024940490723, + "rewards/rejected": -2.166826009750366, + "step": 791 + }, + { + "epoch": 1.01, + "learning_rate": 5.16531684658654e-08, + "logits/chosen": -3.2520999908447266, + "logits/rejected": -3.120391845703125, + "logps/chosen": -266.9744873046875, + "logps/rejected": -674.6072998046875, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1862938404083252, + "rewards/margins": 5.160254955291748, + "rewards/rejected": -3.9739608764648438, + "step": 792 + }, + { + "epoch": 1.01, + "learning_rate": 5.1549879646187035e-08, + "logits/chosen": -3.2286925315856934, + "logits/rejected": -3.1401960849761963, + "logps/chosen": -308.4204406738281, + "logps/rejected": -596.10791015625, + "loss": 0.3104, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4341109991073608, + "rewards/margins": 4.912300109863281, + "rewards/rejected": -3.47818922996521, + "step": 793 + }, + { + "epoch": 1.01, + "learning_rate": 5.1446584205709856e-08, + "logits/chosen": -3.287750482559204, + "logits/rejected": -3.1208102703094482, + "logps/chosen": -270.0322265625, + "logps/rejected": -440.63787841796875, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.159143090248108, + "rewards/margins": 3.438494920730591, + "rewards/rejected": -2.2793517112731934, + "step": 794 + }, + { + "epoch": 1.01, + "learning_rate": 5.134328258569289e-08, + "logits/chosen": -3.213853359222412, + "logits/rejected": -3.0899558067321777, + "logps/chosen": -263.5126037597656, + "logps/rejected": -884.4671630859375, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0471237897872925, + "rewards/margins": 4.9367780685424805, + "rewards/rejected": -3.8896546363830566, + "step": 795 + }, + { + "epoch": 1.01, + "learning_rate": 5.123997522742151e-08, + "logits/chosen": -3.1879892349243164, + "logits/rejected": -3.107243299484253, + "logps/chosen": -300.166259765625, + "logps/rejected": -538.9993896484375, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2455368041992188, + "rewards/margins": 3.962550401687622, + "rewards/rejected": -2.7170135974884033, + "step": 796 + }, + { + "epoch": 1.02, + "learning_rate": 5.1136662572205605e-08, + "logits/chosen": -3.290238857269287, + "logits/rejected": -3.103997230529785, + "logps/chosen": -278.3192443847656, + "logps/rejected": -300.8626708984375, + "loss": 0.3295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1794875860214233, + "rewards/margins": 2.5154924392700195, + "rewards/rejected": -1.3360047340393066, + "step": 797 + }, + { + "epoch": 1.02, + "learning_rate": 5.103334506137772e-08, + "logits/chosen": -3.258148431777954, + "logits/rejected": -2.963809013366699, + "logps/chosen": -245.9380645751953, + "logps/rejected": -882.457763671875, + "loss": 0.2804, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9962555170059204, + "rewards/margins": 3.793072462081909, + "rewards/rejected": -2.7968170642852783, + "step": 798 + }, + { + "epoch": 1.02, + "learning_rate": 5.093002313629111e-08, + "logits/chosen": -3.2030954360961914, + "logits/rejected": -3.1464896202087402, + "logps/chosen": -284.3785095214844, + "logps/rejected": -961.203369140625, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.352393388748169, + "rewards/margins": 6.699155807495117, + "rewards/rejected": -5.346762180328369, + "step": 799 + }, + { + "epoch": 1.02, + "learning_rate": 5.082669723831793e-08, + "logits/chosen": -3.2767841815948486, + "logits/rejected": -3.068635940551758, + "logps/chosen": -284.2648010253906, + "logps/rejected": -688.26708984375, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0303558111190796, + "rewards/margins": 4.272671699523926, + "rewards/rejected": -3.2423157691955566, + "step": 800 + }, + { + "epoch": 1.02, + "learning_rate": 5.072336780884724e-08, + "logits/chosen": -3.190828323364258, + "logits/rejected": -3.1363277435302734, + "logps/chosen": -242.91241455078125, + "logps/rejected": -2844.917724609375, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.298529863357544, + "rewards/margins": 14.296154975891113, + "rewards/rejected": -12.997625350952148, + "step": 801 + }, + { + "epoch": 1.02, + "learning_rate": 5.062003528928327e-08, + "logits/chosen": -3.2947030067443848, + "logits/rejected": -3.168087959289551, + "logps/chosen": -219.31307983398438, + "logps/rejected": -956.4599609375, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0647385120391846, + "rewards/margins": 5.89579439163208, + "rewards/rejected": -4.831056118011475, + "step": 802 + }, + { + "epoch": 1.02, + "learning_rate": 5.051670012104336e-08, + "logits/chosen": -3.2605156898498535, + "logits/rejected": -3.116476535797119, + "logps/chosen": -268.9693603515625, + "logps/rejected": -1185.8974609375, + "loss": 0.2821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0726898908615112, + "rewards/margins": 6.64298152923584, + "rewards/rejected": -5.570291519165039, + "step": 803 + }, + { + "epoch": 1.02, + "learning_rate": 5.041336274555624e-08, + "logits/chosen": -3.2397236824035645, + "logits/rejected": -3.1023037433624268, + "logps/chosen": -284.0802001953125, + "logps/rejected": -456.5216369628906, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1107085943222046, + "rewards/margins": 2.8825926780700684, + "rewards/rejected": -1.7718842029571533, + "step": 804 + }, + { + "epoch": 1.03, + "learning_rate": 5.031002360426003e-08, + "logits/chosen": -3.161473274230957, + "logits/rejected": -3.1458754539489746, + "logps/chosen": -278.92559814453125, + "logps/rejected": -512.2708740234375, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0805480480194092, + "rewards/margins": 3.434692621231079, + "rewards/rejected": -2.354144334793091, + "step": 805 + }, + { + "epoch": 1.03, + "learning_rate": 5.0206683138600414e-08, + "logits/chosen": -3.314995288848877, + "logits/rejected": -3.1785640716552734, + "logps/chosen": -239.920654296875, + "logps/rejected": -711.9462890625, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1070587635040283, + "rewards/margins": 4.936486721038818, + "rewards/rejected": -3.829428195953369, + "step": 806 + }, + { + "epoch": 1.03, + "learning_rate": 5.01033417900287e-08, + "logits/chosen": -3.2205843925476074, + "logits/rejected": -3.167642831802368, + "logps/chosen": -265.5823974609375, + "logps/rejected": -618.7151489257812, + "loss": 0.319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8665786981582642, + "rewards/margins": 4.53282356262207, + "rewards/rejected": -3.6662447452545166, + "step": 807 + }, + { + "epoch": 1.03, + "learning_rate": 5e-08, + "logits/chosen": -3.1861963272094727, + "logits/rejected": -3.0812597274780273, + "logps/chosen": -262.11468505859375, + "logps/rejected": -298.84423828125, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0000076293945312, + "rewards/margins": 2.1276352405548096, + "rewards/rejected": -1.1276276111602783, + "step": 808 + }, + { + "epoch": 1.03, + "learning_rate": 4.9896658209971305e-08, + "logits/chosen": -3.255120277404785, + "logits/rejected": -3.104529857635498, + "logps/chosen": -241.1286163330078, + "logps/rejected": -743.8302001953125, + "loss": 0.2656, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2811737060546875, + "rewards/margins": 4.973828315734863, + "rewards/rejected": -3.692654609680176, + "step": 809 + }, + { + "epoch": 1.03, + "learning_rate": 4.9793316861399595e-08, + "logits/chosen": -3.222440242767334, + "logits/rejected": -3.132540702819824, + "logps/chosen": -242.8356170654297, + "logps/rejected": -1273.06982421875, + "loss": 0.2638, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2746597528457642, + "rewards/margins": 6.922816753387451, + "rewards/rejected": -5.648157119750977, + "step": 810 + }, + { + "epoch": 1.03, + "learning_rate": 4.9689976395739956e-08, + "logits/chosen": -3.2859725952148438, + "logits/rejected": -3.1218438148498535, + "logps/chosen": -251.31277465820312, + "logps/rejected": -3209.266845703125, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4042030572891235, + "rewards/margins": 7.089252471923828, + "rewards/rejected": -5.685049533843994, + "step": 811 + }, + { + "epoch": 1.04, + "learning_rate": 4.9586637254443753e-08, + "logits/chosen": -3.204268455505371, + "logits/rejected": -2.9885525703430176, + "logps/chosen": -265.3177490234375, + "logps/rejected": -946.2429809570312, + "loss": 0.2931, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1888405084609985, + "rewards/margins": 5.104746341705322, + "rewards/rejected": -3.9159059524536133, + "step": 812 + }, + { + "epoch": 1.04, + "learning_rate": 4.948329987895664e-08, + "logits/chosen": -3.3094635009765625, + "logits/rejected": -3.2334585189819336, + "logps/chosen": -286.28509521484375, + "logps/rejected": -536.7052001953125, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1541054248809814, + "rewards/margins": 3.873986005783081, + "rewards/rejected": -2.7198805809020996, + "step": 813 + }, + { + "epoch": 1.04, + "learning_rate": 4.937996471071675e-08, + "logits/chosen": -3.209049940109253, + "logits/rejected": -3.1240899562835693, + "logps/chosen": -244.0810546875, + "logps/rejected": -448.54925537109375, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1884422302246094, + "rewards/margins": 3.1771187782287598, + "rewards/rejected": -1.9886765480041504, + "step": 814 + }, + { + "epoch": 1.04, + "learning_rate": 4.927663219115275e-08, + "logits/chosen": -3.2953085899353027, + "logits/rejected": -3.184924840927124, + "logps/chosen": -237.09629821777344, + "logps/rejected": -781.935791015625, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9276611804962158, + "rewards/margins": 5.319277763366699, + "rewards/rejected": -4.3916168212890625, + "step": 815 + }, + { + "epoch": 1.04, + "learning_rate": 4.917330276168207e-08, + "logits/chosen": -3.25240421295166, + "logits/rejected": -3.0832886695861816, + "logps/chosen": -248.13311767578125, + "logps/rejected": -1227.447998046875, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.161299228668213, + "rewards/margins": 6.457472324371338, + "rewards/rejected": -5.296173095703125, + "step": 816 + }, + { + "epoch": 1.04, + "learning_rate": 4.906997686370889e-08, + "logits/chosen": -3.2380244731903076, + "logits/rejected": -3.0799074172973633, + "logps/chosen": -274.8534851074219, + "logps/rejected": -569.546630859375, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4370598793029785, + "rewards/margins": 3.6226327419281006, + "rewards/rejected": -2.185572862625122, + "step": 817 + }, + { + "epoch": 1.04, + "learning_rate": 4.8966654938622295e-08, + "logits/chosen": -3.2742061614990234, + "logits/rejected": -3.103954792022705, + "logps/chosen": -246.8990020751953, + "logps/rejected": -533.184814453125, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3465194702148438, + "rewards/margins": 4.139093399047852, + "rewards/rejected": -2.7925734519958496, + "step": 818 + }, + { + "epoch": 1.04, + "learning_rate": 4.886333742779439e-08, + "logits/chosen": -3.2221736907958984, + "logits/rejected": -3.146641731262207, + "logps/chosen": -267.3282470703125, + "logps/rejected": -314.95159912109375, + "loss": 0.2925, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.218927025794983, + "rewards/margins": 2.947808265686035, + "rewards/rejected": -1.7288811206817627, + "step": 819 + }, + { + "epoch": 1.05, + "learning_rate": 4.8760024772578495e-08, + "logits/chosen": -3.183626890182495, + "logits/rejected": -3.0663745403289795, + "logps/chosen": -240.84555053710938, + "logps/rejected": -1354.427978515625, + "loss": 0.2733, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1721657514572144, + "rewards/margins": 6.81851863861084, + "rewards/rejected": -5.646353244781494, + "step": 820 + }, + { + "epoch": 1.05, + "learning_rate": 4.865671741430711e-08, + "logits/chosen": -3.2556447982788086, + "logits/rejected": -3.125800371170044, + "logps/chosen": -265.446533203125, + "logps/rejected": -475.583251953125, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0309005975723267, + "rewards/margins": 3.4439454078674316, + "rewards/rejected": -2.4130449295043945, + "step": 821 + }, + { + "epoch": 1.05, + "learning_rate": 4.855341579429014e-08, + "logits/chosen": -3.2599477767944336, + "logits/rejected": -3.070375442504883, + "logps/chosen": -279.4562072753906, + "logps/rejected": -575.668701171875, + "loss": 0.3135, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2639023065567017, + "rewards/margins": 3.899231195449829, + "rewards/rejected": -2.635328769683838, + "step": 822 + }, + { + "epoch": 1.05, + "learning_rate": 4.845012035381298e-08, + "logits/chosen": -3.2683095932006836, + "logits/rejected": -3.102287769317627, + "logps/chosen": -255.2694091796875, + "logps/rejected": -662.21533203125, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.971448540687561, + "rewards/margins": 3.8342676162719727, + "rewards/rejected": -2.862818956375122, + "step": 823 + }, + { + "epoch": 1.05, + "learning_rate": 4.834683153413459e-08, + "logits/chosen": -3.232041835784912, + "logits/rejected": -3.215564250946045, + "logps/chosen": -253.16204833984375, + "logps/rejected": -661.39501953125, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1983017921447754, + "rewards/margins": 4.643229961395264, + "rewards/rejected": -3.4449281692504883, + "step": 824 + }, + { + "epoch": 1.05, + "learning_rate": 4.8243549776485714e-08, + "logits/chosen": -3.2736215591430664, + "logits/rejected": -3.1194405555725098, + "logps/chosen": -269.4471130371094, + "logps/rejected": -563.33740234375, + "loss": 0.3263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3500069379806519, + "rewards/margins": 4.075613975524902, + "rewards/rejected": -2.725607395172119, + "step": 825 + }, + { + "epoch": 1.05, + "learning_rate": 4.814027552206689e-08, + "logits/chosen": -3.2266666889190674, + "logits/rejected": -3.102018356323242, + "logps/chosen": -278.1918029785156, + "logps/rejected": -585.080322265625, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2663085460662842, + "rewards/margins": 4.5146942138671875, + "rewards/rejected": -3.2483856678009033, + "step": 826 + }, + { + "epoch": 1.05, + "learning_rate": 4.803700921204658e-08, + "logits/chosen": -3.2636210918426514, + "logits/rejected": -3.0324933528900146, + "logps/chosen": -232.48178100585938, + "logps/rejected": -990.2718505859375, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2100517749786377, + "rewards/margins": 5.369668006896973, + "rewards/rejected": -4.159615993499756, + "step": 827 + }, + { + "epoch": 1.06, + "learning_rate": 4.793375128755933e-08, + "logits/chosen": -3.2975592613220215, + "logits/rejected": -3.1014211177825928, + "logps/chosen": -245.6276397705078, + "logps/rejected": -535.621826171875, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0430206060409546, + "rewards/margins": 3.9023361206054688, + "rewards/rejected": -2.8593156337738037, + "step": 828 + }, + { + "epoch": 1.06, + "learning_rate": 4.7830502189703914e-08, + "logits/chosen": -3.145582675933838, + "logits/rejected": -3.119906425476074, + "logps/chosen": -268.751708984375, + "logps/rejected": -964.9918212890625, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1772735118865967, + "rewards/margins": 6.446011543273926, + "rewards/rejected": -5.26873779296875, + "step": 829 + }, + { + "epoch": 1.06, + "learning_rate": 4.7727262359541324e-08, + "logits/chosen": -3.174508571624756, + "logits/rejected": -3.0374739170074463, + "logps/chosen": -269.41912841796875, + "logps/rejected": -811.3302612304688, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.103609561920166, + "rewards/margins": 4.894234657287598, + "rewards/rejected": -3.7906250953674316, + "step": 830 + }, + { + "epoch": 1.06, + "learning_rate": 4.7624032238093e-08, + "logits/chosen": -3.217716693878174, + "logits/rejected": -3.0556278228759766, + "logps/chosen": -242.52774047851562, + "logps/rejected": -1626.892822265625, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1541335582733154, + "rewards/margins": 7.1442365646362305, + "rewards/rejected": -5.990103244781494, + "step": 831 + }, + { + "epoch": 1.06, + "learning_rate": 4.7520812266338875e-08, + "logits/chosen": -3.207028865814209, + "logits/rejected": -3.1220595836639404, + "logps/chosen": -243.49989318847656, + "logps/rejected": -486.12066650390625, + "loss": 0.2714, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0802704095840454, + "rewards/margins": 3.9954833984375, + "rewards/rejected": -2.915213108062744, + "step": 832 + }, + { + "epoch": 1.06, + "learning_rate": 4.7417602885215605e-08, + "logits/chosen": -3.2312049865722656, + "logits/rejected": -3.1186654567718506, + "logps/chosen": -279.5676574707031, + "logps/rejected": -426.0090637207031, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.149684190750122, + "rewards/margins": 3.7003021240234375, + "rewards/rejected": -2.5506181716918945, + "step": 833 + }, + { + "epoch": 1.06, + "learning_rate": 4.7314404535614514e-08, + "logits/chosen": -3.245495080947876, + "logits/rejected": -3.179534435272217, + "logps/chosen": -235.09716796875, + "logps/rejected": -585.3907470703125, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1903190612792969, + "rewards/margins": 4.18279504776001, + "rewards/rejected": -2.992475986480713, + "step": 834 + }, + { + "epoch": 1.06, + "learning_rate": 4.7211217658379835e-08, + "logits/chosen": -3.24485445022583, + "logits/rejected": -3.196881055831909, + "logps/chosen": -284.35601806640625, + "logps/rejected": -722.3223876953125, + "loss": 0.3396, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.261234998703003, + "rewards/margins": 5.031806945800781, + "rewards/rejected": -3.7705719470977783, + "step": 835 + }, + { + "epoch": 1.07, + "learning_rate": 4.7108042694306806e-08, + "logits/chosen": -3.2415177822113037, + "logits/rejected": -3.115119695663452, + "logps/chosen": -267.8130798339844, + "logps/rejected": -1052.93115234375, + "loss": 0.297, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1605926752090454, + "rewards/margins": 6.402127265930176, + "rewards/rejected": -5.24153470993042, + "step": 836 + }, + { + "epoch": 1.07, + "learning_rate": 4.700488008413975e-08, + "logits/chosen": -3.232219934463501, + "logits/rejected": -3.172532081604004, + "logps/chosen": -256.5403747558594, + "logps/rejected": -459.549560546875, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3259377479553223, + "rewards/margins": 3.5497446060180664, + "rewards/rejected": -2.223806858062744, + "step": 837 + }, + { + "epoch": 1.07, + "learning_rate": 4.690173026857027e-08, + "logits/chosen": -3.2316734790802, + "logits/rejected": -3.183192253112793, + "logps/chosen": -240.5075225830078, + "logps/rejected": -841.7562255859375, + "loss": 0.2783, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9623031616210938, + "rewards/margins": 5.877863883972168, + "rewards/rejected": -4.915560722351074, + "step": 838 + }, + { + "epoch": 1.07, + "learning_rate": 4.679859368823524e-08, + "logits/chosen": -3.1994919776916504, + "logits/rejected": -3.091930866241455, + "logps/chosen": -282.50665283203125, + "logps/rejected": -1225.72021484375, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2188630104064941, + "rewards/margins": 5.814361572265625, + "rewards/rejected": -4.595499038696289, + "step": 839 + }, + { + "epoch": 1.07, + "learning_rate": 4.669547078371503e-08, + "logits/chosen": -3.217411518096924, + "logits/rejected": -3.1209921836853027, + "logps/chosen": -291.68572998046875, + "logps/rejected": -596.6261596679688, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.178501844406128, + "rewards/margins": 3.66558837890625, + "rewards/rejected": -2.487086534500122, + "step": 840 + }, + { + "epoch": 1.07, + "learning_rate": 4.659236199553159e-08, + "logits/chosen": -3.284074306488037, + "logits/rejected": -3.053302526473999, + "logps/chosen": -289.78594970703125, + "logps/rejected": -808.2755126953125, + "loss": 0.2914, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2250969409942627, + "rewards/margins": 4.813021183013916, + "rewards/rejected": -3.5879244804382324, + "step": 841 + }, + { + "epoch": 1.07, + "learning_rate": 4.648926776414659e-08, + "logits/chosen": -3.253066062927246, + "logits/rejected": -3.016396999359131, + "logps/chosen": -234.7766876220703, + "logps/rejected": -641.1756591796875, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1082611083984375, + "rewards/margins": 3.572161912918091, + "rewards/rejected": -2.4639008045196533, + "step": 842 + }, + { + "epoch": 1.07, + "learning_rate": 4.638618852995947e-08, + "logits/chosen": -3.241419792175293, + "logits/rejected": -3.1344051361083984, + "logps/chosen": -275.9601745605469, + "logps/rejected": -642.995849609375, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9675544500350952, + "rewards/margins": 4.354236602783203, + "rewards/rejected": -3.3866822719573975, + "step": 843 + }, + { + "epoch": 1.08, + "learning_rate": 4.6283124733305623e-08, + "logits/chosen": -3.2716822624206543, + "logits/rejected": -3.0817174911499023, + "logps/chosen": -248.4237823486328, + "logps/rejected": -445.8251953125, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3659896850585938, + "rewards/margins": 3.5918173789978027, + "rewards/rejected": -2.225827932357788, + "step": 844 + }, + { + "epoch": 1.08, + "learning_rate": 4.6180076814454503e-08, + "logits/chosen": -3.2824811935424805, + "logits/rejected": -3.081486225128174, + "logps/chosen": -249.58627319335938, + "logps/rejected": -492.2528381347656, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0720497369766235, + "rewards/margins": 2.915280818939209, + "rewards/rejected": -1.843231201171875, + "step": 845 + }, + { + "epoch": 1.08, + "learning_rate": 4.6077045213607754e-08, + "logits/chosen": -3.182791233062744, + "logits/rejected": -3.0919504165649414, + "logps/chosen": -241.0277099609375, + "logps/rejected": -554.9336547851562, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.071192979812622, + "rewards/margins": 3.54787278175354, + "rewards/rejected": -2.476680040359497, + "step": 846 + }, + { + "epoch": 1.08, + "learning_rate": 4.597403037089728e-08, + "logits/chosen": -3.2244625091552734, + "logits/rejected": -3.1161131858825684, + "logps/chosen": -284.856689453125, + "logps/rejected": -1265.073974609375, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4357315301895142, + "rewards/margins": 8.538087844848633, + "rewards/rejected": -7.10235595703125, + "step": 847 + }, + { + "epoch": 1.08, + "learning_rate": 4.5871032726383385e-08, + "logits/chosen": -3.208662748336792, + "logits/rejected": -3.11830735206604, + "logps/chosen": -265.0048828125, + "logps/rejected": -615.3741455078125, + "loss": 0.3148, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1529998779296875, + "rewards/margins": 4.052212715148926, + "rewards/rejected": -2.8992128372192383, + "step": 848 + }, + { + "epoch": 1.08, + "learning_rate": 4.576805272005297e-08, + "logits/chosen": -3.2324488162994385, + "logits/rejected": -3.1284897327423096, + "logps/chosen": -258.16656494140625, + "logps/rejected": -692.74951171875, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.788703203201294, + "rewards/margins": 4.766141891479492, + "rewards/rejected": -3.977438449859619, + "step": 849 + }, + { + "epoch": 1.08, + "learning_rate": 4.566509079181751e-08, + "logits/chosen": -3.2321293354034424, + "logits/rejected": -3.120149612426758, + "logps/chosen": -271.86798095703125, + "logps/rejected": -761.6764526367188, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2466933727264404, + "rewards/margins": 5.891875267028809, + "rewards/rejected": -4.645181179046631, + "step": 850 + }, + { + "epoch": 1.08, + "learning_rate": 4.5562147381511334e-08, + "logits/chosen": -3.2209229469299316, + "logits/rejected": -3.1109180450439453, + "logps/chosen": -237.1632537841797, + "logps/rejected": -1543.143310546875, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1522598266601562, + "rewards/margins": 8.420106887817383, + "rewards/rejected": -7.267847061157227, + "step": 851 + }, + { + "epoch": 1.09, + "learning_rate": 4.5459222928889584e-08, + "logits/chosen": -3.2282485961914062, + "logits/rejected": -3.2056877613067627, + "logps/chosen": -243.94676208496094, + "logps/rejected": -709.369873046875, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.130354404449463, + "rewards/margins": 5.274177551269531, + "rewards/rejected": -4.143823146820068, + "step": 852 + }, + { + "epoch": 1.09, + "learning_rate": 4.53563178736265e-08, + "logits/chosen": -3.2141857147216797, + "logits/rejected": -3.149350643157959, + "logps/chosen": -275.2303466796875, + "logps/rejected": -360.8993835449219, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2742691040039062, + "rewards/margins": 3.062985420227051, + "rewards/rejected": -1.788716197013855, + "step": 853 + }, + { + "epoch": 1.09, + "learning_rate": 4.525343265531338e-08, + "logits/chosen": -3.2330708503723145, + "logits/rejected": -3.1819968223571777, + "logps/chosen": -275.84320068359375, + "logps/rejected": -611.7193603515625, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1941285133361816, + "rewards/margins": 4.409221172332764, + "rewards/rejected": -3.215092658996582, + "step": 854 + }, + { + "epoch": 1.09, + "learning_rate": 4.515056771345685e-08, + "logits/chosen": -3.2986040115356445, + "logits/rejected": -3.2020015716552734, + "logps/chosen": -224.48777770996094, + "logps/rejected": -622.6881103515625, + "loss": 0.2828, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1777374744415283, + "rewards/margins": 4.311984062194824, + "rewards/rejected": -3.134246826171875, + "step": 855 + }, + { + "epoch": 1.09, + "learning_rate": 4.504772348747686e-08, + "logits/chosen": -3.2165913581848145, + "logits/rejected": -3.1277990341186523, + "logps/chosen": -263.0519714355469, + "logps/rejected": -912.3709106445312, + "loss": 0.2829, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2715133428573608, + "rewards/margins": 6.275120735168457, + "rewards/rejected": -5.003607273101807, + "step": 856 + }, + { + "epoch": 1.09, + "learning_rate": 4.494490041670494e-08, + "logits/chosen": -3.25146746635437, + "logits/rejected": -3.046191692352295, + "logps/chosen": -275.56072998046875, + "logps/rejected": -307.853271484375, + "loss": 0.3345, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2493133544921875, + "rewards/margins": 2.6494805812835693, + "rewards/rejected": -1.4001671075820923, + "step": 857 + }, + { + "epoch": 1.09, + "learning_rate": 4.484209894038215e-08, + "logits/chosen": -3.2348668575286865, + "logits/rejected": -3.149772882461548, + "logps/chosen": -277.0849304199219, + "logps/rejected": -713.1826782226562, + "loss": 0.3074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4619522094726562, + "rewards/margins": 5.021318435668945, + "rewards/rejected": -3.55936598777771, + "step": 858 + }, + { + "epoch": 1.09, + "learning_rate": 4.473931949765741e-08, + "logits/chosen": -3.251371383666992, + "logits/rejected": -3.214062213897705, + "logps/chosen": -250.93719482421875, + "logps/rejected": -1203.3834228515625, + "loss": 0.2585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0487968921661377, + "rewards/margins": 6.957060813903809, + "rewards/rejected": -5.90826416015625, + "step": 859 + }, + { + "epoch": 1.1, + "learning_rate": 4.463656252758542e-08, + "logits/chosen": -3.1840014457702637, + "logits/rejected": -3.0877695083618164, + "logps/chosen": -245.40301513671875, + "logps/rejected": -603.5806884765625, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.062950849533081, + "rewards/margins": 3.879192352294922, + "rewards/rejected": -2.816241502761841, + "step": 860 + }, + { + "epoch": 1.1, + "learning_rate": 4.453382846912495e-08, + "logits/chosen": -3.2457022666931152, + "logits/rejected": -3.0526256561279297, + "logps/chosen": -274.2144470214844, + "logps/rejected": -1807.881591796875, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.237095594406128, + "rewards/margins": 8.710832595825195, + "rewards/rejected": -7.473736763000488, + "step": 861 + }, + { + "epoch": 1.1, + "learning_rate": 4.443111776113686e-08, + "logits/chosen": -3.2644691467285156, + "logits/rejected": -3.2062840461730957, + "logps/chosen": -255.95472717285156, + "logps/rejected": -683.6300048828125, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2315078973770142, + "rewards/margins": 4.943109512329102, + "rewards/rejected": -3.7116012573242188, + "step": 862 + }, + { + "epoch": 1.1, + "learning_rate": 4.432843084238228e-08, + "logits/chosen": -3.2217183113098145, + "logits/rejected": -3.1132946014404297, + "logps/chosen": -277.56524658203125, + "logps/rejected": -621.31591796875, + "loss": 0.2721, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1883468627929688, + "rewards/margins": 4.453465461730957, + "rewards/rejected": -3.265118360519409, + "step": 863 + }, + { + "epoch": 1.1, + "learning_rate": 4.4225768151520695e-08, + "logits/chosen": -3.2663233280181885, + "logits/rejected": -3.2441916465759277, + "logps/chosen": -300.42108154296875, + "logps/rejected": -715.6365966796875, + "loss": 0.3094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4342743158340454, + "rewards/margins": 5.92034912109375, + "rewards/rejected": -4.486074924468994, + "step": 864 + }, + { + "epoch": 1.1, + "learning_rate": 4.412313012710812e-08, + "logits/chosen": -3.245445489883423, + "logits/rejected": -3.101686716079712, + "logps/chosen": -267.44561767578125, + "logps/rejected": -418.2472229003906, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3479598760604858, + "rewards/margins": 3.3426315784454346, + "rewards/rejected": -1.9946717023849487, + "step": 865 + }, + { + "epoch": 1.1, + "learning_rate": 4.402051720759518e-08, + "logits/chosen": -3.236477851867676, + "logits/rejected": -3.0823841094970703, + "logps/chosen": -267.8712158203125, + "logps/rejected": -831.1948852539062, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.290632724761963, + "rewards/margins": 5.304084777832031, + "rewards/rejected": -4.013452529907227, + "step": 866 + }, + { + "epoch": 1.11, + "learning_rate": 4.391792983132525e-08, + "logits/chosen": -3.2179741859436035, + "logits/rejected": -3.147470235824585, + "logps/chosen": -284.93603515625, + "logps/rejected": -931.8097534179688, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.428486704826355, + "rewards/margins": 5.6796555519104, + "rewards/rejected": -4.251168727874756, + "step": 867 + }, + { + "epoch": 1.11, + "learning_rate": 4.3815368436532614e-08, + "logits/chosen": -3.2729177474975586, + "logits/rejected": -3.21144437789917, + "logps/chosen": -259.282958984375, + "logps/rejected": -505.36407470703125, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2040725946426392, + "rewards/margins": 3.970916748046875, + "rewards/rejected": -2.7668442726135254, + "step": 868 + }, + { + "epoch": 1.11, + "learning_rate": 4.371283346134056e-08, + "logits/chosen": -3.2437925338745117, + "logits/rejected": -3.125124931335449, + "logps/chosen": -261.47161865234375, + "logps/rejected": -632.0044555664062, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1210434436798096, + "rewards/margins": 4.504167556762695, + "rewards/rejected": -3.3831238746643066, + "step": 869 + }, + { + "epoch": 1.11, + "learning_rate": 4.361032534375951e-08, + "logits/chosen": -3.2548208236694336, + "logits/rejected": -3.0169973373413086, + "logps/chosen": -287.41571044921875, + "logps/rejected": -285.0220947265625, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4619140625, + "rewards/margins": 3.1268372535705566, + "rewards/rejected": -1.664923071861267, + "step": 870 + }, + { + "epoch": 1.11, + "learning_rate": 4.350784452168514e-08, + "logits/chosen": -3.2768421173095703, + "logits/rejected": -3.1523754596710205, + "logps/chosen": -278.6858215332031, + "logps/rejected": -749.6019897460938, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1737732887268066, + "rewards/margins": 5.577963352203369, + "rewards/rejected": -4.4041900634765625, + "step": 871 + }, + { + "epoch": 1.11, + "learning_rate": 4.340539143289655e-08, + "logits/chosen": -3.190260410308838, + "logits/rejected": -3.099597692489624, + "logps/chosen": -281.8913879394531, + "logps/rejected": -440.62738037109375, + "loss": 0.3107, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1712448596954346, + "rewards/margins": 3.3159830570220947, + "rewards/rejected": -2.14473819732666, + "step": 872 + }, + { + "epoch": 1.11, + "learning_rate": 4.3302966515054374e-08, + "logits/chosen": -3.2565841674804688, + "logits/rejected": -3.1340794563293457, + "logps/chosen": -282.8582763671875, + "logps/rejected": -902.6724243164062, + "loss": 0.305, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1523605585098267, + "rewards/margins": 5.52084493637085, + "rewards/rejected": -4.3684844970703125, + "step": 873 + }, + { + "epoch": 1.11, + "learning_rate": 4.320057020569888e-08, + "logits/chosen": -3.2699387073516846, + "logits/rejected": -3.122677803039551, + "logps/chosen": -262.8391418457031, + "logps/rejected": -1092.7222900390625, + "loss": 0.2965, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0246765613555908, + "rewards/margins": 6.034731864929199, + "rewards/rejected": -5.0100555419921875, + "step": 874 + }, + { + "epoch": 1.12, + "learning_rate": 4.309820294224812e-08, + "logits/chosen": -3.234910488128662, + "logits/rejected": -3.052011251449585, + "logps/chosen": -251.82492065429688, + "logps/rejected": -290.25390625, + "loss": 0.3033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.183556318283081, + "rewards/margins": 2.783989667892456, + "rewards/rejected": -1.600433349609375, + "step": 875 + }, + { + "epoch": 1.12, + "learning_rate": 4.29958651619961e-08, + "logits/chosen": -3.185305595397949, + "logits/rejected": -3.136566638946533, + "logps/chosen": -302.03082275390625, + "logps/rejected": -895.8548583984375, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.226861596107483, + "rewards/margins": 6.154000759124756, + "rewards/rejected": -4.9271392822265625, + "step": 876 + }, + { + "epoch": 1.12, + "learning_rate": 4.289355730211088e-08, + "logits/chosen": -3.2407593727111816, + "logits/rejected": -3.1343259811401367, + "logps/chosen": -250.22308349609375, + "logps/rejected": -548.7429809570312, + "loss": 0.3046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9668365716934204, + "rewards/margins": 3.6717257499694824, + "rewards/rejected": -2.7048890590667725, + "step": 877 + }, + { + "epoch": 1.12, + "learning_rate": 4.279127979963266e-08, + "logits/chosen": -3.2110209465026855, + "logits/rejected": -3.144874334335327, + "logps/chosen": -247.8845672607422, + "logps/rejected": -696.3213500976562, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2318840026855469, + "rewards/margins": 5.201404571533203, + "rewards/rejected": -3.9695205688476562, + "step": 878 + }, + { + "epoch": 1.12, + "learning_rate": 4.268903309147201e-08, + "logits/chosen": -3.192817211151123, + "logits/rejected": -3.132622241973877, + "logps/chosen": -284.150390625, + "logps/rejected": -1340.743408203125, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.291133165359497, + "rewards/margins": 7.992231845855713, + "rewards/rejected": -6.701098442077637, + "step": 879 + }, + { + "epoch": 1.12, + "learning_rate": 4.2586817614407896e-08, + "logits/chosen": -3.2273366451263428, + "logits/rejected": -3.2037434577941895, + "logps/chosen": -241.6009521484375, + "logps/rejected": -726.7899169921875, + "loss": 0.3082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.211541771888733, + "rewards/margins": 5.203585624694824, + "rewards/rejected": -3.992043972015381, + "step": 880 + }, + { + "epoch": 1.12, + "learning_rate": 4.248463380508595e-08, + "logits/chosen": -3.24938702583313, + "logits/rejected": -3.187814235687256, + "logps/chosen": -257.455078125, + "logps/rejected": -1004.388916015625, + "loss": 0.309, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4169647693634033, + "rewards/margins": 7.304934501647949, + "rewards/rejected": -5.887969970703125, + "step": 881 + }, + { + "epoch": 1.12, + "learning_rate": 4.238248210001645e-08, + "logits/chosen": -3.232820749282837, + "logits/rejected": -3.100131034851074, + "logps/chosen": -270.66473388671875, + "logps/rejected": -388.4500732421875, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2429970502853394, + "rewards/margins": 3.2991387844085693, + "rewards/rejected": -2.0561416149139404, + "step": 882 + }, + { + "epoch": 1.13, + "learning_rate": 4.2280362935572575e-08, + "logits/chosen": -3.253898859024048, + "logits/rejected": -3.066509485244751, + "logps/chosen": -274.40972900390625, + "logps/rejected": -552.1100463867188, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.093721866607666, + "rewards/margins": 3.944171905517578, + "rewards/rejected": -2.850450038909912, + "step": 883 + }, + { + "epoch": 1.13, + "learning_rate": 4.217827674798844e-08, + "logits/chosen": -3.1411068439483643, + "logits/rejected": -3.0089430809020996, + "logps/chosen": -286.3582763671875, + "logps/rejected": -406.9273986816406, + "loss": 0.299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.289088487625122, + "rewards/margins": 2.923609972000122, + "rewards/rejected": -1.634521484375, + "step": 884 + }, + { + "epoch": 1.13, + "learning_rate": 4.2076223973357403e-08, + "logits/chosen": -3.273669719696045, + "logits/rejected": -3.130819320678711, + "logps/chosen": -267.99749755859375, + "logps/rejected": -819.5901489257812, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1760444641113281, + "rewards/margins": 5.330554962158203, + "rewards/rejected": -4.154510498046875, + "step": 885 + }, + { + "epoch": 1.13, + "learning_rate": 4.197420504762997e-08, + "logits/chosen": -3.2589669227600098, + "logits/rejected": -3.1853675842285156, + "logps/chosen": -253.36248779296875, + "logps/rejected": -597.0970458984375, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.065474033355713, + "rewards/margins": 4.387922763824463, + "rewards/rejected": -3.32244873046875, + "step": 886 + }, + { + "epoch": 1.13, + "learning_rate": 4.1872220406612115e-08, + "logits/chosen": -3.251728057861328, + "logits/rejected": -3.103537082672119, + "logps/chosen": -255.63983154296875, + "logps/rejected": -262.7564697265625, + "loss": 0.3034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.080784559249878, + "rewards/margins": 2.3845038414001465, + "rewards/rejected": -1.3037192821502686, + "step": 887 + }, + { + "epoch": 1.13, + "learning_rate": 4.177027048596329e-08, + "logits/chosen": -3.2363319396972656, + "logits/rejected": -3.069465398788452, + "logps/chosen": -256.46673583984375, + "logps/rejected": -968.2591552734375, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.211220622062683, + "rewards/margins": 5.127758026123047, + "rewards/rejected": -3.9165377616882324, + "step": 888 + }, + { + "epoch": 1.13, + "learning_rate": 4.1668355721194745e-08, + "logits/chosen": -3.145120143890381, + "logits/rejected": -3.089989185333252, + "logps/chosen": -239.8448944091797, + "logps/rejected": -727.9208984375, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0905975103378296, + "rewards/margins": 4.124731540679932, + "rewards/rejected": -3.0341339111328125, + "step": 889 + }, + { + "epoch": 1.13, + "learning_rate": 4.156647654766743e-08, + "logits/chosen": -3.2774147987365723, + "logits/rejected": -3.1343331336975098, + "logps/chosen": -280.00946044921875, + "logps/rejected": -752.0543212890625, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0359092950820923, + "rewards/margins": 4.921364784240723, + "rewards/rejected": -3.88545560836792, + "step": 890 + }, + { + "epoch": 1.14, + "learning_rate": 4.146463340059033e-08, + "logits/chosen": -3.2782163619995117, + "logits/rejected": -3.104036331176758, + "logps/chosen": -257.64593505859375, + "logps/rejected": -405.2252502441406, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1251716613769531, + "rewards/margins": 2.8044652938842773, + "rewards/rejected": -1.6792938709259033, + "step": 891 + }, + { + "epoch": 1.14, + "learning_rate": 4.13628267150185e-08, + "logits/chosen": -3.2705304622650146, + "logits/rejected": -3.2073981761932373, + "logps/chosen": -250.4090576171875, + "logps/rejected": -639.0400390625, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2451492547988892, + "rewards/margins": 4.851602554321289, + "rewards/rejected": -3.6064529418945312, + "step": 892 + }, + { + "epoch": 1.14, + "learning_rate": 4.126105692585121e-08, + "logits/chosen": -3.1881046295166016, + "logits/rejected": -3.1389105319976807, + "logps/chosen": -253.64141845703125, + "logps/rejected": -479.7770080566406, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2535606622695923, + "rewards/margins": 3.7821342945098877, + "rewards/rejected": -2.528573513031006, + "step": 893 + }, + { + "epoch": 1.14, + "learning_rate": 4.1159324467830196e-08, + "logits/chosen": -3.266284465789795, + "logits/rejected": -3.1653194427490234, + "logps/chosen": -269.1548156738281, + "logps/rejected": -670.227294921875, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2253494262695312, + "rewards/margins": 4.7640838623046875, + "rewards/rejected": -3.5387344360351562, + "step": 894 + }, + { + "epoch": 1.14, + "learning_rate": 4.105762977553767e-08, + "logits/chosen": -3.251354217529297, + "logits/rejected": -3.1966419219970703, + "logps/chosen": -253.99441528320312, + "logps/rejected": -668.9312744140625, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3592820167541504, + "rewards/margins": 5.212590217590332, + "rewards/rejected": -3.8533082008361816, + "step": 895 + }, + { + "epoch": 1.14, + "learning_rate": 4.095597328339452e-08, + "logits/chosen": -3.259258270263672, + "logits/rejected": -3.2041473388671875, + "logps/chosen": -270.8636169433594, + "logps/rejected": -602.7384643554688, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2175095081329346, + "rewards/margins": 4.507162570953369, + "rewards/rejected": -3.2896533012390137, + "step": 896 + }, + { + "epoch": 1.14, + "learning_rate": 4.0854355425658415e-08, + "logits/chosen": -3.1949892044067383, + "logits/rejected": -3.1363065242767334, + "logps/chosen": -292.4947204589844, + "logps/rejected": -453.075439453125, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4733796119689941, + "rewards/margins": 4.089914321899414, + "rewards/rejected": -2.616534471511841, + "step": 897 + }, + { + "epoch": 1.14, + "learning_rate": 4.075277663642208e-08, + "logits/chosen": -3.226642608642578, + "logits/rejected": -3.131859302520752, + "logps/chosen": -274.7918701171875, + "logps/rejected": -385.4629211425781, + "loss": 0.2971, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3359512090682983, + "rewards/margins": 3.1986083984375, + "rewards/rejected": -1.8626571893692017, + "step": 898 + }, + { + "epoch": 1.15, + "learning_rate": 4.0651237349611265e-08, + "logits/chosen": -3.184795379638672, + "logits/rejected": -3.0795297622680664, + "logps/chosen": -246.84136962890625, + "logps/rejected": -280.0705871582031, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4971901178359985, + "rewards/margins": 2.6529641151428223, + "rewards/rejected": -1.1557738780975342, + "step": 899 + }, + { + "epoch": 1.15, + "learning_rate": 4.054973799898299e-08, + "logits/chosen": -3.2403883934020996, + "logits/rejected": -3.143545389175415, + "logps/chosen": -249.4680633544922, + "logps/rejected": -476.266357421875, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2345497608184814, + "rewards/margins": 3.845165491104126, + "rewards/rejected": -2.6106157302856445, + "step": 900 + }, + { + "epoch": 1.15, + "learning_rate": 4.044827901812367e-08, + "logits/chosen": -3.260852098464966, + "logits/rejected": -3.166505813598633, + "logps/chosen": -274.767578125, + "logps/rejected": -348.9951171875, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3575515747070312, + "rewards/margins": 3.8418214321136475, + "rewards/rejected": -2.484269857406616, + "step": 901 + }, + { + "epoch": 1.15, + "learning_rate": 4.0346860840447325e-08, + "logits/chosen": -3.265317916870117, + "logits/rejected": -3.0554375648498535, + "logps/chosen": -269.725830078125, + "logps/rejected": -531.5113525390625, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0013443231582642, + "rewards/margins": 3.576443672180176, + "rewards/rejected": -2.575099229812622, + "step": 902 + }, + { + "epoch": 1.15, + "learning_rate": 4.0245483899193586e-08, + "logits/chosen": -3.258678913116455, + "logits/rejected": -3.233088493347168, + "logps/chosen": -288.3923034667969, + "logps/rejected": -561.3702392578125, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3200180530548096, + "rewards/margins": 4.185858249664307, + "rewards/rejected": -2.865840196609497, + "step": 903 + }, + { + "epoch": 1.15, + "learning_rate": 4.014414862742599e-08, + "logits/chosen": -3.2727344036102295, + "logits/rejected": -3.0668234825134277, + "logps/chosen": -257.3120422363281, + "logps/rejected": -1003.91015625, + "loss": 0.3138, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.057691216468811, + "rewards/margins": 5.019312381744385, + "rewards/rejected": -3.961621046066284, + "step": 904 + }, + { + "epoch": 1.15, + "learning_rate": 4.004285545803004e-08, + "logits/chosen": -3.2987220287323, + "logits/rejected": -3.047567844390869, + "logps/chosen": -263.7392883300781, + "logps/rejected": -4239.8681640625, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2070229053497314, + "rewards/margins": 12.313096046447754, + "rewards/rejected": -11.106073379516602, + "step": 905 + }, + { + "epoch": 1.15, + "learning_rate": 3.994160482371138e-08, + "logits/chosen": -3.273637294769287, + "logits/rejected": -3.050373077392578, + "logps/chosen": -243.54913330078125, + "logps/rejected": -447.69573974609375, + "loss": 0.3024, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.419518232345581, + "rewards/margins": 3.7854957580566406, + "rewards/rejected": -2.3659775257110596, + "step": 906 + }, + { + "epoch": 1.16, + "learning_rate": 3.9840397156994e-08, + "logits/chosen": -3.261369466781616, + "logits/rejected": -3.134589195251465, + "logps/chosen": -263.9455261230469, + "logps/rejected": -1087.48291015625, + "loss": 0.2993, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1634575128555298, + "rewards/margins": 5.49941349029541, + "rewards/rejected": -4.33595609664917, + "step": 907 + }, + { + "epoch": 1.16, + "learning_rate": 3.973923289021829e-08, + "logits/chosen": -3.312959671020508, + "logits/rejected": -3.1538338661193848, + "logps/chosen": -239.082275390625, + "logps/rejected": -607.7415771484375, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1040092706680298, + "rewards/margins": 3.9469094276428223, + "rewards/rejected": -2.842900276184082, + "step": 908 + }, + { + "epoch": 1.16, + "learning_rate": 3.9638112455539266e-08, + "logits/chosen": -3.2692060470581055, + "logits/rejected": -3.1261003017425537, + "logps/chosen": -284.29644775390625, + "logps/rejected": -325.70111083984375, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.416750431060791, + "rewards/margins": 3.0884995460510254, + "rewards/rejected": -1.6717491149902344, + "step": 909 + }, + { + "epoch": 1.16, + "learning_rate": 3.953703628492467e-08, + "logits/chosen": -3.2361183166503906, + "logits/rejected": -3.0456719398498535, + "logps/chosen": -238.36553955078125, + "logps/rejected": -478.8368835449219, + "loss": 0.2618, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2393471002578735, + "rewards/margins": 3.4349985122680664, + "rewards/rejected": -2.1956512928009033, + "step": 910 + }, + { + "epoch": 1.16, + "learning_rate": 3.943600481015323e-08, + "logits/chosen": -3.2590174674987793, + "logits/rejected": -3.074842929840088, + "logps/chosen": -296.4798583984375, + "logps/rejected": -1281.3916015625, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.137636661529541, + "rewards/margins": 6.746541976928711, + "rewards/rejected": -5.60890531539917, + "step": 911 + }, + { + "epoch": 1.16, + "learning_rate": 3.933501846281266e-08, + "logits/chosen": -3.2459311485290527, + "logits/rejected": -3.0811033248901367, + "logps/chosen": -227.31976318359375, + "logps/rejected": -513.4785766601562, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2178573608398438, + "rewards/margins": 3.594966411590576, + "rewards/rejected": -2.3771088123321533, + "step": 912 + }, + { + "epoch": 1.16, + "learning_rate": 3.923407767429795e-08, + "logits/chosen": -3.2445292472839355, + "logits/rejected": -3.1278984546661377, + "logps/chosen": -222.40377807617188, + "logps/rejected": -1075.711669921875, + "loss": 0.2639, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.012215495109558, + "rewards/margins": 6.266597747802734, + "rewards/rejected": -5.254382610321045, + "step": 913 + }, + { + "epoch": 1.17, + "learning_rate": 3.913318287580942e-08, + "logits/chosen": -3.2215423583984375, + "logits/rejected": -3.1973159313201904, + "logps/chosen": -249.52890014648438, + "logps/rejected": -779.2432250976562, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.35869300365448, + "rewards/margins": 4.934181213378906, + "rewards/rejected": -3.575488328933716, + "step": 914 + }, + { + "epoch": 1.17, + "learning_rate": 3.9032334498351e-08, + "logits/chosen": -3.2018022537231445, + "logits/rejected": -3.1184403896331787, + "logps/chosen": -304.96368408203125, + "logps/rejected": -1238.034912109375, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2711760997772217, + "rewards/margins": 7.650686740875244, + "rewards/rejected": -6.379510402679443, + "step": 915 + }, + { + "epoch": 1.17, + "learning_rate": 3.893153297272828e-08, + "logits/chosen": -3.196308135986328, + "logits/rejected": -3.13606595993042, + "logps/chosen": -285.8365173339844, + "logps/rejected": -532.33056640625, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0931313037872314, + "rewards/margins": 3.8839774131774902, + "rewards/rejected": -2.790846347808838, + "step": 916 + }, + { + "epoch": 1.17, + "learning_rate": 3.8830778729546686e-08, + "logits/chosen": -3.209958553314209, + "logits/rejected": -3.186302661895752, + "logps/chosen": -304.503662109375, + "logps/rejected": -684.3297119140625, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8515075445175171, + "rewards/margins": 4.157046318054199, + "rewards/rejected": -3.305539131164551, + "step": 917 + }, + { + "epoch": 1.17, + "learning_rate": 3.87300721992097e-08, + "logits/chosen": -3.250992774963379, + "logits/rejected": -3.1471686363220215, + "logps/chosen": -263.2308654785156, + "logps/rejected": -322.86932373046875, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0852088928222656, + "rewards/margins": 2.7589454650878906, + "rewards/rejected": -1.673736572265625, + "step": 918 + }, + { + "epoch": 1.17, + "learning_rate": 3.862941381191693e-08, + "logits/chosen": -3.237645149230957, + "logits/rejected": -3.1589488983154297, + "logps/chosen": -271.2464294433594, + "logps/rejected": -530.1302490234375, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3216866254806519, + "rewards/margins": 4.361504554748535, + "rewards/rejected": -3.0398178100585938, + "step": 919 + }, + { + "epoch": 1.17, + "learning_rate": 3.8528803997662425e-08, + "logits/chosen": -3.2750048637390137, + "logits/rejected": -3.0248141288757324, + "logps/chosen": -257.74517822265625, + "logps/rejected": -928.3980102539062, + "loss": 0.2964, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.279144287109375, + "rewards/margins": 5.339030742645264, + "rewards/rejected": -4.0598859786987305, + "step": 920 + }, + { + "epoch": 1.17, + "learning_rate": 3.842824318623264e-08, + "logits/chosen": -3.22709321975708, + "logits/rejected": -3.127593517303467, + "logps/chosen": -273.4373779296875, + "logps/rejected": -587.4820556640625, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.303643822669983, + "rewards/margins": 4.474539279937744, + "rewards/rejected": -3.1708953380584717, + "step": 921 + }, + { + "epoch": 1.18, + "learning_rate": 3.8327731807204744e-08, + "logits/chosen": -3.272298812866211, + "logits/rejected": -3.068589687347412, + "logps/chosen": -265.4630126953125, + "logps/rejected": -920.6456298828125, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.104353427886963, + "rewards/margins": 6.341835021972656, + "rewards/rejected": -5.237481594085693, + "step": 922 + }, + { + "epoch": 1.18, + "learning_rate": 3.822727028994471e-08, + "logits/chosen": -3.266918420791626, + "logits/rejected": -3.1501848697662354, + "logps/chosen": -303.14068603515625, + "logps/rejected": -638.9306640625, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2818603515625, + "rewards/margins": 4.240076065063477, + "rewards/rejected": -2.9582152366638184, + "step": 923 + }, + { + "epoch": 1.18, + "learning_rate": 3.812685906360557e-08, + "logits/chosen": -3.182182788848877, + "logits/rejected": -3.1496422290802, + "logps/chosen": -298.81890869140625, + "logps/rejected": -1323.268798828125, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0918289422988892, + "rewards/margins": 8.718925476074219, + "rewards/rejected": -7.627096652984619, + "step": 924 + }, + { + "epoch": 1.18, + "learning_rate": 3.802649855712548e-08, + "logits/chosen": -3.207512378692627, + "logits/rejected": -3.1205596923828125, + "logps/chosen": -278.4842834472656, + "logps/rejected": -831.2266845703125, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9878082275390625, + "rewards/margins": 4.852989196777344, + "rewards/rejected": -3.8651809692382812, + "step": 925 + }, + { + "epoch": 1.18, + "learning_rate": 3.792618919922591e-08, + "logits/chosen": -3.2423620223999023, + "logits/rejected": -3.079629421234131, + "logps/chosen": -294.4984130859375, + "logps/rejected": -559.7957153320312, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2352157831192017, + "rewards/margins": 3.891327142715454, + "rewards/rejected": -2.656111240386963, + "step": 926 + }, + { + "epoch": 1.18, + "learning_rate": 3.782593141840986e-08, + "logits/chosen": -3.2435741424560547, + "logits/rejected": -3.1847760677337646, + "logps/chosen": -230.98086547851562, + "logps/rejected": -470.5499572753906, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.298191785812378, + "rewards/margins": 3.4181625843048096, + "rewards/rejected": -2.1199707984924316, + "step": 927 + }, + { + "epoch": 1.18, + "learning_rate": 3.7725725642960044e-08, + "logits/chosen": -3.2864837646484375, + "logits/rejected": -3.1147799491882324, + "logps/chosen": -263.0453796386719, + "logps/rejected": -263.27032470703125, + "loss": 0.3145, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3312606811523438, + "rewards/margins": 2.940998077392578, + "rewards/rejected": -1.6097373962402344, + "step": 928 + }, + { + "epoch": 1.18, + "learning_rate": 3.7625572300936934e-08, + "logits/chosen": -3.2270541191101074, + "logits/rejected": -3.1299147605895996, + "logps/chosen": -245.57041931152344, + "logps/rejected": -679.1785888671875, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9875023365020752, + "rewards/margins": 4.543395519256592, + "rewards/rejected": -3.5558929443359375, + "step": 929 + }, + { + "epoch": 1.19, + "learning_rate": 3.752547182017708e-08, + "logits/chosen": -3.1747937202453613, + "logits/rejected": -3.0512752532958984, + "logps/chosen": -287.18707275390625, + "logps/rejected": -895.79833984375, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3254210948944092, + "rewards/margins": 5.1080322265625, + "rewards/rejected": -3.78261137008667, + "step": 930 + }, + { + "epoch": 1.19, + "learning_rate": 3.742542462829117e-08, + "logits/chosen": -3.2186498641967773, + "logits/rejected": -3.0860414505004883, + "logps/chosen": -260.9615478515625, + "logps/rejected": -477.6030578613281, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.206322431564331, + "rewards/margins": 3.7470147609710693, + "rewards/rejected": -2.5406923294067383, + "step": 931 + }, + { + "epoch": 1.19, + "learning_rate": 3.7325431152662297e-08, + "logits/chosen": -3.236555576324463, + "logits/rejected": -3.196615695953369, + "logps/chosen": -246.1558837890625, + "logps/rejected": -739.78125, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.349571943283081, + "rewards/margins": 5.913384437561035, + "rewards/rejected": -4.563812255859375, + "step": 932 + }, + { + "epoch": 1.19, + "learning_rate": 3.722549182044407e-08, + "logits/chosen": -3.2805182933807373, + "logits/rejected": -3.177621364593506, + "logps/chosen": -290.8318176269531, + "logps/rejected": -535.178955078125, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.101647138595581, + "rewards/margins": 3.928870439529419, + "rewards/rejected": -2.827223300933838, + "step": 933 + }, + { + "epoch": 1.19, + "learning_rate": 3.7125607058558804e-08, + "logits/chosen": -3.3412232398986816, + "logits/rejected": -3.1398398876190186, + "logps/chosen": -247.04693603515625, + "logps/rejected": -467.5669250488281, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.314997911453247, + "rewards/margins": 4.246911525726318, + "rewards/rejected": -2.9319138526916504, + "step": 934 + }, + { + "epoch": 1.19, + "learning_rate": 3.702577729369569e-08, + "logits/chosen": -3.252007246017456, + "logits/rejected": -3.172700881958008, + "logps/chosen": -263.4062194824219, + "logps/rejected": -288.12359619140625, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3558471202850342, + "rewards/margins": 2.8333425521850586, + "rewards/rejected": -1.477495551109314, + "step": 935 + }, + { + "epoch": 1.19, + "learning_rate": 3.692600295230901e-08, + "logits/chosen": -3.303736686706543, + "logits/rejected": -3.215083122253418, + "logps/chosen": -268.1888732910156, + "logps/rejected": -882.917724609375, + "loss": 0.3228, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.401435136795044, + "rewards/margins": 5.592029571533203, + "rewards/rejected": -4.190594673156738, + "step": 936 + }, + { + "epoch": 1.19, + "learning_rate": 3.682628446061627e-08, + "logits/chosen": -3.231602668762207, + "logits/rejected": -3.144894599914551, + "logps/chosen": -275.2578125, + "logps/rejected": -265.4284362792969, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.263056993484497, + "rewards/margins": 2.7151970863342285, + "rewards/rejected": -1.4521400928497314, + "step": 937 + }, + { + "epoch": 1.2, + "learning_rate": 3.6726622244596394e-08, + "logits/chosen": -3.2872071266174316, + "logits/rejected": -3.1774778366088867, + "logps/chosen": -254.62625122070312, + "logps/rejected": -518.42236328125, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3519859313964844, + "rewards/margins": 3.8289542198181152, + "rewards/rejected": -2.476968288421631, + "step": 938 + }, + { + "epoch": 1.2, + "learning_rate": 3.662701672998789e-08, + "logits/chosen": -3.283362865447998, + "logits/rejected": -3.1011054515838623, + "logps/chosen": -219.4809112548828, + "logps/rejected": -518.0474853515625, + "loss": 0.3016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2050583362579346, + "rewards/margins": 3.4527649879455566, + "rewards/rejected": -2.247706651687622, + "step": 939 + }, + { + "epoch": 1.2, + "learning_rate": 3.6527468342287096e-08, + "logits/chosen": -3.2572882175445557, + "logits/rejected": -3.1224706172943115, + "logps/chosen": -259.39556884765625, + "logps/rejected": -1060.3382568359375, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.220410943031311, + "rewards/margins": 5.649381637573242, + "rewards/rejected": -4.4289703369140625, + "step": 940 + }, + { + "epoch": 1.2, + "learning_rate": 3.6427977506746285e-08, + "logits/chosen": -3.184335231781006, + "logits/rejected": -2.97487211227417, + "logps/chosen": -233.0596923828125, + "logps/rejected": -1320.6572265625, + "loss": 0.2646, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1327743530273438, + "rewards/margins": 6.19890022277832, + "rewards/rejected": -5.066125869750977, + "step": 941 + }, + { + "epoch": 1.2, + "learning_rate": 3.632854464837188e-08, + "logits/chosen": -3.1933579444885254, + "logits/rejected": -3.1457865238189697, + "logps/chosen": -258.1109619140625, + "logps/rejected": -692.6392822265625, + "loss": 0.2823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9110596179962158, + "rewards/margins": 4.626135349273682, + "rewards/rejected": -3.715075969696045, + "step": 942 + }, + { + "epoch": 1.2, + "learning_rate": 3.622917019192261e-08, + "logits/chosen": -3.237818717956543, + "logits/rejected": -3.127816677093506, + "logps/chosen": -219.37171936035156, + "logps/rejected": -392.0054016113281, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1980148553848267, + "rewards/margins": 3.5647294521331787, + "rewards/rejected": -2.3667144775390625, + "step": 943 + }, + { + "epoch": 1.2, + "learning_rate": 3.612985456190778e-08, + "logits/chosen": -3.2543067932128906, + "logits/rejected": -3.0976390838623047, + "logps/chosen": -268.18701171875, + "logps/rejected": -632.1007080078125, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1050338745117188, + "rewards/margins": 4.338810920715332, + "rewards/rejected": -3.2337770462036133, + "step": 944 + }, + { + "epoch": 1.2, + "learning_rate": 3.6030598182585335e-08, + "logits/chosen": -3.2507381439208984, + "logits/rejected": -3.090390205383301, + "logps/chosen": -258.4672546386719, + "logps/rejected": -795.7562255859375, + "loss": 0.2624, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.150739312171936, + "rewards/margins": 5.740894317626953, + "rewards/rejected": -4.590155124664307, + "step": 945 + }, + { + "epoch": 1.21, + "learning_rate": 3.5931401477960176e-08, + "logits/chosen": -3.2274720668792725, + "logits/rejected": -3.154691219329834, + "logps/chosen": -263.837890625, + "logps/rejected": -954.203125, + "loss": 0.2907, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0691635608673096, + "rewards/margins": 6.006807327270508, + "rewards/rejected": -4.937643527984619, + "step": 946 + }, + { + "epoch": 1.21, + "learning_rate": 3.5832264871782235e-08, + "logits/chosen": -3.2317442893981934, + "logits/rejected": -3.213207483291626, + "logps/chosen": -281.661376953125, + "logps/rejected": -3735.2685546875, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.053924560546875, + "rewards/margins": 6.007861614227295, + "rewards/rejected": -4.953936576843262, + "step": 947 + }, + { + "epoch": 1.21, + "learning_rate": 3.5733188787544745e-08, + "logits/chosen": -3.223446846008301, + "logits/rejected": -3.1057891845703125, + "logps/chosen": -303.83941650390625, + "logps/rejected": -563.959228515625, + "loss": 0.3164, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2544418573379517, + "rewards/margins": 4.356419563293457, + "rewards/rejected": -3.101977586746216, + "step": 948 + }, + { + "epoch": 1.21, + "learning_rate": 3.5634173648482355e-08, + "logits/chosen": -3.293013095855713, + "logits/rejected": -3.0280704498291016, + "logps/chosen": -283.6339111328125, + "logps/rejected": -850.3909912109375, + "loss": 0.3214, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0669434070587158, + "rewards/margins": 4.864593505859375, + "rewards/rejected": -3.797650098800659, + "step": 949 + }, + { + "epoch": 1.21, + "learning_rate": 3.553521987756945e-08, + "logits/chosen": -3.260580062866211, + "logits/rejected": -3.1215786933898926, + "logps/chosen": -276.5420227050781, + "logps/rejected": -577.4533081054688, + "loss": 0.3078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1835007667541504, + "rewards/margins": 4.343684196472168, + "rewards/rejected": -3.160183906555176, + "step": 950 + }, + { + "epoch": 1.21, + "learning_rate": 3.543632789751817e-08, + "logits/chosen": -3.196861743927002, + "logits/rejected": -2.988149642944336, + "logps/chosen": -237.47793579101562, + "logps/rejected": -1472.3472900390625, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.098942518234253, + "rewards/margins": 6.954390525817871, + "rewards/rejected": -5.855447769165039, + "step": 951 + }, + { + "epoch": 1.21, + "learning_rate": 3.5337498130776766e-08, + "logits/chosen": -3.19222092628479, + "logits/rejected": -3.172417163848877, + "logps/chosen": -253.92108154296875, + "logps/rejected": -683.5418701171875, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0991332530975342, + "rewards/margins": 5.1770920753479, + "rewards/rejected": -4.077958583831787, + "step": 952 + }, + { + "epoch": 1.21, + "learning_rate": 3.523873099952768e-08, + "logits/chosen": -3.1885862350463867, + "logits/rejected": -3.041912078857422, + "logps/chosen": -280.1577453613281, + "logps/rejected": -1454.31298828125, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.155127763748169, + "rewards/margins": 8.250697135925293, + "rewards/rejected": -7.095568656921387, + "step": 953 + }, + { + "epoch": 1.22, + "learning_rate": 3.5140026925685804e-08, + "logits/chosen": -3.242338180541992, + "logits/rejected": -3.1421594619750977, + "logps/chosen": -302.8808288574219, + "logps/rejected": -1066.266357421875, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2416976690292358, + "rewards/margins": 6.201845169067383, + "rewards/rejected": -4.960146903991699, + "step": 954 + }, + { + "epoch": 1.22, + "learning_rate": 3.504138633089667e-08, + "logits/chosen": -3.2492942810058594, + "logits/rejected": -3.177426338195801, + "logps/chosen": -262.535400390625, + "logps/rejected": -571.20068359375, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1989487409591675, + "rewards/margins": 3.618058204650879, + "rewards/rejected": -2.419109344482422, + "step": 955 + }, + { + "epoch": 1.22, + "learning_rate": 3.494280963653463e-08, + "logits/chosen": -3.2200632095336914, + "logits/rejected": -3.1040658950805664, + "logps/chosen": -265.41241455078125, + "logps/rejected": -487.06689453125, + "loss": 0.3193, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.384162187576294, + "rewards/margins": 3.690741777420044, + "rewards/rejected": -2.30657958984375, + "step": 956 + }, + { + "epoch": 1.22, + "learning_rate": 3.484429726370105e-08, + "logits/chosen": -3.2931532859802246, + "logits/rejected": -3.0985727310180664, + "logps/chosen": -257.9748229980469, + "logps/rejected": -214.27732849121094, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4107627868652344, + "rewards/margins": 2.5032851696014404, + "rewards/rejected": -1.092522382736206, + "step": 957 + }, + { + "epoch": 1.22, + "learning_rate": 3.474584963322256e-08, + "logits/chosen": -3.216395854949951, + "logits/rejected": -3.1417832374572754, + "logps/chosen": -295.75445556640625, + "logps/rejected": -3520.165771484375, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.070845127105713, + "rewards/margins": 7.117952346801758, + "rewards/rejected": -6.047107219696045, + "step": 958 + }, + { + "epoch": 1.22, + "learning_rate": 3.464746716564919e-08, + "logits/chosen": -3.2194881439208984, + "logits/rejected": -3.1375808715820312, + "logps/chosen": -255.11959838867188, + "logps/rejected": -904.1591186523438, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2468948364257812, + "rewards/margins": 5.464634895324707, + "rewards/rejected": -4.217740058898926, + "step": 959 + }, + { + "epoch": 1.22, + "learning_rate": 3.4549150281252633e-08, + "logits/chosen": -3.2658138275146484, + "logits/rejected": -3.0814528465270996, + "logps/chosen": -229.72296142578125, + "logps/rejected": -1183.907958984375, + "loss": 0.2718, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4023643732070923, + "rewards/margins": 6.2271294593811035, + "rewards/rejected": -4.824765205383301, + "step": 960 + }, + { + "epoch": 1.22, + "learning_rate": 3.44508994000244e-08, + "logits/chosen": -3.2309231758117676, + "logits/rejected": -3.123706817626953, + "logps/chosen": -283.40277099609375, + "logps/rejected": -903.5623779296875, + "loss": 0.3237, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3412292003631592, + "rewards/margins": 5.572253227233887, + "rewards/rejected": -4.231024265289307, + "step": 961 + }, + { + "epoch": 1.23, + "learning_rate": 3.435271494167404e-08, + "logits/chosen": -3.2811429500579834, + "logits/rejected": -2.9750144481658936, + "logps/chosen": -250.21334838867188, + "logps/rejected": -1105.694580078125, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2468171119689941, + "rewards/margins": 5.645985126495361, + "rewards/rejected": -4.399168491363525, + "step": 962 + }, + { + "epoch": 1.23, + "learning_rate": 3.425459732562741e-08, + "logits/chosen": -3.3177599906921387, + "logits/rejected": -3.2172129154205322, + "logps/chosen": -270.2707214355469, + "logps/rejected": -731.8352661132812, + "loss": 0.2757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2289291620254517, + "rewards/margins": 5.1829328536987305, + "rewards/rejected": -3.9540040493011475, + "step": 963 + }, + { + "epoch": 1.23, + "learning_rate": 3.415654697102478e-08, + "logits/chosen": -3.2535037994384766, + "logits/rejected": -3.153604507446289, + "logps/chosen": -278.77105712890625, + "logps/rejected": -749.8096923828125, + "loss": 0.2732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0059601068496704, + "rewards/margins": 4.654223442077637, + "rewards/rejected": -3.648263454437256, + "step": 964 + }, + { + "epoch": 1.23, + "learning_rate": 3.40585642967191e-08, + "logits/chosen": -3.253465414047241, + "logits/rejected": -3.1438302993774414, + "logps/chosen": -266.89453125, + "logps/rejected": -539.02587890625, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1378036737442017, + "rewards/margins": 3.6144027709960938, + "rewards/rejected": -2.4765992164611816, + "step": 965 + }, + { + "epoch": 1.23, + "learning_rate": 3.396064972127421e-08, + "logits/chosen": -3.1998515129089355, + "logits/rejected": -3.124507427215576, + "logps/chosen": -228.43502807617188, + "logps/rejected": -1296.34033203125, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1418426036834717, + "rewards/margins": 7.936279296875, + "rewards/rejected": -6.794436454772949, + "step": 966 + }, + { + "epoch": 1.23, + "learning_rate": 3.3862803662963037e-08, + "logits/chosen": -3.2841315269470215, + "logits/rejected": -3.205225944519043, + "logps/chosen": -260.9809265136719, + "logps/rejected": -625.4967041015625, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.123883843421936, + "rewards/margins": 4.547481536865234, + "rewards/rejected": -3.423597812652588, + "step": 967 + }, + { + "epoch": 1.23, + "learning_rate": 3.376502653976583e-08, + "logits/chosen": -3.1691224575042725, + "logits/rejected": -3.0134353637695312, + "logps/chosen": -299.5661315917969, + "logps/rejected": -996.3180541992188, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2842864990234375, + "rewards/margins": 5.959811210632324, + "rewards/rejected": -4.675524711608887, + "step": 968 + }, + { + "epoch": 1.24, + "learning_rate": 3.366731876936833e-08, + "logits/chosen": -3.247180700302124, + "logits/rejected": -3.1126275062561035, + "logps/chosen": -269.925537109375, + "logps/rejected": -1010.3431396484375, + "loss": 0.2772, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2776581048965454, + "rewards/margins": 6.060168266296387, + "rewards/rejected": -4.782510280609131, + "step": 969 + }, + { + "epoch": 1.24, + "learning_rate": 3.356968076916006e-08, + "logits/chosen": -3.2702789306640625, + "logits/rejected": -3.122614860534668, + "logps/chosen": -258.5954895019531, + "logps/rejected": -731.1018676757812, + "loss": 0.2774, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.077793836593628, + "rewards/margins": 4.725584506988525, + "rewards/rejected": -3.6477906703948975, + "step": 970 + }, + { + "epoch": 1.24, + "learning_rate": 3.3472112956232454e-08, + "logits/chosen": -3.272970199584961, + "logits/rejected": -3.1780333518981934, + "logps/chosen": -269.5203857421875, + "logps/rejected": -738.2449951171875, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.333042860031128, + "rewards/margins": 5.463273525238037, + "rewards/rejected": -4.130230903625488, + "step": 971 + }, + { + "epoch": 1.24, + "learning_rate": 3.337461574737716e-08, + "logits/chosen": -3.253681182861328, + "logits/rejected": -3.124211311340332, + "logps/chosen": -244.1944122314453, + "logps/rejected": -367.0530700683594, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.178072452545166, + "rewards/margins": 3.084981679916382, + "rewards/rejected": -1.9069092273712158, + "step": 972 + }, + { + "epoch": 1.24, + "learning_rate": 3.327718955908419e-08, + "logits/chosen": -3.170943021774292, + "logits/rejected": -3.0947937965393066, + "logps/chosen": -283.50177001953125, + "logps/rejected": -628.74072265625, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1559562683105469, + "rewards/margins": 4.455385684967041, + "rewards/rejected": -3.299429416656494, + "step": 973 + }, + { + "epoch": 1.24, + "learning_rate": 3.317983480754015e-08, + "logits/chosen": -3.2425148487091064, + "logits/rejected": -3.1594388484954834, + "logps/chosen": -254.41867065429688, + "logps/rejected": -868.8846435546875, + "loss": 0.2715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1032615900039673, + "rewards/margins": 5.224971771240234, + "rewards/rejected": -4.121710300445557, + "step": 974 + }, + { + "epoch": 1.24, + "learning_rate": 3.3082551908626515e-08, + "logits/chosen": -3.215993881225586, + "logits/rejected": -3.1309316158294678, + "logps/chosen": -277.1290283203125, + "logps/rejected": -766.94775390625, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0718483924865723, + "rewards/margins": 4.899585723876953, + "rewards/rejected": -3.82773756980896, + "step": 975 + }, + { + "epoch": 1.24, + "learning_rate": 3.298534127791784e-08, + "logits/chosen": -3.268155574798584, + "logits/rejected": -3.160801887512207, + "logps/chosen": -223.47518920898438, + "logps/rejected": -1388.429443359375, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.160414218902588, + "rewards/margins": 8.241493225097656, + "rewards/rejected": -7.081079483032227, + "step": 976 + }, + { + "epoch": 1.25, + "learning_rate": 3.2888203330679934e-08, + "logits/chosen": -3.3169469833374023, + "logits/rejected": -3.1393823623657227, + "logps/chosen": -247.34695434570312, + "logps/rejected": -754.3050537109375, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5404725074768066, + "rewards/margins": 5.337182998657227, + "rewards/rejected": -3.79671049118042, + "step": 977 + }, + { + "epoch": 1.25, + "learning_rate": 3.279113848186808e-08, + "logits/chosen": -3.224076747894287, + "logits/rejected": -3.0727295875549316, + "logps/chosen": -270.00396728515625, + "logps/rejected": -502.80230712890625, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.199284315109253, + "rewards/margins": 3.621457576751709, + "rewards/rejected": -2.422173261642456, + "step": 978 + }, + { + "epoch": 1.25, + "learning_rate": 3.269414714612534e-08, + "logits/chosen": -3.2659084796905518, + "logits/rejected": -3.1178457736968994, + "logps/chosen": -240.28244018554688, + "logps/rejected": -598.516845703125, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.104939341545105, + "rewards/margins": 4.0572710037231445, + "rewards/rejected": -2.95233154296875, + "step": 979 + }, + { + "epoch": 1.25, + "learning_rate": 3.259722973778077e-08, + "logits/chosen": -3.258605718612671, + "logits/rejected": -3.05631947517395, + "logps/chosen": -281.420166015625, + "logps/rejected": -409.177978515625, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1520836353302002, + "rewards/margins": 3.1267409324645996, + "rewards/rejected": -1.974657416343689, + "step": 980 + }, + { + "epoch": 1.25, + "learning_rate": 3.250038667084758e-08, + "logits/chosen": -3.1952316761016846, + "logits/rejected": -3.062086820602417, + "logps/chosen": -340.90606689453125, + "logps/rejected": -713.2886962890625, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1082229614257812, + "rewards/margins": 4.194719314575195, + "rewards/rejected": -3.086496114730835, + "step": 981 + }, + { + "epoch": 1.25, + "learning_rate": 3.24036183590214e-08, + "logits/chosen": -3.3465523719787598, + "logits/rejected": -3.1177401542663574, + "logps/chosen": -274.2239990234375, + "logps/rejected": -932.5028686523438, + "loss": 0.2683, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2065842151641846, + "rewards/margins": 6.261018753051758, + "rewards/rejected": -5.054434299468994, + "step": 982 + }, + { + "epoch": 1.25, + "learning_rate": 3.2306925215678516e-08, + "logits/chosen": -3.2828383445739746, + "logits/rejected": -3.1580448150634766, + "logps/chosen": -291.3163757324219, + "logps/rejected": -660.0802001953125, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.205152153968811, + "rewards/margins": 5.150205135345459, + "rewards/rejected": -3.9450531005859375, + "step": 983 + }, + { + "epoch": 1.25, + "learning_rate": 3.221030765387417e-08, + "logits/chosen": -3.215120315551758, + "logits/rejected": -3.1601696014404297, + "logps/chosen": -292.13433837890625, + "logps/rejected": -568.5753173828125, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0296066999435425, + "rewards/margins": 4.550357341766357, + "rewards/rejected": -3.5207505226135254, + "step": 984 + }, + { + "epoch": 1.26, + "learning_rate": 3.211376608634068e-08, + "logits/chosen": -3.15582013130188, + "logits/rejected": -3.126598358154297, + "logps/chosen": -261.1094970703125, + "logps/rejected": -373.4412841796875, + "loss": 0.3074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3251266479492188, + "rewards/margins": 3.2799665927886963, + "rewards/rejected": -1.954840064048767, + "step": 985 + }, + { + "epoch": 1.26, + "learning_rate": 3.201730092548573e-08, + "logits/chosen": -3.247527599334717, + "logits/rejected": -3.0954151153564453, + "logps/chosen": -298.66290283203125, + "logps/rejected": -1084.563720703125, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0526764392852783, + "rewards/margins": 5.648906230926514, + "rewards/rejected": -4.596229553222656, + "step": 986 + }, + { + "epoch": 1.26, + "learning_rate": 3.192091258339064e-08, + "logits/chosen": -3.2817986011505127, + "logits/rejected": -3.150373935699463, + "logps/chosen": -237.23046875, + "logps/rejected": -420.38287353515625, + "loss": 0.2954, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.25291907787323, + "rewards/margins": 3.7048888206481934, + "rewards/rejected": -2.451969861984253, + "step": 987 + }, + { + "epoch": 1.26, + "learning_rate": 3.18246014718085e-08, + "logits/chosen": -3.2660746574401855, + "logits/rejected": -3.1726036071777344, + "logps/chosen": -278.326904296875, + "logps/rejected": -3399.45751953125, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2963547706604004, + "rewards/margins": 8.790184020996094, + "rewards/rejected": -7.493829727172852, + "step": 988 + }, + { + "epoch": 1.26, + "learning_rate": 3.172836800216262e-08, + "logits/chosen": -3.3191633224487305, + "logits/rejected": -3.1529228687286377, + "logps/chosen": -254.74392700195312, + "logps/rejected": -425.6627197265625, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3368134498596191, + "rewards/margins": 3.645293712615967, + "rewards/rejected": -2.3084800243377686, + "step": 989 + }, + { + "epoch": 1.26, + "learning_rate": 3.16322125855445e-08, + "logits/chosen": -3.315962076187134, + "logits/rejected": -3.1491219997406006, + "logps/chosen": -250.0421142578125, + "logps/rejected": -839.595947265625, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2227973937988281, + "rewards/margins": 5.963868141174316, + "rewards/rejected": -4.741070747375488, + "step": 990 + }, + { + "epoch": 1.26, + "learning_rate": 3.1536135632712315e-08, + "logits/chosen": -3.225599765777588, + "logits/rejected": -3.104146957397461, + "logps/chosen": -269.8277893066406, + "logps/rejected": -483.7783508300781, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.268068790435791, + "rewards/margins": 3.9226157665252686, + "rewards/rejected": -2.6545472145080566, + "step": 991 + }, + { + "epoch": 1.26, + "learning_rate": 3.1440137554088955e-08, + "logits/chosen": -3.295759677886963, + "logits/rejected": -3.1244919300079346, + "logps/chosen": -266.4819641113281, + "logps/rejected": -1390.0008544921875, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3357536792755127, + "rewards/margins": 6.464794635772705, + "rewards/rejected": -5.1290411949157715, + "step": 992 + }, + { + "epoch": 1.27, + "learning_rate": 3.134421875976049e-08, + "logits/chosen": -3.222740411758423, + "logits/rejected": -3.1052870750427246, + "logps/chosen": -240.6844024658203, + "logps/rejected": -395.45660400390625, + "loss": 0.2888, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3641693592071533, + "rewards/margins": 3.2722091674804688, + "rewards/rejected": -1.908039927482605, + "step": 993 + }, + { + "epoch": 1.27, + "learning_rate": 3.1248379659474225e-08, + "logits/chosen": -3.27437686920166, + "logits/rejected": -3.137376070022583, + "logps/chosen": -244.73721313476562, + "logps/rejected": -545.310302734375, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0241326093673706, + "rewards/margins": 3.3860979080200195, + "rewards/rejected": -2.3619651794433594, + "step": 994 + }, + { + "epoch": 1.27, + "learning_rate": 3.115262066263704e-08, + "logits/chosen": -3.19968843460083, + "logits/rejected": -3.126309394836426, + "logps/chosen": -299.6536865234375, + "logps/rejected": -1330.6497802734375, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1742874383926392, + "rewards/margins": 7.501710891723633, + "rewards/rejected": -6.327423095703125, + "step": 995 + }, + { + "epoch": 1.27, + "learning_rate": 3.1056942178313604e-08, + "logits/chosen": -3.220177173614502, + "logits/rejected": -3.0806655883789062, + "logps/chosen": -246.30682373046875, + "logps/rejected": -1369.03173828125, + "loss": 0.2829, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.399263858795166, + "rewards/margins": 7.817629337310791, + "rewards/rejected": -6.418365478515625, + "step": 996 + }, + { + "epoch": 1.27, + "learning_rate": 3.096134461522475e-08, + "logits/chosen": -3.2415480613708496, + "logits/rejected": -3.1240031719207764, + "logps/chosen": -251.37738037109375, + "logps/rejected": -664.2733764648438, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9536498785018921, + "rewards/margins": 4.399054050445557, + "rewards/rejected": -3.445404052734375, + "step": 997 + }, + { + "epoch": 1.27, + "learning_rate": 3.086582838174551e-08, + "logits/chosen": -3.2218739986419678, + "logits/rejected": -3.069117546081543, + "logps/chosen": -273.11376953125, + "logps/rejected": -526.9230346679688, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3057808876037598, + "rewards/margins": 4.037887096405029, + "rewards/rejected": -2.7321059703826904, + "step": 998 + }, + { + "epoch": 1.27, + "learning_rate": 3.077039388590357e-08, + "logits/chosen": -3.2390313148498535, + "logits/rejected": -3.149071216583252, + "logps/chosen": -255.18203735351562, + "logps/rejected": -648.5692138671875, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0513442754745483, + "rewards/margins": 4.354067802429199, + "rewards/rejected": -3.3027238845825195, + "step": 999 + }, + { + "epoch": 1.27, + "learning_rate": 3.0675041535377396e-08, + "logits/chosen": -3.248171806335449, + "logits/rejected": -3.1300785541534424, + "logps/chosen": -299.6677551269531, + "logps/rejected": -349.02630615234375, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4027130603790283, + "rewards/margins": 3.1574478149414062, + "rewards/rejected": -1.754734754562378, + "step": 1000 + }, + { + "epoch": 1.28, + "learning_rate": 3.0579771737494563e-08, + "logits/chosen": -3.261906147003174, + "logits/rejected": -3.1208536624908447, + "logps/chosen": -277.9562683105469, + "logps/rejected": -711.2673950195312, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4734420776367188, + "rewards/margins": 5.403782844543457, + "rewards/rejected": -3.930340528488159, + "step": 1001 + }, + { + "epoch": 1.28, + "learning_rate": 3.048458489923005e-08, + "logits/chosen": -3.244250774383545, + "logits/rejected": -3.1839208602905273, + "logps/chosen": -256.26983642578125, + "logps/rejected": -629.6480712890625, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3265502452850342, + "rewards/margins": 4.624170303344727, + "rewards/rejected": -3.2976198196411133, + "step": 1002 + }, + { + "epoch": 1.28, + "learning_rate": 3.038948142720437e-08, + "logits/chosen": -3.261038303375244, + "logits/rejected": -3.143423557281494, + "logps/chosen": -255.58038330078125, + "logps/rejected": -505.5746154785156, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3226401805877686, + "rewards/margins": 4.06417179107666, + "rewards/rejected": -2.7415313720703125, + "step": 1003 + }, + { + "epoch": 1.28, + "learning_rate": 3.029446172768193e-08, + "logits/chosen": -3.29756236076355, + "logits/rejected": -3.1943001747131348, + "logps/chosen": -249.08206176757812, + "logps/rejected": -646.3360595703125, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1739128828048706, + "rewards/margins": 4.985851287841797, + "rewards/rejected": -3.8119382858276367, + "step": 1004 + }, + { + "epoch": 1.28, + "learning_rate": 3.019952620656928e-08, + "logits/chosen": -3.28963041305542, + "logits/rejected": -3.1105096340179443, + "logps/chosen": -285.19683837890625, + "logps/rejected": -440.5293884277344, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4244736433029175, + "rewards/margins": 3.9546327590942383, + "rewards/rejected": -2.5301589965820312, + "step": 1005 + }, + { + "epoch": 1.28, + "learning_rate": 3.0104675269413436e-08, + "logits/chosen": -3.302536964416504, + "logits/rejected": -3.095816135406494, + "logps/chosen": -268.66082763671875, + "logps/rejected": -1743.5078125, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4266250133514404, + "rewards/margins": 9.129585266113281, + "rewards/rejected": -7.70296049118042, + "step": 1006 + }, + { + "epoch": 1.28, + "learning_rate": 3.00099093214e-08, + "logits/chosen": -3.21356463432312, + "logits/rejected": -3.1570234298706055, + "logps/chosen": -292.58197021484375, + "logps/rejected": -464.8410949707031, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3460098505020142, + "rewards/margins": 3.553990364074707, + "rewards/rejected": -2.2079803943634033, + "step": 1007 + }, + { + "epoch": 1.28, + "learning_rate": 2.991522876735154e-08, + "logits/chosen": -3.1575682163238525, + "logits/rejected": -3.0698719024658203, + "logps/chosen": -270.174072265625, + "logps/rejected": -469.32208251953125, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.224581241607666, + "rewards/margins": 3.35380482673645, + "rewards/rejected": -2.129223585128784, + "step": 1008 + }, + { + "epoch": 1.29, + "learning_rate": 2.982063401172584e-08, + "logits/chosen": -3.3345861434936523, + "logits/rejected": -3.1861391067504883, + "logps/chosen": -213.39944458007812, + "logps/rejected": -740.6362915039062, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9805405139923096, + "rewards/margins": 4.953385829925537, + "rewards/rejected": -3.9728455543518066, + "step": 1009 + }, + { + "epoch": 1.29, + "learning_rate": 2.9726125458614215e-08, + "logits/chosen": -3.220078945159912, + "logits/rejected": -3.196746826171875, + "logps/chosen": -271.3592224121094, + "logps/rejected": -636.9109497070312, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1968262195587158, + "rewards/margins": 4.904558181762695, + "rewards/rejected": -3.7077317237854004, + "step": 1010 + }, + { + "epoch": 1.29, + "learning_rate": 2.963170351173968e-08, + "logits/chosen": -3.3048176765441895, + "logits/rejected": -2.974337339401245, + "logps/chosen": -275.8819580078125, + "logps/rejected": -2791.617919921875, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3534722328186035, + "rewards/margins": 11.976893424987793, + "rewards/rejected": -10.623420715332031, + "step": 1011 + }, + { + "epoch": 1.29, + "learning_rate": 2.9537368574455303e-08, + "logits/chosen": -3.321847677230835, + "logits/rejected": -3.1391234397888184, + "logps/chosen": -230.619140625, + "logps/rejected": -488.9814453125, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2437348365783691, + "rewards/margins": 3.758078098297119, + "rewards/rejected": -2.51434326171875, + "step": 1012 + }, + { + "epoch": 1.29, + "learning_rate": 2.944312104974247e-08, + "logits/chosen": -3.2392983436584473, + "logits/rejected": -3.15545654296875, + "logps/chosen": -240.30780029296875, + "logps/rejected": -581.3718872070312, + "loss": 0.2763, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3275146484375, + "rewards/margins": 4.379336357116699, + "rewards/rejected": -3.0518219470977783, + "step": 1013 + }, + { + "epoch": 1.29, + "learning_rate": 2.9348961340209117e-08, + "logits/chosen": -3.2935032844543457, + "logits/rejected": -3.086994171142578, + "logps/chosen": -252.77059936523438, + "logps/rejected": -768.7288208007812, + "loss": 0.2919, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3338630199432373, + "rewards/margins": 5.211223602294922, + "rewards/rejected": -3.8773603439331055, + "step": 1014 + }, + { + "epoch": 1.29, + "learning_rate": 2.9254889848088133e-08, + "logits/chosen": -3.2464284896850586, + "logits/rejected": -3.14286470413208, + "logps/chosen": -248.480712890625, + "logps/rejected": -666.432373046875, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2916648387908936, + "rewards/margins": 4.663229942321777, + "rewards/rejected": -3.371565341949463, + "step": 1015 + }, + { + "epoch": 1.3, + "learning_rate": 2.916090697523549e-08, + "logits/chosen": -3.313753366470337, + "logits/rejected": -3.1247599124908447, + "logps/chosen": -253.91436767578125, + "logps/rejected": -661.6468505859375, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1781768798828125, + "rewards/margins": 4.9199676513671875, + "rewards/rejected": -3.741790771484375, + "step": 1016 + }, + { + "epoch": 1.3, + "learning_rate": 2.906701312312861e-08, + "logits/chosen": -3.178145408630371, + "logits/rejected": -2.998000383377075, + "logps/chosen": -262.13543701171875, + "logps/rejected": -769.5026245117188, + "loss": 0.2993, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.339704990386963, + "rewards/margins": 4.412959098815918, + "rewards/rejected": -3.0732545852661133, + "step": 1017 + }, + { + "epoch": 1.3, + "learning_rate": 2.897320869286462e-08, + "logits/chosen": -3.298712730407715, + "logits/rejected": -3.038949489593506, + "logps/chosen": -245.1542205810547, + "logps/rejected": -1612.4775390625, + "loss": 0.3048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.264662265777588, + "rewards/margins": 7.197633743286133, + "rewards/rejected": -5.932971477508545, + "step": 1018 + }, + { + "epoch": 1.3, + "learning_rate": 2.887949408515872e-08, + "logits/chosen": -3.2306246757507324, + "logits/rejected": -3.1243505477905273, + "logps/chosen": -276.82489013671875, + "logps/rejected": -632.7005615234375, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.42236328125, + "rewards/margins": 4.476341247558594, + "rewards/rejected": -3.0539779663085938, + "step": 1019 + }, + { + "epoch": 1.3, + "learning_rate": 2.8785869700342317e-08, + "logits/chosen": -3.2249817848205566, + "logits/rejected": -3.05014705657959, + "logps/chosen": -283.9545593261719, + "logps/rejected": -682.160400390625, + "loss": 0.2794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3951218128204346, + "rewards/margins": 4.798090934753418, + "rewards/rejected": -3.4029693603515625, + "step": 1020 + }, + { + "epoch": 1.3, + "learning_rate": 2.8692335938361466e-08, + "logits/chosen": -3.2634077072143555, + "logits/rejected": -3.184325933456421, + "logps/chosen": -262.451171875, + "logps/rejected": -959.7686157226562, + "loss": 0.2762, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.332099199295044, + "rewards/margins": 6.263584136962891, + "rewards/rejected": -4.931485176086426, + "step": 1021 + }, + { + "epoch": 1.3, + "learning_rate": 2.8598893198775044e-08, + "logits/chosen": -3.260310649871826, + "logits/rejected": -3.155951499938965, + "logps/chosen": -249.7733917236328, + "logps/rejected": -349.793701171875, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4361892938613892, + "rewards/margins": 3.1699676513671875, + "rewards/rejected": -1.733778476715088, + "step": 1022 + }, + { + "epoch": 1.3, + "learning_rate": 2.850554188075317e-08, + "logits/chosen": -3.334036350250244, + "logits/rejected": -3.1071441173553467, + "logps/chosen": -277.3504943847656, + "logps/rejected": -402.13555908203125, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.418664574623108, + "rewards/margins": 3.376939296722412, + "rewards/rejected": -1.9582748413085938, + "step": 1023 + }, + { + "epoch": 1.31, + "learning_rate": 2.841228238307536e-08, + "logits/chosen": -3.2959446907043457, + "logits/rejected": -3.142618179321289, + "logps/chosen": -270.61083984375, + "logps/rejected": -682.9768676757812, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3450896739959717, + "rewards/margins": 5.389199733734131, + "rewards/rejected": -4.044110298156738, + "step": 1024 + }, + { + "epoch": 1.31, + "learning_rate": 2.831911510412892e-08, + "logits/chosen": -3.2402267456054688, + "logits/rejected": -3.1765167713165283, + "logps/chosen": -270.3200988769531, + "logps/rejected": -704.0091552734375, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5112007856369019, + "rewards/margins": 4.948838233947754, + "rewards/rejected": -3.4376373291015625, + "step": 1025 + }, + { + "epoch": 1.31, + "learning_rate": 2.8226040441907207e-08, + "logits/chosen": -3.202169895172119, + "logits/rejected": -3.129155158996582, + "logps/chosen": -259.76837158203125, + "logps/rejected": -541.18798828125, + "loss": 0.2925, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2200226783752441, + "rewards/margins": 3.833609104156494, + "rewards/rejected": -2.61358642578125, + "step": 1026 + }, + { + "epoch": 1.31, + "learning_rate": 2.8133058794007924e-08, + "logits/chosen": -3.307197093963623, + "logits/rejected": -3.160231590270996, + "logps/chosen": -268.42657470703125, + "logps/rejected": -1241.570556640625, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3961228132247925, + "rewards/margins": 7.627891540527344, + "rewards/rejected": -6.23176908493042, + "step": 1027 + }, + { + "epoch": 1.31, + "learning_rate": 2.8040170557631488e-08, + "logits/chosen": -3.225430965423584, + "logits/rejected": -3.0658721923828125, + "logps/chosen": -300.92535400390625, + "logps/rejected": -875.5632934570312, + "loss": 0.3206, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2532989978790283, + "rewards/margins": 5.641695976257324, + "rewards/rejected": -4.388397216796875, + "step": 1028 + }, + { + "epoch": 1.31, + "learning_rate": 2.7947376129579216e-08, + "logits/chosen": -3.2063045501708984, + "logits/rejected": -3.0997133255004883, + "logps/chosen": -303.2962341308594, + "logps/rejected": -688.1590576171875, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.331965684890747, + "rewards/margins": 4.629004001617432, + "rewards/rejected": -3.2970383167266846, + "step": 1029 + }, + { + "epoch": 1.31, + "learning_rate": 2.7854675906251723e-08, + "logits/chosen": -3.209412097930908, + "logits/rejected": -3.0516300201416016, + "logps/chosen": -289.19512939453125, + "logps/rejected": -1489.7994384765625, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3346214294433594, + "rewards/margins": 7.993782997131348, + "rewards/rejected": -6.659161567687988, + "step": 1030 + }, + { + "epoch": 1.31, + "learning_rate": 2.7762070283647177e-08, + "logits/chosen": -3.2939372062683105, + "logits/rejected": -3.2370378971099854, + "logps/chosen": -253.6716766357422, + "logps/rejected": -835.2138671875, + "loss": 0.2863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.455683946609497, + "rewards/margins": 6.693595886230469, + "rewards/rejected": -5.237912178039551, + "step": 1031 + }, + { + "epoch": 1.32, + "learning_rate": 2.7669559657359676e-08, + "logits/chosen": -3.2298707962036133, + "logits/rejected": -3.144969940185547, + "logps/chosen": -232.53451538085938, + "logps/rejected": -617.22119140625, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2372620105743408, + "rewards/margins": 5.189944267272949, + "rewards/rejected": -3.9526824951171875, + "step": 1032 + }, + { + "epoch": 1.32, + "learning_rate": 2.757714442257747e-08, + "logits/chosen": -3.268091917037964, + "logits/rejected": -3.120483160018921, + "logps/chosen": -253.1172332763672, + "logps/rejected": -662.9132080078125, + "loss": 0.2595, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2437347173690796, + "rewards/margins": 4.632910251617432, + "rewards/rejected": -3.3891754150390625, + "step": 1033 + }, + { + "epoch": 1.32, + "learning_rate": 2.7484824974081323e-08, + "logits/chosen": -3.2223076820373535, + "logits/rejected": -3.0621776580810547, + "logps/chosen": -258.9573059082031, + "logps/rejected": -280.9051208496094, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0997390747070312, + "rewards/margins": 2.1898574829101562, + "rewards/rejected": -1.090118408203125, + "step": 1034 + }, + { + "epoch": 1.32, + "learning_rate": 2.73926017062428e-08, + "logits/chosen": -3.250302791595459, + "logits/rejected": -3.0948781967163086, + "logps/chosen": -241.04681396484375, + "logps/rejected": -784.601318359375, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1061989068984985, + "rewards/margins": 4.864325523376465, + "rewards/rejected": -3.758126735687256, + "step": 1035 + }, + { + "epoch": 1.32, + "learning_rate": 2.730047501302266e-08, + "logits/chosen": -3.210934638977051, + "logits/rejected": -3.0714969635009766, + "logps/chosen": -285.89013671875, + "logps/rejected": -653.8311767578125, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3454620838165283, + "rewards/margins": 4.321577548980713, + "rewards/rejected": -2.9761154651641846, + "step": 1036 + }, + { + "epoch": 1.32, + "learning_rate": 2.720844528796906e-08, + "logits/chosen": -3.308053493499756, + "logits/rejected": -3.115584373474121, + "logps/chosen": -283.05340576171875, + "logps/rejected": -560.2102661132812, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2478034496307373, + "rewards/margins": 4.550171852111816, + "rewards/rejected": -3.3023681640625, + "step": 1037 + }, + { + "epoch": 1.32, + "learning_rate": 2.711651292421593e-08, + "logits/chosen": -3.2757105827331543, + "logits/rejected": -3.1539435386657715, + "logps/chosen": -293.6260681152344, + "logps/rejected": -439.1058349609375, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0723037719726562, + "rewards/margins": 3.535810708999634, + "rewards/rejected": -2.4635071754455566, + "step": 1038 + }, + { + "epoch": 1.32, + "learning_rate": 2.702467831448131e-08, + "logits/chosen": -3.1915695667266846, + "logits/rejected": -3.127852201461792, + "logps/chosen": -246.44308471679688, + "logps/rejected": -1290.0462646484375, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2235229015350342, + "rewards/margins": 7.357632637023926, + "rewards/rejected": -6.1341094970703125, + "step": 1039 + }, + { + "epoch": 1.33, + "learning_rate": 2.6932941851065616e-08, + "logits/chosen": -3.2362470626831055, + "logits/rejected": -3.180056095123291, + "logps/chosen": -259.48199462890625, + "logps/rejected": -732.6458740234375, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2572418451309204, + "rewards/margins": 5.697766304016113, + "rewards/rejected": -4.440524101257324, + "step": 1040 + }, + { + "epoch": 1.33, + "learning_rate": 2.6841303925850067e-08, + "logits/chosen": -3.2726361751556396, + "logits/rejected": -3.182941436767578, + "logps/chosen": -258.76593017578125, + "logps/rejected": -460.3320007324219, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.306462049484253, + "rewards/margins": 4.042344570159912, + "rewards/rejected": -2.7358827590942383, + "step": 1041 + }, + { + "epoch": 1.33, + "learning_rate": 2.6749764930294905e-08, + "logits/chosen": -3.2038450241088867, + "logits/rejected": -3.0925440788269043, + "logps/chosen": -254.2467803955078, + "logps/rejected": -353.54547119140625, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.266279697418213, + "rewards/margins": 2.995492458343506, + "rewards/rejected": -1.729212999343872, + "step": 1042 + }, + { + "epoch": 1.33, + "learning_rate": 2.665832525543774e-08, + "logits/chosen": -3.3033838272094727, + "logits/rejected": -3.1100943088531494, + "logps/chosen": -287.90234375, + "logps/rejected": -1318.8331298828125, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3323249816894531, + "rewards/margins": 6.636536598205566, + "rewards/rejected": -5.304211616516113, + "step": 1043 + }, + { + "epoch": 1.33, + "learning_rate": 2.656698529189193e-08, + "logits/chosen": -3.2611474990844727, + "logits/rejected": -3.1669912338256836, + "logps/chosen": -256.3360595703125, + "logps/rejected": -467.0954895019531, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3226547241210938, + "rewards/margins": 3.953869581222534, + "rewards/rejected": -2.6312148571014404, + "step": 1044 + }, + { + "epoch": 1.33, + "learning_rate": 2.6475745429844908e-08, + "logits/chosen": -3.215268135070801, + "logits/rejected": -3.0941925048828125, + "logps/chosen": -267.885009765625, + "logps/rejected": -536.901123046875, + "loss": 0.2974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1339080333709717, + "rewards/margins": 3.9012742042541504, + "rewards/rejected": -2.7673661708831787, + "step": 1045 + }, + { + "epoch": 1.33, + "learning_rate": 2.638460605905646e-08, + "logits/chosen": -3.233099937438965, + "logits/rejected": -3.11814022064209, + "logps/chosen": -218.29212951660156, + "logps/rejected": -504.494873046875, + "loss": 0.2742, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0707085132598877, + "rewards/margins": 3.4261131286621094, + "rewards/rejected": -2.3554046154022217, + "step": 1046 + }, + { + "epoch": 1.33, + "learning_rate": 2.6293567568857078e-08, + "logits/chosen": -3.2784647941589355, + "logits/rejected": -3.1450958251953125, + "logps/chosen": -214.6044464111328, + "logps/rejected": -683.1911010742188, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1286804676055908, + "rewards/margins": 5.0694899559021, + "rewards/rejected": -3.940809726715088, + "step": 1047 + }, + { + "epoch": 1.34, + "learning_rate": 2.620263034814632e-08, + "logits/chosen": -3.2656264305114746, + "logits/rejected": -3.095749855041504, + "logps/chosen": -267.622802734375, + "logps/rejected": -1140.3553466796875, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3748047351837158, + "rewards/margins": 6.92128324508667, + "rewards/rejected": -5.546478271484375, + "step": 1048 + }, + { + "epoch": 1.34, + "learning_rate": 2.6111794785391196e-08, + "logits/chosen": -3.2706379890441895, + "logits/rejected": -3.129359722137451, + "logps/chosen": -241.9056854248047, + "logps/rejected": -575.5223999023438, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0701500177383423, + "rewards/margins": 3.7723684310913086, + "rewards/rejected": -2.702218532562256, + "step": 1049 + }, + { + "epoch": 1.34, + "learning_rate": 2.6021061268624378e-08, + "logits/chosen": -3.230746030807495, + "logits/rejected": -3.1275081634521484, + "logps/chosen": -260.58331298828125, + "logps/rejected": -638.5823974609375, + "loss": 0.2874, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4460029602050781, + "rewards/margins": 4.9897685050964355, + "rewards/rejected": -3.5437655448913574, + "step": 1050 + }, + { + "epoch": 1.34, + "learning_rate": 2.5930430185442653e-08, + "logits/chosen": -3.206005096435547, + "logits/rejected": -3.1509599685668945, + "logps/chosen": -266.1434326171875, + "logps/rejected": -321.2939453125, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2804383039474487, + "rewards/margins": 3.087986707687378, + "rewards/rejected": -1.8075485229492188, + "step": 1051 + }, + { + "epoch": 1.34, + "learning_rate": 2.5839901923005202e-08, + "logits/chosen": -3.219326972961426, + "logits/rejected": -3.0964455604553223, + "logps/chosen": -253.7720489501953, + "logps/rejected": -664.237060546875, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2045814990997314, + "rewards/margins": 5.171670913696289, + "rewards/rejected": -3.9670896530151367, + "step": 1052 + }, + { + "epoch": 1.34, + "learning_rate": 2.5749476868032023e-08, + "logits/chosen": -3.2212271690368652, + "logits/rejected": -3.1480109691619873, + "logps/chosen": -272.43365478515625, + "logps/rejected": -1119.895751953125, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1508941650390625, + "rewards/margins": 7.5660400390625, + "rewards/rejected": -6.4151458740234375, + "step": 1053 + }, + { + "epoch": 1.34, + "learning_rate": 2.5659155406802192e-08, + "logits/chosen": -3.2853610515594482, + "logits/rejected": -3.067167282104492, + "logps/chosen": -310.70916748046875, + "logps/rejected": -466.86895751953125, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2890716791152954, + "rewards/margins": 3.282613515853882, + "rewards/rejected": -1.9935417175292969, + "step": 1054 + }, + { + "epoch": 1.34, + "learning_rate": 2.5568937925152267e-08, + "logits/chosen": -3.2909183502197266, + "logits/rejected": -3.1651229858398438, + "logps/chosen": -239.656494140625, + "logps/rejected": -574.8937377929688, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.217871904373169, + "rewards/margins": 4.377811431884766, + "rewards/rejected": -3.159939765930176, + "step": 1055 + }, + { + "epoch": 1.35, + "learning_rate": 2.5478824808474607e-08, + "logits/chosen": -3.2823362350463867, + "logits/rejected": -3.1483206748962402, + "logps/chosen": -241.15924072265625, + "logps/rejected": -434.19049072265625, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3206138610839844, + "rewards/margins": 3.808673858642578, + "rewards/rejected": -2.4880599975585938, + "step": 1056 + }, + { + "epoch": 1.35, + "learning_rate": 2.5388816441715755e-08, + "logits/chosen": -3.2704644203186035, + "logits/rejected": -3.1254239082336426, + "logps/chosen": -267.2807922363281, + "logps/rejected": -903.6220703125, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1600418090820312, + "rewards/margins": 4.6633501052856445, + "rewards/rejected": -3.5033082962036133, + "step": 1057 + }, + { + "epoch": 1.35, + "learning_rate": 2.5298913209374804e-08, + "logits/chosen": -3.243004560470581, + "logits/rejected": -3.1895289421081543, + "logps/chosen": -235.39511108398438, + "logps/rejected": -387.592041015625, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.391181230545044, + "rewards/margins": 3.590991973876953, + "rewards/rejected": -2.199810743331909, + "step": 1058 + }, + { + "epoch": 1.35, + "learning_rate": 2.520911549550171e-08, + "logits/chosen": -3.2250149250030518, + "logits/rejected": -3.155796527862549, + "logps/chosen": -274.1022033691406, + "logps/rejected": -601.4659423828125, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4124603271484375, + "rewards/margins": 4.957491874694824, + "rewards/rejected": -3.545031785964966, + "step": 1059 + }, + { + "epoch": 1.35, + "learning_rate": 2.5119423683695657e-08, + "logits/chosen": -3.251755952835083, + "logits/rejected": -3.114823579788208, + "logps/chosen": -291.2002868652344, + "logps/rejected": -666.5637817382812, + "loss": 0.3085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.007378339767456, + "rewards/margins": 4.71187686920166, + "rewards/rejected": -3.704498291015625, + "step": 1060 + }, + { + "epoch": 1.35, + "learning_rate": 2.5029838157103444e-08, + "logits/chosen": -3.1868252754211426, + "logits/rejected": -3.0034542083740234, + "logps/chosen": -236.6761932373047, + "logps/rejected": -538.275634765625, + "loss": 0.272, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.296412706375122, + "rewards/margins": 4.450202941894531, + "rewards/rejected": -3.153790235519409, + "step": 1061 + }, + { + "epoch": 1.35, + "learning_rate": 2.494035929841789e-08, + "logits/chosen": -3.214918613433838, + "logits/rejected": -3.131279945373535, + "logps/chosen": -258.63909912109375, + "logps/rejected": -600.3983764648438, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2793686389923096, + "rewards/margins": 4.562849521636963, + "rewards/rejected": -3.2834808826446533, + "step": 1062 + }, + { + "epoch": 1.36, + "learning_rate": 2.485098748987608e-08, + "logits/chosen": -3.23720645904541, + "logits/rejected": -3.1155152320861816, + "logps/chosen": -240.06361389160156, + "logps/rejected": -606.1566772460938, + "loss": 0.2652, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1928527355194092, + "rewards/margins": 4.182677268981934, + "rewards/rejected": -2.989824056625366, + "step": 1063 + }, + { + "epoch": 1.36, + "learning_rate": 2.4761723113257826e-08, + "logits/chosen": -3.20835018157959, + "logits/rejected": -3.149214267730713, + "logps/chosen": -272.6499938964844, + "logps/rejected": -423.7852783203125, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.103729248046875, + "rewards/margins": 3.3680543899536133, + "rewards/rejected": -2.2643251419067383, + "step": 1064 + }, + { + "epoch": 1.36, + "learning_rate": 2.4672566549884005e-08, + "logits/chosen": -3.2326483726501465, + "logits/rejected": -3.1163551807403564, + "logps/chosen": -252.0284881591797, + "logps/rejected": -605.2437744140625, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5848426818847656, + "rewards/margins": 4.881565570831299, + "rewards/rejected": -3.296722650527954, + "step": 1065 + }, + { + "epoch": 1.36, + "learning_rate": 2.458351818061497e-08, + "logits/chosen": -3.2607779502868652, + "logits/rejected": -3.095210075378418, + "logps/chosen": -283.8885803222656, + "logps/rejected": -454.4396057128906, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3078124523162842, + "rewards/margins": 3.70357084274292, + "rewards/rejected": -2.3957581520080566, + "step": 1066 + }, + { + "epoch": 1.36, + "learning_rate": 2.4494578385848845e-08, + "logits/chosen": -3.260399341583252, + "logits/rejected": -3.1852035522460938, + "logps/chosen": -290.5274658203125, + "logps/rejected": -605.43310546875, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4524627923965454, + "rewards/margins": 5.270957946777344, + "rewards/rejected": -3.818495273590088, + "step": 1067 + }, + { + "epoch": 1.36, + "learning_rate": 2.4405747545519962e-08, + "logits/chosen": -3.2219953536987305, + "logits/rejected": -3.158257484436035, + "logps/chosen": -284.66864013671875, + "logps/rejected": -967.3613891601562, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4532928466796875, + "rewards/margins": 6.799777507781982, + "rewards/rejected": -5.346484661102295, + "step": 1068 + }, + { + "epoch": 1.36, + "learning_rate": 2.431702603909722e-08, + "logits/chosen": -3.282618284225464, + "logits/rejected": -3.157759428024292, + "logps/chosen": -261.303466796875, + "logps/rejected": -484.2860412597656, + "loss": 0.2954, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2737336158752441, + "rewards/margins": 3.746302843093872, + "rewards/rejected": -2.472569227218628, + "step": 1069 + }, + { + "epoch": 1.36, + "learning_rate": 2.422841424558244e-08, + "logits/chosen": -3.180455446243286, + "logits/rejected": -3.096881866455078, + "logps/chosen": -250.59231567382812, + "logps/rejected": -423.85565185546875, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.021125078201294, + "rewards/margins": 2.8839988708496094, + "rewards/rejected": -1.8628737926483154, + "step": 1070 + }, + { + "epoch": 1.37, + "learning_rate": 2.4139912543508833e-08, + "logits/chosen": -3.2635204792022705, + "logits/rejected": -3.069852590560913, + "logps/chosen": -273.4951477050781, + "logps/rejected": -548.0218505859375, + "loss": 0.2878, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4218826293945312, + "rewards/margins": 3.597790479660034, + "rewards/rejected": -2.175907850265503, + "step": 1071 + }, + { + "epoch": 1.37, + "learning_rate": 2.4051521310939256e-08, + "logits/chosen": -3.210991859436035, + "logits/rejected": -3.184643268585205, + "logps/chosen": -284.84698486328125, + "logps/rejected": -1023.9031982421875, + "loss": 0.2864, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.244776964187622, + "rewards/margins": 6.0110979080200195, + "rewards/rejected": -4.766321182250977, + "step": 1072 + }, + { + "epoch": 1.37, + "learning_rate": 2.3963240925464682e-08, + "logits/chosen": -3.291536808013916, + "logits/rejected": -3.110745429992676, + "logps/chosen": -262.9481201171875, + "logps/rejected": -288.1609191894531, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1366729736328125, + "rewards/margins": 2.495842695236206, + "rewards/rejected": -1.3591697216033936, + "step": 1073 + }, + { + "epoch": 1.37, + "learning_rate": 2.3875071764202558e-08, + "logits/chosen": -3.2199606895446777, + "logits/rejected": -3.166720390319824, + "logps/chosen": -247.7613525390625, + "logps/rejected": -409.1984558105469, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3543510437011719, + "rewards/margins": 3.407421350479126, + "rewards/rejected": -2.053070068359375, + "step": 1074 + }, + { + "epoch": 1.37, + "learning_rate": 2.378701420379524e-08, + "logits/chosen": -3.1599693298339844, + "logits/rejected": -3.1713271141052246, + "logps/chosen": -267.7906188964844, + "logps/rejected": -734.2332153320312, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3500763177871704, + "rewards/margins": 5.433462619781494, + "rewards/rejected": -4.083386421203613, + "step": 1075 + }, + { + "epoch": 1.37, + "learning_rate": 2.3699068620408304e-08, + "logits/chosen": -3.289135456085205, + "logits/rejected": -3.081291437149048, + "logps/chosen": -277.22021484375, + "logps/rejected": -2631.383056640625, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2471871376037598, + "rewards/margins": 10.842377662658691, + "rewards/rejected": -9.59519100189209, + "step": 1076 + }, + { + "epoch": 1.37, + "learning_rate": 2.3611235389728985e-08, + "logits/chosen": -3.2490973472595215, + "logits/rejected": -3.0295114517211914, + "logps/chosen": -240.15914916992188, + "logps/rejected": -572.6619262695312, + "loss": 0.3102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.200127363204956, + "rewards/margins": 3.494666576385498, + "rewards/rejected": -2.294538974761963, + "step": 1077 + }, + { + "epoch": 1.37, + "learning_rate": 2.352351488696457e-08, + "logits/chosen": -3.2684812545776367, + "logits/rejected": -3.1851072311401367, + "logps/chosen": -233.20724487304688, + "logps/rejected": -588.43359375, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1012687683105469, + "rewards/margins": 4.37136173248291, + "rewards/rejected": -3.2700929641723633, + "step": 1078 + }, + { + "epoch": 1.38, + "learning_rate": 2.343590748684082e-08, + "logits/chosen": -3.2586112022399902, + "logits/rejected": -3.1250972747802734, + "logps/chosen": -228.3115234375, + "logps/rejected": -910.02001953125, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0519462823867798, + "rewards/margins": 5.960140228271484, + "rewards/rejected": -4.908194065093994, + "step": 1079 + }, + { + "epoch": 1.38, + "learning_rate": 2.3348413563600322e-08, + "logits/chosen": -3.2685132026672363, + "logits/rejected": -3.1705355644226074, + "logps/chosen": -236.28065490722656, + "logps/rejected": -909.4041137695312, + "loss": 0.2781, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0074669122695923, + "rewards/margins": 6.2784905433654785, + "rewards/rejected": -5.271023750305176, + "step": 1080 + }, + { + "epoch": 1.38, + "learning_rate": 2.3261033491000902e-08, + "logits/chosen": -3.228053569793701, + "logits/rejected": -3.1691291332244873, + "logps/chosen": -268.4226379394531, + "logps/rejected": -348.35693359375, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3718795776367188, + "rewards/margins": 3.044783115386963, + "rewards/rejected": -1.6729034185409546, + "step": 1081 + }, + { + "epoch": 1.38, + "learning_rate": 2.317376764231403e-08, + "logits/chosen": -3.2286882400512695, + "logits/rejected": -3.101682186126709, + "logps/chosen": -304.7772216796875, + "logps/rejected": -535.20068359375, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.074920654296875, + "rewards/margins": 3.309872627258301, + "rewards/rejected": -2.2349517345428467, + "step": 1082 + }, + { + "epoch": 1.38, + "learning_rate": 2.308661639032328e-08, + "logits/chosen": -3.2675116062164307, + "logits/rejected": -3.2127676010131836, + "logps/chosen": -254.963134765625, + "logps/rejected": -728.94580078125, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3015503883361816, + "rewards/margins": 5.290594577789307, + "rewards/rejected": -3.989044189453125, + "step": 1083 + }, + { + "epoch": 1.38, + "learning_rate": 2.2999580107322654e-08, + "logits/chosen": -3.239975690841675, + "logits/rejected": -3.166964054107666, + "logps/chosen": -303.4548645019531, + "logps/rejected": -428.3599548339844, + "loss": 0.3207, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.308386206626892, + "rewards/margins": 3.5651140213012695, + "rewards/rejected": -2.256727695465088, + "step": 1084 + }, + { + "epoch": 1.38, + "learning_rate": 2.2912659165115023e-08, + "logits/chosen": -3.22774076461792, + "logits/rejected": -3.1265907287597656, + "logps/chosen": -263.88592529296875, + "logps/rejected": -985.5457153320312, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3204689025878906, + "rewards/margins": 6.915415287017822, + "rewards/rejected": -5.594946384429932, + "step": 1085 + }, + { + "epoch": 1.38, + "learning_rate": 2.2825853935010535e-08, + "logits/chosen": -3.174152374267578, + "logits/rejected": -3.1368300914764404, + "logps/chosen": -249.27186584472656, + "logps/rejected": -413.52838134765625, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9995574951171875, + "rewards/margins": 3.0430221557617188, + "rewards/rejected": -2.0434646606445312, + "step": 1086 + }, + { + "epoch": 1.39, + "learning_rate": 2.2739164787825095e-08, + "logits/chosen": -3.2021493911743164, + "logits/rejected": -3.106656312942505, + "logps/chosen": -273.2758483886719, + "logps/rejected": -802.1885986328125, + "loss": 0.2809, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4456665515899658, + "rewards/margins": 5.644995212554932, + "rewards/rejected": -4.199328899383545, + "step": 1087 + }, + { + "epoch": 1.39, + "learning_rate": 2.2652592093878663e-08, + "logits/chosen": -3.347712755203247, + "logits/rejected": -3.063741683959961, + "logps/chosen": -256.16961669921875, + "logps/rejected": -323.9251708984375, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4163048267364502, + "rewards/margins": 3.071086883544922, + "rewards/rejected": -1.6547820568084717, + "step": 1088 + }, + { + "epoch": 1.39, + "learning_rate": 2.256613622299376e-08, + "logits/chosen": -3.3184328079223633, + "logits/rejected": -3.1036455631256104, + "logps/chosen": -283.4802551269531, + "logps/rejected": -572.7686157226562, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4781219959259033, + "rewards/margins": 4.2811126708984375, + "rewards/rejected": -2.802990674972534, + "step": 1089 + }, + { + "epoch": 1.39, + "learning_rate": 2.2479797544493827e-08, + "logits/chosen": -3.31719708442688, + "logits/rejected": -3.124114751815796, + "logps/chosen": -244.60804748535156, + "logps/rejected": -399.0925598144531, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.280432105064392, + "rewards/margins": 3.364025831222534, + "rewards/rejected": -2.0835938453674316, + "step": 1090 + }, + { + "epoch": 1.39, + "learning_rate": 2.2393576427201748e-08, + "logits/chosen": -3.2986888885498047, + "logits/rejected": -3.217792272567749, + "logps/chosen": -261.81072998046875, + "logps/rejected": -616.5196533203125, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.049322485923767, + "rewards/margins": 4.615365982055664, + "rewards/rejected": -3.5660431385040283, + "step": 1091 + }, + { + "epoch": 1.39, + "learning_rate": 2.2307473239438153e-08, + "logits/chosen": -3.263214111328125, + "logits/rejected": -3.1893310546875, + "logps/chosen": -301.22576904296875, + "logps/rejected": -630.90576171875, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.472529649734497, + "rewards/margins": 4.852317810058594, + "rewards/rejected": -3.379788398742676, + "step": 1092 + }, + { + "epoch": 1.39, + "learning_rate": 2.22214883490199e-08, + "logits/chosen": -3.2726268768310547, + "logits/rejected": -3.147946357727051, + "logps/chosen": -241.0496063232422, + "logps/rejected": -645.2723999023438, + "loss": 0.3035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2498642206192017, + "rewards/margins": 5.277417182922363, + "rewards/rejected": -4.027552604675293, + "step": 1093 + }, + { + "epoch": 1.39, + "learning_rate": 2.2135622123258513e-08, + "logits/chosen": -3.2062978744506836, + "logits/rejected": -3.051084518432617, + "logps/chosen": -239.61529541015625, + "logps/rejected": -199.44610595703125, + "loss": 0.3218, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1157753467559814, + "rewards/margins": 2.2298049926757812, + "rewards/rejected": -1.1140296459197998, + "step": 1094 + }, + { + "epoch": 1.4, + "learning_rate": 2.2049874928958628e-08, + "logits/chosen": -3.202941417694092, + "logits/rejected": -3.104107618331909, + "logps/chosen": -281.8095703125, + "logps/rejected": -4265.4052734375, + "loss": 0.3209, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.260339379310608, + "rewards/margins": 6.163769721984863, + "rewards/rejected": -4.903430461883545, + "step": 1095 + }, + { + "epoch": 1.4, + "learning_rate": 2.196424713241637e-08, + "logits/chosen": -3.187743663787842, + "logits/rejected": -3.090437889099121, + "logps/chosen": -283.94696044921875, + "logps/rejected": -634.2294311523438, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.133387804031372, + "rewards/margins": 4.6009979248046875, + "rewards/rejected": -3.4676101207733154, + "step": 1096 + }, + { + "epoch": 1.4, + "learning_rate": 2.1878739099417802e-08, + "logits/chosen": -3.2181386947631836, + "logits/rejected": -3.0771138668060303, + "logps/chosen": -256.5408020019531, + "logps/rejected": -512.7520751953125, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0511620044708252, + "rewards/margins": 3.5786585807800293, + "rewards/rejected": -2.527496576309204, + "step": 1097 + }, + { + "epoch": 1.4, + "learning_rate": 2.1793351195237446e-08, + "logits/chosen": -3.2530126571655273, + "logits/rejected": -3.145893096923828, + "logps/chosen": -301.49761962890625, + "logps/rejected": -863.6259765625, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3006302118301392, + "rewards/margins": 5.147071838378906, + "rewards/rejected": -3.8464417457580566, + "step": 1098 + }, + { + "epoch": 1.4, + "learning_rate": 2.1708083784636588e-08, + "logits/chosen": -3.2018113136291504, + "logits/rejected": -3.0304088592529297, + "logps/chosen": -245.96456909179688, + "logps/rejected": -1337.6566162109375, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.307464599609375, + "rewards/margins": 6.922796726226807, + "rewards/rejected": -5.615332126617432, + "step": 1099 + }, + { + "epoch": 1.4, + "learning_rate": 2.162293723186182e-08, + "logits/chosen": -3.246537685394287, + "logits/rejected": -3.0528955459594727, + "logps/chosen": -254.78414916992188, + "logps/rejected": -1119.4459228515625, + "loss": 0.2905, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0821328163146973, + "rewards/margins": 5.691160202026367, + "rewards/rejected": -4.609026908874512, + "step": 1100 + }, + { + "epoch": 1.4, + "learning_rate": 2.1537911900643425e-08, + "logits/chosen": -3.2226972579956055, + "logits/rejected": -3.186953067779541, + "logps/chosen": -263.22943115234375, + "logps/rejected": -765.3766479492188, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.171607255935669, + "rewards/margins": 5.907636642456055, + "rewards/rejected": -4.736029148101807, + "step": 1101 + }, + { + "epoch": 1.4, + "learning_rate": 2.1453008154193904e-08, + "logits/chosen": -3.219895124435425, + "logits/rejected": -3.0844345092773438, + "logps/chosen": -272.832275390625, + "logps/rejected": -481.68414306640625, + "loss": 0.3586, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.34100341796875, + "rewards/margins": 4.137528419494629, + "rewards/rejected": -2.7965247631073, + "step": 1102 + }, + { + "epoch": 1.41, + "learning_rate": 2.136822635520632e-08, + "logits/chosen": -3.2792346477508545, + "logits/rejected": -3.080143451690674, + "logps/chosen": -241.98294067382812, + "logps/rejected": -524.8260498046875, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4679367542266846, + "rewards/margins": 4.067396640777588, + "rewards/rejected": -2.5994598865509033, + "step": 1103 + }, + { + "epoch": 1.41, + "learning_rate": 2.128356686585282e-08, + "logits/chosen": -3.214733839035034, + "logits/rejected": -3.217578887939453, + "logps/chosen": -272.90362548828125, + "logps/rejected": -673.2615966796875, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2106605768203735, + "rewards/margins": 4.367737770080566, + "rewards/rejected": -3.1570770740509033, + "step": 1104 + }, + { + "epoch": 1.41, + "learning_rate": 2.119903004778304e-08, + "logits/chosen": -3.2910308837890625, + "logits/rejected": -3.063333749771118, + "logps/chosen": -291.2593994140625, + "logps/rejected": -447.3338928222656, + "loss": 0.2822, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.247308373451233, + "rewards/margins": 3.319340467453003, + "rewards/rejected": -2.0720322132110596, + "step": 1105 + }, + { + "epoch": 1.41, + "learning_rate": 2.1114616262122648e-08, + "logits/chosen": -3.3245935440063477, + "logits/rejected": -3.097428321838379, + "logps/chosen": -270.33148193359375, + "logps/rejected": -512.1995849609375, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3813873529434204, + "rewards/margins": 3.899624824523926, + "rewards/rejected": -2.518237352371216, + "step": 1106 + }, + { + "epoch": 1.41, + "learning_rate": 2.1030325869471682e-08, + "logits/chosen": -3.1421923637390137, + "logits/rejected": -3.1420631408691406, + "logps/chosen": -277.0576477050781, + "logps/rejected": -1019.7950439453125, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.345862627029419, + "rewards/margins": 7.055051803588867, + "rewards/rejected": -5.709188938140869, + "step": 1107 + }, + { + "epoch": 1.41, + "learning_rate": 2.0946159229903088e-08, + "logits/chosen": -3.2353148460388184, + "logits/rejected": -3.193436861038208, + "logps/chosen": -258.4212646484375, + "logps/rejected": -692.2122802734375, + "loss": 0.2823, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3094162940979004, + "rewards/margins": 5.879203796386719, + "rewards/rejected": -4.569787979125977, + "step": 1108 + }, + { + "epoch": 1.41, + "learning_rate": 2.0862116702961145e-08, + "logits/chosen": -3.2597250938415527, + "logits/rejected": -3.2108449935913086, + "logps/chosen": -277.4127197265625, + "logps/rejected": -367.4320373535156, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485844373703003, + "rewards/margins": 3.4096786975860596, + "rewards/rejected": -1.9238343238830566, + "step": 1109 + }, + { + "epoch": 1.41, + "learning_rate": 2.077819864766e-08, + "logits/chosen": -3.2197108268737793, + "logits/rejected": -3.1524195671081543, + "logps/chosen": -275.687255859375, + "logps/rejected": -448.5677795410156, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.383815050125122, + "rewards/margins": 3.51918888092041, + "rewards/rejected": -2.135373830795288, + "step": 1110 + }, + { + "epoch": 1.42, + "learning_rate": 2.069440542248202e-08, + "logits/chosen": -3.178821563720703, + "logits/rejected": -3.025801420211792, + "logps/chosen": -272.76702880859375, + "logps/rejected": -664.7799072265625, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3847954273223877, + "rewards/margins": 4.322752952575684, + "rewards/rejected": -2.937957763671875, + "step": 1111 + }, + { + "epoch": 1.42, + "learning_rate": 2.0610737385376347e-08, + "logits/chosen": -3.2721335887908936, + "logits/rejected": -3.160116195678711, + "logps/chosen": -256.4518127441406, + "logps/rejected": -832.0401611328125, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2349830865859985, + "rewards/margins": 5.612221717834473, + "rewards/rejected": -4.3772382736206055, + "step": 1112 + }, + { + "epoch": 1.42, + "learning_rate": 2.052719489375732e-08, + "logits/chosen": -3.234649419784546, + "logits/rejected": -3.158076286315918, + "logps/chosen": -225.6110076904297, + "logps/rejected": -531.630859375, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2599601745605469, + "rewards/margins": 5.135463714599609, + "rewards/rejected": -3.8755035400390625, + "step": 1113 + }, + { + "epoch": 1.42, + "learning_rate": 2.0443778304503024e-08, + "logits/chosen": -3.2148656845092773, + "logits/rejected": -3.1635046005249023, + "logps/chosen": -290.88385009765625, + "logps/rejected": -656.77880859375, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2924057245254517, + "rewards/margins": 4.810752868652344, + "rewards/rejected": -3.5183472633361816, + "step": 1114 + }, + { + "epoch": 1.42, + "learning_rate": 2.0360487973953666e-08, + "logits/chosen": -3.23931884765625, + "logits/rejected": -2.9596028327941895, + "logps/chosen": -254.431884765625, + "logps/rejected": -1715.524169921875, + "loss": 0.275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5498077869415283, + "rewards/margins": 9.178372383117676, + "rewards/rejected": -7.628564834594727, + "step": 1115 + }, + { + "epoch": 1.42, + "learning_rate": 2.0277324257910105e-08, + "logits/chosen": -3.262730598449707, + "logits/rejected": -3.1105475425720215, + "logps/chosen": -283.0797119140625, + "logps/rejected": -650.76904296875, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0591659545898438, + "rewards/margins": 4.118786811828613, + "rewards/rejected": -3.0596206188201904, + "step": 1116 + }, + { + "epoch": 1.42, + "learning_rate": 2.0194287511632323e-08, + "logits/chosen": -3.2885375022888184, + "logits/rejected": -3.155815601348877, + "logps/chosen": -259.19970703125, + "logps/rejected": -403.0566711425781, + "loss": 0.2633, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5206215381622314, + "rewards/margins": 3.3662562370300293, + "rewards/rejected": -1.8456344604492188, + "step": 1117 + }, + { + "epoch": 1.43, + "learning_rate": 2.0111378089837954e-08, + "logits/chosen": -3.243501663208008, + "logits/rejected": -3.2142696380615234, + "logps/chosen": -247.74801635742188, + "logps/rejected": -406.6148376464844, + "loss": 0.2746, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3440048694610596, + "rewards/margins": 3.635446310043335, + "rewards/rejected": -2.2914414405822754, + "step": 1118 + }, + { + "epoch": 1.43, + "learning_rate": 2.0028596346700664e-08, + "logits/chosen": -3.2287254333496094, + "logits/rejected": -3.092288017272949, + "logps/chosen": -315.4478759765625, + "logps/rejected": -561.9969482421875, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1874878406524658, + "rewards/margins": 3.9418764114379883, + "rewards/rejected": -2.7543883323669434, + "step": 1119 + }, + { + "epoch": 1.43, + "learning_rate": 1.9945942635848744e-08, + "logits/chosen": -3.226651668548584, + "logits/rejected": -3.1519217491149902, + "logps/chosen": -261.4826354980469, + "logps/rejected": -550.6581420898438, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.236127495765686, + "rewards/margins": 4.192839622497559, + "rewards/rejected": -2.956712245941162, + "step": 1120 + }, + { + "epoch": 1.43, + "learning_rate": 1.9863417310363527e-08, + "logits/chosen": -3.2813942432403564, + "logits/rejected": -3.170844554901123, + "logps/chosen": -252.10931396484375, + "logps/rejected": -626.7388916015625, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3399070501327515, + "rewards/margins": 4.536473751068115, + "rewards/rejected": -3.1965668201446533, + "step": 1121 + }, + { + "epoch": 1.43, + "learning_rate": 1.978102072277791e-08, + "logits/chosen": -3.3112478256225586, + "logits/rejected": -3.1730265617370605, + "logps/chosen": -259.7835693359375, + "logps/rejected": -539.85009765625, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.003819227218628, + "rewards/margins": 3.3940749168395996, + "rewards/rejected": -2.3902556896209717, + "step": 1122 + }, + { + "epoch": 1.43, + "learning_rate": 1.9698753225074882e-08, + "logits/chosen": -3.2848286628723145, + "logits/rejected": -3.101972818374634, + "logps/chosen": -296.4232482910156, + "logps/rejected": -940.7413940429688, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9947280883789062, + "rewards/margins": 5.920192241668701, + "rewards/rejected": -4.925464153289795, + "step": 1123 + }, + { + "epoch": 1.43, + "learning_rate": 1.961661516868594e-08, + "logits/chosen": -3.224972724914551, + "logits/rejected": -3.1321778297424316, + "logps/chosen": -266.8641052246094, + "logps/rejected": -1155.8858642578125, + "loss": 0.2596, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0471924543380737, + "rewards/margins": 7.242712497711182, + "rewards/rejected": -6.195520401000977, + "step": 1124 + }, + { + "epoch": 1.43, + "learning_rate": 1.9534606904489647e-08, + "logits/chosen": -3.2450954914093018, + "logits/rejected": -3.1284713745117188, + "logps/chosen": -241.31341552734375, + "logps/rejected": -646.3194580078125, + "loss": 0.2719, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4642150402069092, + "rewards/margins": 4.8317718505859375, + "rewards/rejected": -3.3675568103790283, + "step": 1125 + }, + { + "epoch": 1.44, + "learning_rate": 1.9452728782810107e-08, + "logits/chosen": -3.246791362762451, + "logits/rejected": -3.077686309814453, + "logps/chosen": -260.0072021484375, + "logps/rejected": -416.1474914550781, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.333221435546875, + "rewards/margins": 3.304064989089966, + "rewards/rejected": -1.9708435535430908, + "step": 1126 + }, + { + "epoch": 1.44, + "learning_rate": 1.9370981153415523e-08, + "logits/chosen": -3.236778497695923, + "logits/rejected": -2.9420220851898193, + "logps/chosen": -266.0011291503906, + "logps/rejected": -677.1834716796875, + "loss": 0.3129, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0927948951721191, + "rewards/margins": 4.231744766235352, + "rewards/rejected": -3.1389498710632324, + "step": 1127 + }, + { + "epoch": 1.44, + "learning_rate": 1.928936436551661e-08, + "logits/chosen": -3.2923049926757812, + "logits/rejected": -3.1557278633117676, + "logps/chosen": -270.14471435546875, + "logps/rejected": -776.8833618164062, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2394577264785767, + "rewards/margins": 5.041780471801758, + "rewards/rejected": -3.8023223876953125, + "step": 1128 + }, + { + "epoch": 1.44, + "learning_rate": 1.9207878767765162e-08, + "logits/chosen": -3.2499001026153564, + "logits/rejected": -3.159548282623291, + "logps/chosen": -228.69158935546875, + "logps/rejected": -765.9482421875, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.090986728668213, + "rewards/margins": 5.589727878570557, + "rewards/rejected": -4.498741149902344, + "step": 1129 + }, + { + "epoch": 1.44, + "learning_rate": 1.9126524708252555e-08, + "logits/chosen": -3.2185091972351074, + "logits/rejected": -3.0722761154174805, + "logps/chosen": -250.2293701171875, + "logps/rejected": -1629.2691650390625, + "loss": 0.2673, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2666412591934204, + "rewards/margins": 8.249588012695312, + "rewards/rejected": -6.982946872711182, + "step": 1130 + }, + { + "epoch": 1.44, + "learning_rate": 1.9045302534508296e-08, + "logits/chosen": -3.2136549949645996, + "logits/rejected": -3.0769214630126953, + "logps/chosen": -297.22076416015625, + "logps/rejected": -809.3461303710938, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3749557733535767, + "rewards/margins": 5.400351047515869, + "rewards/rejected": -4.025395393371582, + "step": 1131 + }, + { + "epoch": 1.44, + "learning_rate": 1.8964212593498442e-08, + "logits/chosen": -3.230653762817383, + "logits/rejected": -3.1271116733551025, + "logps/chosen": -283.5120544433594, + "logps/rejected": -892.6417236328125, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4113922119140625, + "rewards/margins": 6.6139984130859375, + "rewards/rejected": -5.202606201171875, + "step": 1132 + }, + { + "epoch": 1.44, + "learning_rate": 1.8883255231624213e-08, + "logits/chosen": -3.2298460006713867, + "logits/rejected": -3.159494161605835, + "logps/chosen": -285.97332763671875, + "logps/rejected": -620.3023681640625, + "loss": 0.2867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3400115966796875, + "rewards/margins": 4.514739990234375, + "rewards/rejected": -3.1747283935546875, + "step": 1133 + }, + { + "epoch": 1.45, + "learning_rate": 1.8802430794720454e-08, + "logits/chosen": -3.2939984798431396, + "logits/rejected": -3.1381120681762695, + "logps/chosen": -242.79225158691406, + "logps/rejected": -383.0774230957031, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.268286943435669, + "rewards/margins": 3.5026116371154785, + "rewards/rejected": -2.2343246936798096, + "step": 1134 + }, + { + "epoch": 1.45, + "learning_rate": 1.8721739628054185e-08, + "logits/chosen": -3.220874547958374, + "logits/rejected": -3.0693554878234863, + "logps/chosen": -249.46456909179688, + "logps/rejected": -1333.040771484375, + "loss": 0.2811, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1733101606369019, + "rewards/margins": 6.94575309753418, + "rewards/rejected": -5.772442817687988, + "step": 1135 + }, + { + "epoch": 1.45, + "learning_rate": 1.8641182076323148e-08, + "logits/chosen": -3.2199511528015137, + "logits/rejected": -3.0570502281188965, + "logps/chosen": -269.6544494628906, + "logps/rejected": -371.4897766113281, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.321058750152588, + "rewards/margins": 2.692296028137207, + "rewards/rejected": -1.3712371587753296, + "step": 1136 + }, + { + "epoch": 1.45, + "learning_rate": 1.856075848365427e-08, + "logits/chosen": -3.255009174346924, + "logits/rejected": -3.0959091186523438, + "logps/chosen": -261.7792663574219, + "logps/rejected": -757.82080078125, + "loss": 0.2855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4148651361465454, + "rewards/margins": 5.789712905883789, + "rewards/rejected": -4.374847412109375, + "step": 1137 + }, + { + "epoch": 1.45, + "learning_rate": 1.848046919360225e-08, + "logits/chosen": -3.269260883331299, + "logits/rejected": -3.1570141315460205, + "logps/chosen": -256.5840759277344, + "logps/rejected": -554.1776733398438, + "loss": 0.3111, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1679368019104004, + "rewards/margins": 4.273287773132324, + "rewards/rejected": -3.105351209640503, + "step": 1138 + }, + { + "epoch": 1.45, + "learning_rate": 1.840031454914804e-08, + "logits/chosen": -3.175614833831787, + "logits/rejected": -3.020543098449707, + "logps/chosen": -267.58966064453125, + "logps/rejected": -625.4298095703125, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.220484972000122, + "rewards/margins": 4.538569927215576, + "rewards/rejected": -3.318084716796875, + "step": 1139 + }, + { + "epoch": 1.45, + "learning_rate": 1.8320294892697475e-08, + "logits/chosen": -3.2618141174316406, + "logits/rejected": -3.1621646881103516, + "logps/chosen": -301.5961608886719, + "logps/rejected": -799.6797485351562, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0062286853790283, + "rewards/margins": 5.270834445953369, + "rewards/rejected": -4.264605522155762, + "step": 1140 + }, + { + "epoch": 1.45, + "learning_rate": 1.8240410566079684e-08, + "logits/chosen": -3.2517316341400146, + "logits/rejected": -3.23404598236084, + "logps/chosen": -267.994384765625, + "logps/rejected": -776.7867431640625, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3194351196289062, + "rewards/margins": 5.985896587371826, + "rewards/rejected": -4.666460990905762, + "step": 1141 + }, + { + "epoch": 1.46, + "learning_rate": 1.8160661910545715e-08, + "logits/chosen": -3.1617846488952637, + "logits/rejected": -3.1406445503234863, + "logps/chosen": -273.2911376953125, + "logps/rejected": -715.8590087890625, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2905585765838623, + "rewards/margins": 4.985907554626465, + "rewards/rejected": -3.6953492164611816, + "step": 1142 + }, + { + "epoch": 1.46, + "learning_rate": 1.8081049266767045e-08, + "logits/chosen": -3.3229331970214844, + "logits/rejected": -3.17446231842041, + "logps/chosen": -236.9075164794922, + "logps/rejected": -792.4066162109375, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3261902332305908, + "rewards/margins": 5.883657932281494, + "rewards/rejected": -4.557467460632324, + "step": 1143 + }, + { + "epoch": 1.46, + "learning_rate": 1.8001572974834166e-08, + "logits/chosen": -3.2522289752960205, + "logits/rejected": -3.19093656539917, + "logps/chosen": -250.35391235351562, + "logps/rejected": -745.8955078125, + "loss": 0.2524, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.221379041671753, + "rewards/margins": 5.6707563400268555, + "rewards/rejected": -4.449377536773682, + "step": 1144 + }, + { + "epoch": 1.46, + "learning_rate": 1.7922233374255074e-08, + "logits/chosen": -3.2924880981445312, + "logits/rejected": -3.1450235843658447, + "logps/chosen": -265.368896484375, + "logps/rejected": -601.65478515625, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.305171251296997, + "rewards/margins": 4.2244553565979, + "rewards/rejected": -2.9192841053009033, + "step": 1145 + }, + { + "epoch": 1.46, + "learning_rate": 1.7843030803953834e-08, + "logits/chosen": -3.2815494537353516, + "logits/rejected": -3.1258435249328613, + "logps/chosen": -265.7127685546875, + "logps/rejected": -693.8712158203125, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3355622291564941, + "rewards/margins": 4.9109697341918945, + "rewards/rejected": -3.5754075050354004, + "step": 1146 + }, + { + "epoch": 1.46, + "learning_rate": 1.776396560226915e-08, + "logits/chosen": -3.251325845718384, + "logits/rejected": -3.1842854022979736, + "logps/chosen": -237.32089233398438, + "logps/rejected": -819.124267578125, + "loss": 0.2759, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2220710515975952, + "rewards/margins": 5.857639312744141, + "rewards/rejected": -4.635568618774414, + "step": 1147 + }, + { + "epoch": 1.46, + "learning_rate": 1.768503810695295e-08, + "logits/chosen": -3.2592453956604004, + "logits/rejected": -3.163264274597168, + "logps/chosen": -248.30499267578125, + "logps/rejected": -379.3896789550781, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.056481957435608, + "rewards/margins": 3.418750047683716, + "rewards/rejected": -2.3622682094573975, + "step": 1148 + }, + { + "epoch": 1.46, + "learning_rate": 1.760624865516886e-08, + "logits/chosen": -3.2920026779174805, + "logits/rejected": -3.1057052612304688, + "logps/chosen": -223.04568481445312, + "logps/rejected": -673.01171875, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5502235889434814, + "rewards/margins": 4.398331642150879, + "rewards/rejected": -2.8481080532073975, + "step": 1149 + }, + { + "epoch": 1.47, + "learning_rate": 1.7527597583490822e-08, + "logits/chosen": -3.2345027923583984, + "logits/rejected": -3.1690454483032227, + "logps/chosen": -262.8484191894531, + "logps/rejected": -779.3536987304688, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3843094110488892, + "rewards/margins": 5.112083435058594, + "rewards/rejected": -3.727774143218994, + "step": 1150 + }, + { + "epoch": 1.47, + "learning_rate": 1.744908522790165e-08, + "logits/chosen": -3.271864175796509, + "logits/rejected": -3.1356639862060547, + "logps/chosen": -275.8346862792969, + "logps/rejected": -594.9804077148438, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3534401655197144, + "rewards/margins": 4.497931480407715, + "rewards/rejected": -3.144491672515869, + "step": 1151 + }, + { + "epoch": 1.47, + "learning_rate": 1.7370711923791564e-08, + "logits/chosen": -3.3028483390808105, + "logits/rejected": -3.1820387840270996, + "logps/chosen": -279.63275146484375, + "logps/rejected": -658.4431762695312, + "loss": 0.2848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.349642276763916, + "rewards/margins": 4.824191093444824, + "rewards/rejected": -3.47454833984375, + "step": 1152 + }, + { + "epoch": 1.47, + "learning_rate": 1.7292478005956846e-08, + "logits/chosen": -3.242203712463379, + "logits/rejected": -3.202812671661377, + "logps/chosen": -270.16009521484375, + "logps/rejected": -810.2015380859375, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.174140214920044, + "rewards/margins": 6.066709518432617, + "rewards/rejected": -4.892569065093994, + "step": 1153 + }, + { + "epoch": 1.47, + "learning_rate": 1.7214383808598282e-08, + "logits/chosen": -3.2501273155212402, + "logits/rejected": -3.169975519180298, + "logps/chosen": -240.11453247070312, + "logps/rejected": -641.7176513671875, + "loss": 0.2709, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2894668579101562, + "rewards/margins": 4.587144374847412, + "rewards/rejected": -3.297677516937256, + "step": 1154 + }, + { + "epoch": 1.47, + "learning_rate": 1.713642966531982e-08, + "logits/chosen": -3.223076820373535, + "logits/rejected": -3.1272034645080566, + "logps/chosen": -292.03790283203125, + "logps/rejected": -705.8579711914062, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4246841669082642, + "rewards/margins": 4.934288024902344, + "rewards/rejected": -3.509603977203369, + "step": 1155 + }, + { + "epoch": 1.47, + "learning_rate": 1.70586159091271e-08, + "logits/chosen": -3.263260841369629, + "logits/rejected": -3.132633686065674, + "logps/chosen": -256.509033203125, + "logps/rejected": -458.30084228515625, + "loss": 0.2798, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2676254510879517, + "rewards/margins": 3.546539306640625, + "rewards/rejected": -2.278913974761963, + "step": 1156 + }, + { + "epoch": 1.47, + "learning_rate": 1.698094287242611e-08, + "logits/chosen": -3.3048224449157715, + "logits/rejected": -3.087355613708496, + "logps/chosen": -244.3580322265625, + "logps/rejected": -881.3289794921875, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2154381275177002, + "rewards/margins": 4.988758087158203, + "rewards/rejected": -3.773320198059082, + "step": 1157 + }, + { + "epoch": 1.48, + "learning_rate": 1.6903410887021675e-08, + "logits/chosen": -3.2654755115509033, + "logits/rejected": -3.0697782039642334, + "logps/chosen": -254.9468536376953, + "logps/rejected": -1275.7310791015625, + "loss": 0.27, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.273646593093872, + "rewards/margins": 6.472296714782715, + "rewards/rejected": -5.198649883270264, + "step": 1158 + }, + { + "epoch": 1.48, + "learning_rate": 1.682602028411606e-08, + "logits/chosen": -3.2790584564208984, + "logits/rejected": -3.1054039001464844, + "logps/chosen": -229.07650756835938, + "logps/rejected": -385.3168640136719, + "loss": 0.3212, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3235504627227783, + "rewards/margins": 3.2490479946136475, + "rewards/rejected": -1.9254975318908691, + "step": 1159 + }, + { + "epoch": 1.48, + "learning_rate": 1.674877139430758e-08, + "logits/chosen": -3.1980385780334473, + "logits/rejected": -3.0881612300872803, + "logps/chosen": -278.4991760253906, + "logps/rejected": -1325.916259765625, + "loss": 0.2801, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2521499395370483, + "rewards/margins": 7.31649923324585, + "rewards/rejected": -6.064349174499512, + "step": 1160 + }, + { + "epoch": 1.48, + "learning_rate": 1.6671664547589215e-08, + "logits/chosen": -3.236575126647949, + "logits/rejected": -3.1609811782836914, + "logps/chosen": -272.4652099609375, + "logps/rejected": -561.5181884765625, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0253692865371704, + "rewards/margins": 3.80126953125, + "rewards/rejected": -2.775900363922119, + "step": 1161 + }, + { + "epoch": 1.48, + "learning_rate": 1.6594700073347112e-08, + "logits/chosen": -3.268357753753662, + "logits/rejected": -3.152733087539673, + "logps/chosen": -246.3203125, + "logps/rejected": -723.1522216796875, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2053558826446533, + "rewards/margins": 5.414987564086914, + "rewards/rejected": -4.209631443023682, + "step": 1162 + }, + { + "epoch": 1.48, + "learning_rate": 1.6517878300359256e-08, + "logits/chosen": -3.266808032989502, + "logits/rejected": -3.0425753593444824, + "logps/chosen": -241.72576904296875, + "logps/rejected": -1230.178955078125, + "loss": 0.283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8636566400527954, + "rewards/margins": 6.088693618774414, + "rewards/rejected": -5.22503662109375, + "step": 1163 + }, + { + "epoch": 1.48, + "learning_rate": 1.6441199556794033e-08, + "logits/chosen": -3.194068431854248, + "logits/rejected": -3.082955837249756, + "logps/chosen": -258.03857421875, + "logps/rejected": -586.2860107421875, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3356208801269531, + "rewards/margins": 4.36821985244751, + "rewards/rejected": -3.0325989723205566, + "step": 1164 + }, + { + "epoch": 1.49, + "learning_rate": 1.6364664170208812e-08, + "logits/chosen": -3.215756893157959, + "logits/rejected": -3.1774978637695312, + "logps/chosen": -249.8394317626953, + "logps/rejected": -611.9830322265625, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.249781847000122, + "rewards/margins": 4.460182189941406, + "rewards/rejected": -3.2104005813598633, + "step": 1165 + }, + { + "epoch": 1.49, + "learning_rate": 1.6288272467548632e-08, + "logits/chosen": -3.2496840953826904, + "logits/rejected": -3.1323671340942383, + "logps/chosen": -229.0006103515625, + "logps/rejected": -638.95458984375, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.206865668296814, + "rewards/margins": 4.577669143676758, + "rewards/rejected": -3.3708038330078125, + "step": 1166 + }, + { + "epoch": 1.49, + "learning_rate": 1.6212024775144666e-08, + "logits/chosen": -3.2796263694763184, + "logits/rejected": -3.2445077896118164, + "logps/chosen": -239.88487243652344, + "logps/rejected": -654.3489379882812, + "loss": 0.2522, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2354583740234375, + "rewards/margins": 5.13059139251709, + "rewards/rejected": -3.895132541656494, + "step": 1167 + }, + { + "epoch": 1.49, + "learning_rate": 1.6135921418712954e-08, + "logits/chosen": -3.254077434539795, + "logits/rejected": -3.0551936626434326, + "logps/chosen": -235.46177673339844, + "logps/rejected": -574.781982421875, + "loss": 0.2724, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3939728736877441, + "rewards/margins": 4.026034355163574, + "rewards/rejected": -2.632061719894409, + "step": 1168 + }, + { + "epoch": 1.49, + "learning_rate": 1.605996272335291e-08, + "logits/chosen": -3.2243971824645996, + "logits/rejected": -3.104043960571289, + "logps/chosen": -266.6265563964844, + "logps/rejected": -673.5126953125, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.022088646888733, + "rewards/margins": 4.424066543579102, + "rewards/rejected": -3.4019775390625, + "step": 1169 + }, + { + "epoch": 1.49, + "learning_rate": 1.5984149013546046e-08, + "logits/chosen": -3.184086799621582, + "logits/rejected": -3.1527435779571533, + "logps/chosen": -241.87533569335938, + "logps/rejected": -720.32763671875, + "loss": 0.2572, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5248451232910156, + "rewards/margins": 5.537507057189941, + "rewards/rejected": -4.012661933898926, + "step": 1170 + }, + { + "epoch": 1.49, + "learning_rate": 1.5908480613154468e-08, + "logits/chosen": -3.2909317016601562, + "logits/rejected": -3.2182531356811523, + "logps/chosen": -262.49310302734375, + "logps/rejected": -568.6566162109375, + "loss": 0.298, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.169642686843872, + "rewards/margins": 4.4709153175354, + "rewards/rejected": -3.3012726306915283, + "step": 1171 + }, + { + "epoch": 1.49, + "learning_rate": 1.583295784541958e-08, + "logits/chosen": -3.250596284866333, + "logits/rejected": -2.990060329437256, + "logps/chosen": -275.1548767089844, + "logps/rejected": -1172.4879150390625, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2111831903457642, + "rewards/margins": 6.010459899902344, + "rewards/rejected": -4.799276828765869, + "step": 1172 + }, + { + "epoch": 1.5, + "learning_rate": 1.5757581032960636e-08, + "logits/chosen": -3.2983784675598145, + "logits/rejected": -3.134446144104004, + "logps/chosen": -250.91148376464844, + "logps/rejected": -535.0380249023438, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2774155139923096, + "rewards/margins": 4.353299140930176, + "rewards/rejected": -3.075883388519287, + "step": 1173 + }, + { + "epoch": 1.5, + "learning_rate": 1.568235049777345e-08, + "logits/chosen": -3.2236948013305664, + "logits/rejected": -3.1193437576293945, + "logps/chosen": -259.6990051269531, + "logps/rejected": -771.5838012695312, + "loss": 0.299, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1158729791641235, + "rewards/margins": 5.23430871963501, + "rewards/rejected": -4.118435859680176, + "step": 1174 + }, + { + "epoch": 1.5, + "learning_rate": 1.560726656122892e-08, + "logits/chosen": -3.205087661743164, + "logits/rejected": -3.1423773765563965, + "logps/chosen": -258.2913818359375, + "logps/rejected": -770.8612060546875, + "loss": 0.263, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4114456176757812, + "rewards/margins": 5.60085916519165, + "rewards/rejected": -4.189413547515869, + "step": 1175 + }, + { + "epoch": 1.5, + "learning_rate": 1.553232954407171e-08, + "logits/chosen": -3.2673439979553223, + "logits/rejected": -3.140573501586914, + "logps/chosen": -249.3372344970703, + "logps/rejected": -313.130126953125, + "loss": 0.2867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4669480323791504, + "rewards/margins": 3.337602138519287, + "rewards/rejected": -1.8706543445587158, + "step": 1176 + }, + { + "epoch": 1.5, + "learning_rate": 1.5457539766418887e-08, + "logits/chosen": -3.274385452270508, + "logits/rejected": -3.087261199951172, + "logps/chosen": -301.7484436035156, + "logps/rejected": -957.4783935546875, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3449203968048096, + "rewards/margins": 6.288740158081055, + "rewards/rejected": -4.943819999694824, + "step": 1177 + }, + { + "epoch": 1.5, + "learning_rate": 1.5382897547758512e-08, + "logits/chosen": -3.215221405029297, + "logits/rejected": -3.1629698276519775, + "logps/chosen": -297.54815673828125, + "logps/rejected": -767.7359619140625, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.456312656402588, + "rewards/margins": 5.389701843261719, + "rewards/rejected": -3.933389186859131, + "step": 1178 + }, + { + "epoch": 1.5, + "learning_rate": 1.5308403206948344e-08, + "logits/chosen": -3.2478485107421875, + "logits/rejected": -3.1335911750793457, + "logps/chosen": -264.40411376953125, + "logps/rejected": -492.61505126953125, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.278668999671936, + "rewards/margins": 3.831711530685425, + "rewards/rejected": -2.5530426502227783, + "step": 1179 + }, + { + "epoch": 1.5, + "learning_rate": 1.52340570622144e-08, + "logits/chosen": -3.2676994800567627, + "logits/rejected": -3.0957205295562744, + "logps/chosen": -240.44760131835938, + "logps/rejected": -832.03759765625, + "loss": 0.273, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.136322021484375, + "rewards/margins": 5.109631538391113, + "rewards/rejected": -3.9733095169067383, + "step": 1180 + }, + { + "epoch": 1.51, + "learning_rate": 1.5159859431149636e-08, + "logits/chosen": -3.230931043624878, + "logits/rejected": -3.126330852508545, + "logps/chosen": -271.2381591796875, + "logps/rejected": -703.4635620117188, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.394709825515747, + "rewards/margins": 4.7331438064575195, + "rewards/rejected": -3.3384339809417725, + "step": 1181 + }, + { + "epoch": 1.51, + "learning_rate": 1.508581063071258e-08, + "logits/chosen": -3.3403677940368652, + "logits/rejected": -3.2063591480255127, + "logps/chosen": -251.40621948242188, + "logps/rejected": -716.107421875, + "loss": 0.2894, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2266793251037598, + "rewards/margins": 5.292313575744629, + "rewards/rejected": -4.065634250640869, + "step": 1182 + }, + { + "epoch": 1.51, + "learning_rate": 1.5011910977226017e-08, + "logits/chosen": -3.1942291259765625, + "logits/rejected": -3.102328300476074, + "logps/chosen": -266.08270263671875, + "logps/rejected": -493.3570556640625, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0867599248886108, + "rewards/margins": 3.77693510055542, + "rewards/rejected": -2.6901750564575195, + "step": 1183 + }, + { + "epoch": 1.51, + "learning_rate": 1.493816078637557e-08, + "logits/chosen": -3.266716241836548, + "logits/rejected": -3.0749659538269043, + "logps/chosen": -277.9430847167969, + "logps/rejected": -980.4901123046875, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4256348609924316, + "rewards/margins": 5.87245512008667, + "rewards/rejected": -4.446820259094238, + "step": 1184 + }, + { + "epoch": 1.51, + "learning_rate": 1.4864560373208396e-08, + "logits/chosen": -3.2511727809906006, + "logits/rejected": -3.106907844543457, + "logps/chosen": -278.89080810546875, + "logps/rejected": -579.4039306640625, + "loss": 0.3112, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5077804327011108, + "rewards/margins": 4.162411689758301, + "rewards/rejected": -2.6546311378479004, + "step": 1185 + }, + { + "epoch": 1.51, + "learning_rate": 1.47911100521318e-08, + "logits/chosen": -3.243117570877075, + "logits/rejected": -3.205867290496826, + "logps/chosen": -261.71282958984375, + "logps/rejected": -494.09918212890625, + "loss": 0.3259, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3355109691619873, + "rewards/margins": 4.606550216674805, + "rewards/rejected": -3.271038770675659, + "step": 1186 + }, + { + "epoch": 1.51, + "learning_rate": 1.4717810136911996e-08, + "logits/chosen": -3.2733283042907715, + "logits/rejected": -3.0599594116210938, + "logps/chosen": -280.6305236816406, + "logps/rejected": -568.1295166015625, + "loss": 0.325, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4511626958847046, + "rewards/margins": 4.0325775146484375, + "rewards/rejected": -2.5814149379730225, + "step": 1187 + }, + { + "epoch": 1.51, + "learning_rate": 1.4644660940672625e-08, + "logits/chosen": -3.2263689041137695, + "logits/rejected": -3.150958776473999, + "logps/chosen": -258.6033935546875, + "logps/rejected": -907.490234375, + "loss": 0.2752, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1195244789123535, + "rewards/margins": 5.0268425941467285, + "rewards/rejected": -3.907318115234375, + "step": 1188 + }, + { + "epoch": 1.52, + "learning_rate": 1.4571662775893507e-08, + "logits/chosen": -3.235097885131836, + "logits/rejected": -3.0750253200531006, + "logps/chosen": -254.47767639160156, + "logps/rejected": -1152.156982421875, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.11297607421875, + "rewards/margins": 6.221008777618408, + "rewards/rejected": -5.1080322265625, + "step": 1189 + }, + { + "epoch": 1.52, + "learning_rate": 1.4498815954409278e-08, + "logits/chosen": -3.263383388519287, + "logits/rejected": -3.1243510246276855, + "logps/chosen": -237.89671325683594, + "logps/rejected": -789.0797119140625, + "loss": 0.2855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.284217119216919, + "rewards/margins": 5.876023292541504, + "rewards/rejected": -4.591805934906006, + "step": 1190 + }, + { + "epoch": 1.52, + "learning_rate": 1.4426120787408046e-08, + "logits/chosen": -3.314652919769287, + "logits/rejected": -3.0888900756835938, + "logps/chosen": -247.48208618164062, + "logps/rejected": -754.7799072265625, + "loss": 0.2821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4744499921798706, + "rewards/margins": 4.760310173034668, + "rewards/rejected": -3.2858598232269287, + "step": 1191 + }, + { + "epoch": 1.52, + "learning_rate": 1.4353577585430148e-08, + "logits/chosen": -3.187973976135254, + "logits/rejected": -3.153688430786133, + "logps/chosen": -253.3088836669922, + "logps/rejected": -631.7725830078125, + "loss": 0.2651, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3217827081680298, + "rewards/margins": 5.151775360107422, + "rewards/rejected": -3.8299927711486816, + "step": 1192 + }, + { + "epoch": 1.52, + "learning_rate": 1.4281186658366679e-08, + "logits/chosen": -3.3176345825195312, + "logits/rejected": -3.1009297370910645, + "logps/chosen": -242.38385009765625, + "logps/rejected": -600.4190673828125, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.231054663658142, + "rewards/margins": 4.869358062744141, + "rewards/rejected": -3.638303279876709, + "step": 1193 + }, + { + "epoch": 1.52, + "learning_rate": 1.4208948315458275e-08, + "logits/chosen": -3.224896192550659, + "logits/rejected": -3.116863250732422, + "logps/chosen": -231.22088623046875, + "logps/rejected": -1195.121337890625, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1276741027832031, + "rewards/margins": 7.350165367126465, + "rewards/rejected": -6.222491264343262, + "step": 1194 + }, + { + "epoch": 1.52, + "learning_rate": 1.4136862865293747e-08, + "logits/chosen": -3.276158571243286, + "logits/rejected": -3.0889103412628174, + "logps/chosen": -260.15899658203125, + "logps/rejected": -1500.580078125, + "loss": 0.2768, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4102425575256348, + "rewards/margins": 8.509199142456055, + "rewards/rejected": -7.098956108093262, + "step": 1195 + }, + { + "epoch": 1.52, + "learning_rate": 1.4064930615808806e-08, + "logits/chosen": -3.269075632095337, + "logits/rejected": -3.202089309692383, + "logps/chosen": -268.7236022949219, + "logps/rejected": -693.981201171875, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6470459699630737, + "rewards/margins": 5.4920806884765625, + "rewards/rejected": -3.845034599304199, + "step": 1196 + }, + { + "epoch": 1.53, + "learning_rate": 1.3993151874284697e-08, + "logits/chosen": -3.246939182281494, + "logits/rejected": -3.0682504177093506, + "logps/chosen": -257.30792236328125, + "logps/rejected": -676.8065185546875, + "loss": 0.2872, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2995773553848267, + "rewards/margins": 4.570955276489258, + "rewards/rejected": -3.2713775634765625, + "step": 1197 + }, + { + "epoch": 1.53, + "learning_rate": 1.39215269473469e-08, + "logits/chosen": -3.250606060028076, + "logits/rejected": -3.0921850204467773, + "logps/chosen": -247.24575805664062, + "logps/rejected": -3198.9970703125, + "loss": 0.2707, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5212112665176392, + "rewards/margins": 10.631813049316406, + "rewards/rejected": -9.110602378845215, + "step": 1198 + }, + { + "epoch": 1.53, + "learning_rate": 1.3850056140963828e-08, + "logits/chosen": -3.2323997020721436, + "logits/rejected": -3.176682949066162, + "logps/chosen": -278.3257751464844, + "logps/rejected": -419.5304870605469, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0868072509765625, + "rewards/margins": 3.260911703109741, + "rewards/rejected": -2.1741044521331787, + "step": 1199 + }, + { + "epoch": 1.53, + "learning_rate": 1.3778739760445552e-08, + "logits/chosen": -3.219021797180176, + "logits/rejected": -3.1703991889953613, + "logps/chosen": -255.70669555664062, + "logps/rejected": -644.7546997070312, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.131890892982483, + "rewards/margins": 4.114540100097656, + "rewards/rejected": -2.982649326324463, + "step": 1200 + }, + { + "epoch": 1.53, + "learning_rate": 1.3707578110442436e-08, + "logits/chosen": -3.1718897819519043, + "logits/rejected": -3.1265711784362793, + "logps/chosen": -291.408203125, + "logps/rejected": -831.47021484375, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.204827070236206, + "rewards/margins": 5.818282604217529, + "rewards/rejected": -4.613455295562744, + "step": 1201 + }, + { + "epoch": 1.53, + "learning_rate": 1.3636571494943861e-08, + "logits/chosen": -3.2288451194763184, + "logits/rejected": -3.051715850830078, + "logps/chosen": -294.586669921875, + "logps/rejected": -1346.8399658203125, + "loss": 0.284, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0910522937774658, + "rewards/margins": 7.461664199829102, + "rewards/rejected": -6.370611667633057, + "step": 1202 + }, + { + "epoch": 1.53, + "learning_rate": 1.3565720217276938e-08, + "logits/chosen": -3.3342344760894775, + "logits/rejected": -3.153219223022461, + "logps/chosen": -249.73582458496094, + "logps/rejected": -443.0572509765625, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.134820580482483, + "rewards/margins": 3.13189697265625, + "rewards/rejected": -1.9970765113830566, + "step": 1203 + }, + { + "epoch": 1.53, + "learning_rate": 1.349502458010519e-08, + "logits/chosen": -3.2002768516540527, + "logits/rejected": -3.1981186866760254, + "logps/chosen": -273.1326904296875, + "logps/rejected": -623.4888305664062, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3815598487854004, + "rewards/margins": 4.713991165161133, + "rewards/rejected": -3.3324310779571533, + "step": 1204 + }, + { + "epoch": 1.54, + "learning_rate": 1.3424484885427312e-08, + "logits/chosen": -3.211419105529785, + "logits/rejected": -3.1624562740325928, + "logps/chosen": -259.77984619140625, + "logps/rejected": -861.8385009765625, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2660889625549316, + "rewards/margins": 6.138266563415527, + "rewards/rejected": -4.8721771240234375, + "step": 1205 + }, + { + "epoch": 1.54, + "learning_rate": 1.3354101434575805e-08, + "logits/chosen": -3.3127336502075195, + "logits/rejected": -3.1694228649139404, + "logps/chosen": -239.376953125, + "logps/rejected": -1136.3837890625, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4950530529022217, + "rewards/margins": 7.536843776702881, + "rewards/rejected": -6.041790962219238, + "step": 1206 + }, + { + "epoch": 1.54, + "learning_rate": 1.3283874528215733e-08, + "logits/chosen": -3.245231866836548, + "logits/rejected": -3.1466641426086426, + "logps/chosen": -259.63671875, + "logps/rejected": -888.32666015625, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.032344102859497, + "rewards/margins": 6.0652570724487305, + "rewards/rejected": -5.0329132080078125, + "step": 1207 + }, + { + "epoch": 1.54, + "learning_rate": 1.321380446634342e-08, + "logits/chosen": -3.2322044372558594, + "logits/rejected": -3.222978353500366, + "logps/chosen": -283.2190246582031, + "logps/rejected": -815.720703125, + "loss": 0.2976, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4821182489395142, + "rewards/margins": 7.42054557800293, + "rewards/rejected": -5.938427925109863, + "step": 1208 + }, + { + "epoch": 1.54, + "learning_rate": 1.3143891548285213e-08, + "logits/chosen": -3.259401321411133, + "logits/rejected": -3.153757095336914, + "logps/chosen": -250.87936401367188, + "logps/rejected": -570.545166015625, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4891159534454346, + "rewards/margins": 4.697196960449219, + "rewards/rejected": -3.2080812454223633, + "step": 1209 + }, + { + "epoch": 1.54, + "learning_rate": 1.3074136072696147e-08, + "logits/chosen": -3.2488110065460205, + "logits/rejected": -3.097421646118164, + "logps/chosen": -278.1345520019531, + "logps/rejected": -764.945068359375, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.476043701171875, + "rewards/margins": 5.328570365905762, + "rewards/rejected": -3.852526903152466, + "step": 1210 + }, + { + "epoch": 1.54, + "learning_rate": 1.300453833755869e-08, + "logits/chosen": -3.2963919639587402, + "logits/rejected": -3.207770347595215, + "logps/chosen": -267.50347900390625, + "logps/rejected": -499.5617370605469, + "loss": 0.3173, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1934006214141846, + "rewards/margins": 4.2894744873046875, + "rewards/rejected": -3.096074104309082, + "step": 1211 + }, + { + "epoch": 1.54, + "learning_rate": 1.2935098640181458e-08, + "logits/chosen": -3.1967883110046387, + "logits/rejected": -3.094454288482666, + "logps/chosen": -283.4840393066406, + "logps/rejected": -663.6505737304688, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2644882202148438, + "rewards/margins": 4.371363639831543, + "rewards/rejected": -3.1068756580352783, + "step": 1212 + }, + { + "epoch": 1.55, + "learning_rate": 1.2865817277198004e-08, + "logits/chosen": -3.1747164726257324, + "logits/rejected": -3.048163414001465, + "logps/chosen": -285.3232116699219, + "logps/rejected": -1330.499755859375, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2900497913360596, + "rewards/margins": 7.361198425292969, + "rewards/rejected": -6.071148872375488, + "step": 1213 + }, + { + "epoch": 1.55, + "learning_rate": 1.2796694544565478e-08, + "logits/chosen": -3.238341808319092, + "logits/rejected": -3.11375093460083, + "logps/chosen": -253.97265625, + "logps/rejected": -450.82061767578125, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4318337440490723, + "rewards/margins": 3.896106719970703, + "rewards/rejected": -2.46427321434021, + "step": 1214 + }, + { + "epoch": 1.55, + "learning_rate": 1.272773073756338e-08, + "logits/chosen": -3.3103232383728027, + "logits/rejected": -3.2201008796691895, + "logps/chosen": -284.89056396484375, + "logps/rejected": -625.6538696289062, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0942131280899048, + "rewards/margins": 4.436583995819092, + "rewards/rejected": -3.3423707485198975, + "step": 1215 + }, + { + "epoch": 1.55, + "learning_rate": 1.2658926150792321e-08, + "logits/chosen": -3.222831964492798, + "logits/rejected": -3.132732391357422, + "logps/chosen": -230.09983825683594, + "logps/rejected": -648.34765625, + "loss": 0.2698, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3779534101486206, + "rewards/margins": 4.8697075843811035, + "rewards/rejected": -3.4917540550231934, + "step": 1216 + }, + { + "epoch": 1.55, + "learning_rate": 1.2590281078172738e-08, + "logits/chosen": -3.2843692302703857, + "logits/rejected": -3.075326442718506, + "logps/chosen": -241.31661987304688, + "logps/rejected": -639.7445678710938, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.242323398590088, + "rewards/margins": 4.574141025543213, + "rewards/rejected": -3.331817626953125, + "step": 1217 + }, + { + "epoch": 1.55, + "learning_rate": 1.2521795812943703e-08, + "logits/chosen": -3.223629951477051, + "logits/rejected": -3.0882303714752197, + "logps/chosen": -276.39581298828125, + "logps/rejected": -740.3214111328125, + "loss": 0.3104, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1573761701583862, + "rewards/margins": 5.295693874359131, + "rewards/rejected": -4.138318061828613, + "step": 1218 + }, + { + "epoch": 1.55, + "learning_rate": 1.2453470647661563e-08, + "logits/chosen": -3.26600980758667, + "logits/rejected": -3.1022872924804688, + "logps/chosen": -304.177490234375, + "logps/rejected": -1202.6524658203125, + "loss": 0.308, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5309479236602783, + "rewards/margins": 6.965456962585449, + "rewards/rejected": -5.43450927734375, + "step": 1219 + }, + { + "epoch": 1.56, + "learning_rate": 1.2385305874198776e-08, + "logits/chosen": -3.2670419216156006, + "logits/rejected": -3.037400960922241, + "logps/chosen": -261.0909118652344, + "logps/rejected": -1216.625732421875, + "loss": 0.2757, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3460098505020142, + "rewards/margins": 7.391902446746826, + "rewards/rejected": -6.045892715454102, + "step": 1220 + }, + { + "epoch": 1.56, + "learning_rate": 1.2317301783742606e-08, + "logits/chosen": -3.245474338531494, + "logits/rejected": -3.2001852989196777, + "logps/chosen": -272.02197265625, + "logps/rejected": -670.8356323242188, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2164772748947144, + "rewards/margins": 5.129245758056641, + "rewards/rejected": -3.912768602371216, + "step": 1221 + }, + { + "epoch": 1.56, + "learning_rate": 1.2249458666793966e-08, + "logits/chosen": -3.2514662742614746, + "logits/rejected": -3.1584815979003906, + "logps/chosen": -255.31918334960938, + "logps/rejected": -972.2415771484375, + "loss": 0.302, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3860374689102173, + "rewards/margins": 7.368190765380859, + "rewards/rejected": -5.982153415679932, + "step": 1222 + }, + { + "epoch": 1.56, + "learning_rate": 1.2181776813166078e-08, + "logits/chosen": -3.1999497413635254, + "logits/rejected": -3.105530261993408, + "logps/chosen": -267.1357421875, + "logps/rejected": -403.5330810546875, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.519385576248169, + "rewards/margins": 3.6007392406463623, + "rewards/rejected": -2.0813536643981934, + "step": 1223 + }, + { + "epoch": 1.56, + "learning_rate": 1.2114256511983274e-08, + "logits/chosen": -3.233689546585083, + "logits/rejected": -3.2148807048797607, + "logps/chosen": -241.00514221191406, + "logps/rejected": -636.1494140625, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5422722101211548, + "rewards/margins": 5.340605735778809, + "rewards/rejected": -3.7983336448669434, + "step": 1224 + }, + { + "epoch": 1.56, + "learning_rate": 1.204689805167977e-08, + "logits/chosen": -3.249380588531494, + "logits/rejected": -2.9898838996887207, + "logps/chosen": -251.31126403808594, + "logps/rejected": -1509.456298828125, + "loss": 0.2863, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0372207164764404, + "rewards/margins": 7.502754211425781, + "rewards/rejected": -6.465533256530762, + "step": 1225 + }, + { + "epoch": 1.56, + "learning_rate": 1.1979701719998452e-08, + "logits/chosen": -3.267073631286621, + "logits/rejected": -3.1342296600341797, + "logps/chosen": -260.75921630859375, + "logps/rejected": -780.3006591796875, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.223414659500122, + "rewards/margins": 4.82754373550415, + "rewards/rejected": -3.6041293144226074, + "step": 1226 + }, + { + "epoch": 1.56, + "learning_rate": 1.1912667803989602e-08, + "logits/chosen": -3.194584369659424, + "logits/rejected": -3.1028711795806885, + "logps/chosen": -286.44317626953125, + "logps/rejected": -1033.71728515625, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0969619750976562, + "rewards/margins": 5.631861686706543, + "rewards/rejected": -4.534899711608887, + "step": 1227 + }, + { + "epoch": 1.57, + "learning_rate": 1.1845796590009682e-08, + "logits/chosen": -3.258237838745117, + "logits/rejected": -3.111915111541748, + "logps/chosen": -254.07040405273438, + "logps/rejected": -1035.1959228515625, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3255157470703125, + "rewards/margins": 6.174797058105469, + "rewards/rejected": -4.849281311035156, + "step": 1228 + }, + { + "epoch": 1.57, + "learning_rate": 1.177908836372014e-08, + "logits/chosen": -3.261101722717285, + "logits/rejected": -3.2566137313842773, + "logps/chosen": -239.91229248046875, + "logps/rejected": -625.7706909179688, + "loss": 0.2827, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3141647577285767, + "rewards/margins": 4.828686714172363, + "rewards/rejected": -3.514521837234497, + "step": 1229 + }, + { + "epoch": 1.57, + "learning_rate": 1.1712543410086145e-08, + "logits/chosen": -3.2324562072753906, + "logits/rejected": -3.1243486404418945, + "logps/chosen": -293.6867980957031, + "logps/rejected": -491.6185607910156, + "loss": 0.2867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.280158281326294, + "rewards/margins": 4.110843658447266, + "rewards/rejected": -2.830685615539551, + "step": 1230 + }, + { + "epoch": 1.57, + "learning_rate": 1.1646162013375444e-08, + "logits/chosen": -3.2591257095336914, + "logits/rejected": -2.9811744689941406, + "logps/chosen": -237.81906127929688, + "logps/rejected": -1104.0992431640625, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2108078002929688, + "rewards/margins": 6.033064365386963, + "rewards/rejected": -4.822256565093994, + "step": 1231 + }, + { + "epoch": 1.57, + "learning_rate": 1.157994445715706e-08, + "logits/chosen": -3.272019624710083, + "logits/rejected": -3.164879083633423, + "logps/chosen": -234.83859252929688, + "logps/rejected": -762.3265380859375, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.138665795326233, + "rewards/margins": 5.356339931488037, + "rewards/rejected": -4.217674255371094, + "step": 1232 + }, + { + "epoch": 1.57, + "learning_rate": 1.1513891024300121e-08, + "logits/chosen": -3.1937594413757324, + "logits/rejected": -3.113699436187744, + "logps/chosen": -285.20587158203125, + "logps/rejected": -593.7748413085938, + "loss": 0.3083, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1194442510604858, + "rewards/margins": 4.205670356750488, + "rewards/rejected": -3.086225986480713, + "step": 1233 + }, + { + "epoch": 1.57, + "learning_rate": 1.1448001996972645e-08, + "logits/chosen": -3.2522530555725098, + "logits/rejected": -3.108677864074707, + "logps/chosen": -281.20184326171875, + "logps/rejected": -451.6339416503906, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3384307622909546, + "rewards/margins": 3.2563400268554688, + "rewards/rejected": -1.9179092645645142, + "step": 1234 + }, + { + "epoch": 1.57, + "learning_rate": 1.1382277656640382e-08, + "logits/chosen": -3.253932476043701, + "logits/rejected": -3.138636589050293, + "logps/chosen": -278.1432800292969, + "logps/rejected": -555.2112426757812, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3339295387268066, + "rewards/margins": 3.891714572906494, + "rewards/rejected": -2.5577850341796875, + "step": 1235 + }, + { + "epoch": 1.58, + "learning_rate": 1.1316718284065534e-08, + "logits/chosen": -3.208465337753296, + "logits/rejected": -3.1136107444763184, + "logps/chosen": -273.632568359375, + "logps/rejected": -456.14141845703125, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2406494617462158, + "rewards/margins": 3.7229340076446533, + "rewards/rejected": -2.4822845458984375, + "step": 1236 + }, + { + "epoch": 1.58, + "learning_rate": 1.1251324159305593e-08, + "logits/chosen": -3.251067638397217, + "logits/rejected": -3.2108912467956543, + "logps/chosen": -253.06988525390625, + "logps/rejected": -493.81768798828125, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8994972705841064, + "rewards/margins": 3.7983620166778564, + "rewards/rejected": -2.89886474609375, + "step": 1237 + }, + { + "epoch": 1.58, + "learning_rate": 1.1186095561712128e-08, + "logits/chosen": -3.2073378562927246, + "logits/rejected": -3.1534323692321777, + "logps/chosen": -239.0760498046875, + "logps/rejected": -585.3082275390625, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9712173938751221, + "rewards/margins": 3.9303910732269287, + "rewards/rejected": -2.9591736793518066, + "step": 1238 + }, + { + "epoch": 1.58, + "learning_rate": 1.1121032769929667e-08, + "logits/chosen": -3.3059635162353516, + "logits/rejected": -3.187593460083008, + "logps/chosen": -215.77230834960938, + "logps/rejected": -552.6004028320312, + "loss": 0.2695, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1394622325897217, + "rewards/margins": 4.827751159667969, + "rewards/rejected": -3.688289165496826, + "step": 1239 + }, + { + "epoch": 1.58, + "learning_rate": 1.1056136061894383e-08, + "logits/chosen": -3.2657036781311035, + "logits/rejected": -3.1639161109924316, + "logps/chosen": -237.68746948242188, + "logps/rejected": -312.723388671875, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3457398414611816, + "rewards/margins": 2.725146532058716, + "rewards/rejected": -1.3794068098068237, + "step": 1240 + }, + { + "epoch": 1.58, + "learning_rate": 1.0991405714833002e-08, + "logits/chosen": -3.3301496505737305, + "logits/rejected": -3.0879361629486084, + "logps/chosen": -253.16696166992188, + "logps/rejected": -1127.4093017578125, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5068848133087158, + "rewards/margins": 7.091894626617432, + "rewards/rejected": -5.585009574890137, + "step": 1241 + }, + { + "epoch": 1.58, + "learning_rate": 1.0926842005261549e-08, + "logits/chosen": -3.2097702026367188, + "logits/rejected": -3.024446725845337, + "logps/chosen": -325.5244140625, + "logps/rejected": -1043.622314453125, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.033461093902588, + "rewards/margins": 4.962889194488525, + "rewards/rejected": -3.9294281005859375, + "step": 1242 + }, + { + "epoch": 1.58, + "learning_rate": 1.0862445208984278e-08, + "logits/chosen": -3.2830753326416016, + "logits/rejected": -3.2437970638275146, + "logps/chosen": -249.85049438476562, + "logps/rejected": -688.7196655273438, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3682914972305298, + "rewards/margins": 4.769451141357422, + "rewards/rejected": -3.4011597633361816, + "step": 1243 + }, + { + "epoch": 1.59, + "learning_rate": 1.0798215601092353e-08, + "logits/chosen": -3.1384730339050293, + "logits/rejected": -3.0769293308258057, + "logps/chosen": -295.56524658203125, + "logps/rejected": -502.4980773925781, + "loss": 0.328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8567596673965454, + "rewards/margins": 3.293041944503784, + "rewards/rejected": -2.4362823963165283, + "step": 1244 + }, + { + "epoch": 1.59, + "learning_rate": 1.0734153455962763e-08, + "logits/chosen": -3.2775094509124756, + "logits/rejected": -3.155609130859375, + "logps/chosen": -251.78237915039062, + "logps/rejected": -1035.839599609375, + "loss": 0.2883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3871262073516846, + "rewards/margins": 6.818290710449219, + "rewards/rejected": -5.431164741516113, + "step": 1245 + }, + { + "epoch": 1.59, + "learning_rate": 1.067025904725713e-08, + "logits/chosen": -3.234588623046875, + "logits/rejected": -3.1431450843811035, + "logps/chosen": -253.26022338867188, + "logps/rejected": -683.1787719726562, + "loss": 0.2865, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4545211791992188, + "rewards/margins": 4.7244462966918945, + "rewards/rejected": -3.269925117492676, + "step": 1246 + }, + { + "epoch": 1.59, + "learning_rate": 1.0606532647920514e-08, + "logits/chosen": -3.26302433013916, + "logits/rejected": -3.0757696628570557, + "logps/chosen": -260.957763671875, + "logps/rejected": -503.7807922363281, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1656311750411987, + "rewards/margins": 3.2457122802734375, + "rewards/rejected": -2.0800812244415283, + "step": 1247 + }, + { + "epoch": 1.59, + "learning_rate": 1.0542974530180326e-08, + "logits/chosen": -3.1772515773773193, + "logits/rejected": -3.0952701568603516, + "logps/chosen": -302.3877258300781, + "logps/rejected": -646.1891479492188, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1217498779296875, + "rewards/margins": 3.8880205154418945, + "rewards/rejected": -2.766270637512207, + "step": 1248 + }, + { + "epoch": 1.59, + "learning_rate": 1.0479584965545052e-08, + "logits/chosen": -3.217197895050049, + "logits/rejected": -3.123910903930664, + "logps/chosen": -275.105712890625, + "logps/rejected": -862.0154418945312, + "loss": 0.3074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3532272577285767, + "rewards/margins": 5.543449401855469, + "rewards/rejected": -4.190222263336182, + "step": 1249 + }, + { + "epoch": 1.59, + "learning_rate": 1.0416364224803182e-08, + "logits/chosen": -3.2529149055480957, + "logits/rejected": -3.1282424926757812, + "logps/chosen": -259.464599609375, + "logps/rejected": -607.98681640625, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2731293439865112, + "rewards/margins": 4.836435317993164, + "rewards/rejected": -3.5633058547973633, + "step": 1250 + }, + { + "epoch": 1.59, + "learning_rate": 1.0353312578021984e-08, + "logits/chosen": -3.2561726570129395, + "logits/rejected": -3.176443576812744, + "logps/chosen": -236.37286376953125, + "logps/rejected": -1030.09619140625, + "loss": 0.2714, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2285712957382202, + "rewards/margins": 6.35288143157959, + "rewards/rejected": -5.124310493469238, + "step": 1251 + }, + { + "epoch": 1.6, + "learning_rate": 1.0290430294546448e-08, + "logits/chosen": -3.267102003097534, + "logits/rejected": -3.0748305320739746, + "logps/chosen": -297.4919128417969, + "logps/rejected": -212.30580139160156, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4253342151641846, + "rewards/margins": 2.4637765884399414, + "rewards/rejected": -1.0384422540664673, + "step": 1252 + }, + { + "epoch": 1.6, + "learning_rate": 1.0227717642998034e-08, + "logits/chosen": -3.253667116165161, + "logits/rejected": -3.163900852203369, + "logps/chosen": -273.6605529785156, + "logps/rejected": -494.75799560546875, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3140381574630737, + "rewards/margins": 4.208888530731201, + "rewards/rejected": -2.894850254058838, + "step": 1253 + }, + { + "epoch": 1.6, + "learning_rate": 1.016517489127357e-08, + "logits/chosen": -3.2389678955078125, + "logits/rejected": -3.118508815765381, + "logps/chosen": -229.8408966064453, + "logps/rejected": -452.6054382324219, + "loss": 0.3136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3619369268417358, + "rewards/margins": 4.0007781982421875, + "rewards/rejected": -2.638841152191162, + "step": 1254 + }, + { + "epoch": 1.6, + "learning_rate": 1.0102802306544095e-08, + "logits/chosen": -3.2402327060699463, + "logits/rejected": -3.1852684020996094, + "logps/chosen": -268.51171875, + "logps/rejected": -863.497314453125, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.247340440750122, + "rewards/margins": 5.445277690887451, + "rewards/rejected": -4.19793701171875, + "step": 1255 + }, + { + "epoch": 1.6, + "learning_rate": 1.0040600155253764e-08, + "logits/chosen": -3.29160737991333, + "logits/rejected": -3.215261459350586, + "logps/chosen": -267.4368896484375, + "logps/rejected": -990.3475341796875, + "loss": 0.2725, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3112915754318237, + "rewards/margins": 6.715558052062988, + "rewards/rejected": -5.404266357421875, + "step": 1256 + }, + { + "epoch": 1.6, + "learning_rate": 9.978568703118634e-09, + "logits/chosen": -3.2102324962615967, + "logits/rejected": -3.1648173332214355, + "logps/chosen": -276.34222412109375, + "logps/rejected": -683.385986328125, + "loss": 0.2782, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2991530895233154, + "rewards/margins": 5.057002544403076, + "rewards/rejected": -3.7578492164611816, + "step": 1257 + }, + { + "epoch": 1.6, + "learning_rate": 9.916708215125585e-09, + "logits/chosen": -3.255448579788208, + "logits/rejected": -3.1445395946502686, + "logps/chosen": -250.04248046875, + "logps/rejected": -496.32244873046875, + "loss": 0.3062, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.410261631011963, + "rewards/margins": 3.691455125808716, + "rewards/rejected": -2.281193494796753, + "step": 1258 + }, + { + "epoch": 1.6, + "learning_rate": 9.855018955531158e-09, + "logits/chosen": -3.173762798309326, + "logits/rejected": -3.1684513092041016, + "logps/chosen": -287.3771057128906, + "logps/rejected": -956.75439453125, + "loss": 0.2751, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3703980445861816, + "rewards/margins": 6.063116550445557, + "rewards/rejected": -4.692718505859375, + "step": 1259 + }, + { + "epoch": 1.61, + "learning_rate": 9.793501187860431e-09, + "logits/chosen": -3.2190957069396973, + "logits/rejected": -3.130385398864746, + "logps/chosen": -286.79669189453125, + "logps/rejected": -575.206787109375, + "loss": 0.2948, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2615692615509033, + "rewards/margins": 3.7753067016601562, + "rewards/rejected": -2.513737440109253, + "step": 1260 + }, + { + "epoch": 1.61, + "learning_rate": 9.732155174905943e-09, + "logits/chosen": -3.3039069175720215, + "logits/rejected": -3.19134521484375, + "logps/chosen": -261.4001159667969, + "logps/rejected": -486.78631591796875, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.422926425933838, + "rewards/margins": 4.07724142074585, + "rewards/rejected": -2.654315233230591, + "step": 1261 + }, + { + "epoch": 1.61, + "learning_rate": 9.670981178726485e-09, + "logits/chosen": -3.266300678253174, + "logits/rejected": -3.1254327297210693, + "logps/chosen": -285.3750305175781, + "logps/rejected": -767.9592895507812, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3082596063613892, + "rewards/margins": 5.998414993286133, + "rewards/rejected": -4.690155029296875, + "step": 1262 + }, + { + "epoch": 1.61, + "learning_rate": 9.60997946064604e-09, + "logits/chosen": -3.1773276329040527, + "logits/rejected": -3.0955326557159424, + "logps/chosen": -269.68182373046875, + "logps/rejected": -907.4515380859375, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0141113996505737, + "rewards/margins": 4.650638103485107, + "rewards/rejected": -3.636526584625244, + "step": 1263 + }, + { + "epoch": 1.61, + "learning_rate": 9.549150281252633e-09, + "logits/chosen": -3.303304672241211, + "logits/rejected": -3.202084541320801, + "logps/chosen": -248.30291748046875, + "logps/rejected": -804.23095703125, + "loss": 0.2845, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1989730596542358, + "rewards/margins": 5.711796760559082, + "rewards/rejected": -4.512823581695557, + "step": 1264 + }, + { + "epoch": 1.61, + "learning_rate": 9.488493900397282e-09, + "logits/chosen": -3.1963038444519043, + "logits/rejected": -3.1130361557006836, + "logps/chosen": -248.87448120117188, + "logps/rejected": -516.731689453125, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1999168395996094, + "rewards/margins": 4.183793067932129, + "rewards/rejected": -2.9838762283325195, + "step": 1265 + }, + { + "epoch": 1.61, + "learning_rate": 9.428010577192796e-09, + "logits/chosen": -3.307156562805176, + "logits/rejected": -3.099025011062622, + "logps/chosen": -215.90176391601562, + "logps/rejected": -409.044921875, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.390944004058838, + "rewards/margins": 3.470349073410034, + "rewards/rejected": -2.0794053077697754, + "step": 1266 + }, + { + "epoch": 1.62, + "learning_rate": 9.367700570012733e-09, + "logits/chosen": -3.1700592041015625, + "logits/rejected": -3.0658814907073975, + "logps/chosen": -288.1612548828125, + "logps/rejected": -1550.2022705078125, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.591892957687378, + "rewards/margins": 8.278589248657227, + "rewards/rejected": -6.6866960525512695, + "step": 1267 + }, + { + "epoch": 1.62, + "learning_rate": 9.307564136490254e-09, + "logits/chosen": -3.306403160095215, + "logits/rejected": -3.1827077865600586, + "logps/chosen": -251.18539428710938, + "logps/rejected": -905.7352294921875, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1656769514083862, + "rewards/margins": 5.226007461547852, + "rewards/rejected": -4.060330390930176, + "step": 1268 + }, + { + "epoch": 1.62, + "learning_rate": 9.247601533517097e-09, + "logits/chosen": -3.2272772789001465, + "logits/rejected": -3.0772461891174316, + "logps/chosen": -318.5834655761719, + "logps/rejected": -873.82421875, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3593612909317017, + "rewards/margins": 5.537651062011719, + "rewards/rejected": -4.178289890289307, + "step": 1269 + }, + { + "epoch": 1.62, + "learning_rate": 9.187813017242386e-09, + "logits/chosen": -3.2952065467834473, + "logits/rejected": -3.1597559452056885, + "logps/chosen": -251.42703247070312, + "logps/rejected": -880.8178100585938, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3579285144805908, + "rewards/margins": 5.5823974609375, + "rewards/rejected": -4.224469184875488, + "step": 1270 + }, + { + "epoch": 1.62, + "learning_rate": 9.128198843071577e-09, + "logits/chosen": -3.299182415008545, + "logits/rejected": -3.1214208602905273, + "logps/chosen": -263.44384765625, + "logps/rejected": -532.2445068359375, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1187317371368408, + "rewards/margins": 3.631082057952881, + "rewards/rejected": -2.512350559234619, + "step": 1271 + }, + { + "epoch": 1.62, + "learning_rate": 9.068759265665382e-09, + "logits/chosen": -3.232435464859009, + "logits/rejected": -3.108640193939209, + "logps/chosen": -280.52410888671875, + "logps/rejected": -1429.842041015625, + "loss": 0.2751, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.508033037185669, + "rewards/margins": 7.578449249267578, + "rewards/rejected": -6.070416450500488, + "step": 1272 + }, + { + "epoch": 1.62, + "learning_rate": 9.009494538938684e-09, + "logits/chosen": -3.2052736282348633, + "logits/rejected": -3.0445423126220703, + "logps/chosen": -254.7125701904297, + "logps/rejected": -1032.0980224609375, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.240858554840088, + "rewards/margins": 6.6608171463012695, + "rewards/rejected": -5.419958591461182, + "step": 1273 + }, + { + "epoch": 1.62, + "learning_rate": 8.950404916059406e-09, + "logits/chosen": -3.2928266525268555, + "logits/rejected": -3.2018041610717773, + "logps/chosen": -270.77886962890625, + "logps/rejected": -436.8186340332031, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.220313310623169, + "rewards/margins": 3.623826026916504, + "rewards/rejected": -2.403512716293335, + "step": 1274 + }, + { + "epoch": 1.63, + "learning_rate": 8.891490649447475e-09, + "logits/chosen": -3.272796392440796, + "logits/rejected": -3.128666877746582, + "logps/chosen": -284.41943359375, + "logps/rejected": -563.667236328125, + "loss": 0.3135, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6293091773986816, + "rewards/margins": 4.166162490844727, + "rewards/rejected": -2.536853075027466, + "step": 1275 + }, + { + "epoch": 1.63, + "learning_rate": 8.832751990773713e-09, + "logits/chosen": -3.249307155609131, + "logits/rejected": -3.113888740539551, + "logps/chosen": -266.20965576171875, + "logps/rejected": -869.500244140625, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3629517555236816, + "rewards/margins": 5.5193328857421875, + "rewards/rejected": -4.156381130218506, + "step": 1276 + }, + { + "epoch": 1.63, + "learning_rate": 8.774189190958819e-09, + "logits/chosen": -3.237703800201416, + "logits/rejected": -3.1291372776031494, + "logps/chosen": -273.7169189453125, + "logps/rejected": -1451.70654296875, + "loss": 0.2934, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3219833374023438, + "rewards/margins": 9.111125946044922, + "rewards/rejected": -7.78914213180542, + "step": 1277 + }, + { + "epoch": 1.63, + "learning_rate": 8.715802500172214e-09, + "logits/chosen": -3.170039653778076, + "logits/rejected": -3.121406078338623, + "logps/chosen": -276.12939453125, + "logps/rejected": -378.4146728515625, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2286362648010254, + "rewards/margins": 3.496290683746338, + "rewards/rejected": -2.2676544189453125, + "step": 1278 + }, + { + "epoch": 1.63, + "learning_rate": 8.657592167831023e-09, + "logits/chosen": -3.224884510040283, + "logits/rejected": -3.099728584289551, + "logps/chosen": -294.0439453125, + "logps/rejected": -1028.788818359375, + "loss": 0.2817, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1378463506698608, + "rewards/margins": 5.817726135253906, + "rewards/rejected": -4.679880142211914, + "step": 1279 + }, + { + "epoch": 1.63, + "learning_rate": 8.599558442598998e-09, + "logits/chosen": -3.233938694000244, + "logits/rejected": -3.1512715816497803, + "logps/chosen": -266.7980041503906, + "logps/rejected": -500.57354736328125, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2368667125701904, + "rewards/margins": 3.8728928565979004, + "rewards/rejected": -2.636025905609131, + "step": 1280 + }, + { + "epoch": 1.63, + "learning_rate": 8.541701572385484e-09, + "logits/chosen": -3.3155157566070557, + "logits/rejected": -3.1333093643188477, + "logps/chosen": -279.7801513671875, + "logps/rejected": -852.9652099609375, + "loss": 0.2987, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.489837646484375, + "rewards/margins": 5.419692039489746, + "rewards/rejected": -3.92985463142395, + "step": 1281 + }, + { + "epoch": 1.63, + "learning_rate": 8.484021804344305e-09, + "logits/chosen": -3.225522518157959, + "logits/rejected": -3.173935651779175, + "logps/chosen": -259.01007080078125, + "logps/rejected": -776.556640625, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4351555109024048, + "rewards/margins": 5.581282615661621, + "rewards/rejected": -4.146127700805664, + "step": 1282 + }, + { + "epoch": 1.64, + "learning_rate": 8.426519384872732e-09, + "logits/chosen": -3.246434211730957, + "logits/rejected": -3.1925597190856934, + "logps/chosen": -234.47535705566406, + "logps/rejected": -708.8834228515625, + "loss": 0.2873, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3832900524139404, + "rewards/margins": 5.549027919769287, + "rewards/rejected": -4.165738105773926, + "step": 1283 + }, + { + "epoch": 1.64, + "learning_rate": 8.369194559610482e-09, + "logits/chosen": -3.2695517539978027, + "logits/rejected": -3.1789937019348145, + "logps/chosen": -248.34658813476562, + "logps/rejected": -842.3291625976562, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2180473804473877, + "rewards/margins": 5.753350257873535, + "rewards/rejected": -4.535303115844727, + "step": 1284 + }, + { + "epoch": 1.64, + "learning_rate": 8.312047573438574e-09, + "logits/chosen": -3.213740825653076, + "logits/rejected": -3.182680368423462, + "logps/chosen": -261.9862060546875, + "logps/rejected": -912.2635498046875, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1313339471817017, + "rewards/margins": 5.603515625, + "rewards/rejected": -4.472181797027588, + "step": 1285 + }, + { + "epoch": 1.64, + "learning_rate": 8.25507867047835e-09, + "logits/chosen": -3.2523598670959473, + "logits/rejected": -3.1630420684814453, + "logps/chosen": -275.34075927734375, + "logps/rejected": -674.1685791015625, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0775314569473267, + "rewards/margins": 5.142860412597656, + "rewards/rejected": -4.065329074859619, + "step": 1286 + }, + { + "epoch": 1.64, + "learning_rate": 8.1982880940904e-09, + "logits/chosen": -3.210639715194702, + "logits/rejected": -3.1320276260375977, + "logps/chosen": -253.67721557617188, + "logps/rejected": -479.98138427734375, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3486229181289673, + "rewards/margins": 3.9834465980529785, + "rewards/rejected": -2.6348235607147217, + "step": 1287 + }, + { + "epoch": 1.64, + "learning_rate": 8.141676086873572e-09, + "logits/chosen": -3.200432300567627, + "logits/rejected": -3.078826904296875, + "logps/chosen": -274.3531188964844, + "logps/rejected": -447.54864501953125, + "loss": 0.2981, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5347473621368408, + "rewards/margins": 3.8270766735076904, + "rewards/rejected": -2.2923293113708496, + "step": 1288 + }, + { + "epoch": 1.64, + "learning_rate": 8.085242890663863e-09, + "logits/chosen": -3.2425992488861084, + "logits/rejected": -3.058706760406494, + "logps/chosen": -279.126953125, + "logps/rejected": -502.5337829589844, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.094024658203125, + "rewards/margins": 3.8482604026794434, + "rewards/rejected": -2.7542359828948975, + "step": 1289 + }, + { + "epoch": 1.64, + "learning_rate": 8.028988746533432e-09, + "logits/chosen": -3.2751822471618652, + "logits/rejected": -3.1792426109313965, + "logps/chosen": -284.7508544921875, + "logps/rejected": -494.97882080078125, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8529266119003296, + "rewards/margins": 3.574916362762451, + "rewards/rejected": -2.721989631652832, + "step": 1290 + }, + { + "epoch": 1.65, + "learning_rate": 7.972913894789557e-09, + "logits/chosen": -3.2165839672088623, + "logits/rejected": -3.077012538909912, + "logps/chosen": -274.8293762207031, + "logps/rejected": -887.2017211914062, + "loss": 0.283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4481064081192017, + "rewards/margins": 6.09292459487915, + "rewards/rejected": -4.644818305969238, + "step": 1291 + }, + { + "epoch": 1.65, + "learning_rate": 7.917018574973644e-09, + "logits/chosen": -3.2203569412231445, + "logits/rejected": -3.08364200592041, + "logps/chosen": -264.2146301269531, + "logps/rejected": -869.5726318359375, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3490180969238281, + "rewards/margins": 5.416108131408691, + "rewards/rejected": -4.067090034484863, + "step": 1292 + }, + { + "epoch": 1.65, + "learning_rate": 7.861303025860133e-09, + "logits/chosen": -3.22660493850708, + "logits/rejected": -3.1733720302581787, + "logps/chosen": -270.46832275390625, + "logps/rejected": -1403.3262939453125, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9154723882675171, + "rewards/margins": 7.958422660827637, + "rewards/rejected": -7.042950630187988, + "step": 1293 + }, + { + "epoch": 1.65, + "learning_rate": 7.805767485455527e-09, + "logits/chosen": -3.1781005859375, + "logits/rejected": -3.1304264068603516, + "logps/chosen": -241.17837524414062, + "logps/rejected": -377.118408203125, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4101059436798096, + "rewards/margins": 3.278921604156494, + "rewards/rejected": -1.8688156604766846, + "step": 1294 + }, + { + "epoch": 1.65, + "learning_rate": 7.750412190997362e-09, + "logits/chosen": -3.203455924987793, + "logits/rejected": -3.09251070022583, + "logps/chosen": -276.782470703125, + "logps/rejected": -948.3198852539062, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1322723627090454, + "rewards/margins": 6.598446846008301, + "rewards/rejected": -5.466174125671387, + "step": 1295 + }, + { + "epoch": 1.65, + "learning_rate": 7.695237378953223e-09, + "logits/chosen": -3.2413368225097656, + "logits/rejected": -3.1960465908050537, + "logps/chosen": -240.1701202392578, + "logps/rejected": -625.1171264648438, + "loss": 0.2764, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1164429187774658, + "rewards/margins": 4.6344757080078125, + "rewards/rejected": -3.518033027648926, + "step": 1296 + }, + { + "epoch": 1.65, + "learning_rate": 7.640243285019682e-09, + "logits/chosen": -3.188455581665039, + "logits/rejected": -3.076155185699463, + "logps/chosen": -266.4368896484375, + "logps/rejected": -571.7177734375, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4022986888885498, + "rewards/margins": 4.469242095947266, + "rewards/rejected": -3.066943645477295, + "step": 1297 + }, + { + "epoch": 1.65, + "learning_rate": 7.585430144121319e-09, + "logits/chosen": -3.3289194107055664, + "logits/rejected": -3.1452693939208984, + "logps/chosen": -234.46685791015625, + "logps/rejected": -1004.4635009765625, + "loss": 0.3023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2714462280273438, + "rewards/margins": 5.440676689147949, + "rewards/rejected": -4.169230937957764, + "step": 1298 + }, + { + "epoch": 1.66, + "learning_rate": 7.530798190409704e-09, + "logits/chosen": -3.245431423187256, + "logits/rejected": -3.138526439666748, + "logps/chosen": -285.20465087890625, + "logps/rejected": -261.70941162109375, + "loss": 0.3075, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4415092468261719, + "rewards/margins": 2.812206268310547, + "rewards/rejected": -1.370697021484375, + "step": 1299 + }, + { + "epoch": 1.66, + "learning_rate": 7.476347657262455e-09, + "logits/chosen": -3.2052865028381348, + "logits/rejected": -3.0055184364318848, + "logps/chosen": -260.63232421875, + "logps/rejected": -1527.1025390625, + "loss": 0.2985, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1701653003692627, + "rewards/margins": 6.786864757537842, + "rewards/rejected": -5.61669921875, + "step": 1300 + }, + { + "epoch": 1.66, + "learning_rate": 7.422078777282148e-09, + "logits/chosen": -3.2590532302856445, + "logits/rejected": -3.099410057067871, + "logps/chosen": -262.26715087890625, + "logps/rejected": -422.36138916015625, + "loss": 0.2685, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.200117588043213, + "rewards/margins": 3.200456380844116, + "rewards/rejected": -2.0003387928009033, + "step": 1301 + }, + { + "epoch": 1.66, + "learning_rate": 7.367991782295391e-09, + "logits/chosen": -3.265087127685547, + "logits/rejected": -3.1697397232055664, + "logps/chosen": -275.38970947265625, + "logps/rejected": -457.3410949707031, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2466293573379517, + "rewards/margins": 3.991199016571045, + "rewards/rejected": -2.7445695400238037, + "step": 1302 + }, + { + "epoch": 1.66, + "learning_rate": 7.314086903351796e-09, + "logits/chosen": -3.198470115661621, + "logits/rejected": -3.1383209228515625, + "logps/chosen": -256.7538146972656, + "logps/rejected": -840.8480224609375, + "loss": 0.3159, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.198988437652588, + "rewards/margins": 6.003398418426514, + "rewards/rejected": -4.804409980773926, + "step": 1303 + }, + { + "epoch": 1.66, + "learning_rate": 7.260364370723044e-09, + "logits/chosen": -3.222168445587158, + "logits/rejected": -3.0855588912963867, + "logps/chosen": -312.5715637207031, + "logps/rejected": -1250.4588623046875, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.260986328125, + "rewards/margins": 7.0023956298828125, + "rewards/rejected": -5.7414093017578125, + "step": 1304 + }, + { + "epoch": 1.66, + "learning_rate": 7.2068244139018355e-09, + "logits/chosen": -3.2726192474365234, + "logits/rejected": -3.174828052520752, + "logps/chosen": -260.2930603027344, + "logps/rejected": -462.39501953125, + "loss": 0.288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.981066107749939, + "rewards/margins": 3.4222054481506348, + "rewards/rejected": -2.4411392211914062, + "step": 1305 + }, + { + "epoch": 1.66, + "learning_rate": 7.153467261600948e-09, + "logits/chosen": -3.264537811279297, + "logits/rejected": -3.186782121658325, + "logps/chosen": -284.033447265625, + "logps/rejected": -691.378173828125, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2734298706054688, + "rewards/margins": 5.98869514465332, + "rewards/rejected": -4.715265274047852, + "step": 1306 + }, + { + "epoch": 1.67, + "learning_rate": 7.100293141752245e-09, + "logits/chosen": -3.268285036087036, + "logits/rejected": -3.1344423294067383, + "logps/chosen": -256.3446960449219, + "logps/rejected": -490.51806640625, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3627853393554688, + "rewards/margins": 3.6135010719299316, + "rewards/rejected": -2.250715732574463, + "step": 1307 + }, + { + "epoch": 1.67, + "learning_rate": 7.047302281505735e-09, + "logits/chosen": -3.218356132507324, + "logits/rejected": -3.1284992694854736, + "logps/chosen": -234.76736450195312, + "logps/rejected": -694.860107421875, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3516075611114502, + "rewards/margins": 4.5785698890686035, + "rewards/rejected": -3.2269623279571533, + "step": 1308 + }, + { + "epoch": 1.67, + "learning_rate": 6.9944949072285546e-09, + "logits/chosen": -3.255582332611084, + "logits/rejected": -3.1634573936462402, + "logps/chosen": -279.2666015625, + "logps/rejected": -496.28826904296875, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3782669305801392, + "rewards/margins": 4.298440456390381, + "rewards/rejected": -2.9201736450195312, + "step": 1309 + }, + { + "epoch": 1.67, + "learning_rate": 6.9418712445040165e-09, + "logits/chosen": -3.178022861480713, + "logits/rejected": -3.090083122253418, + "logps/chosen": -262.85968017578125, + "logps/rejected": -504.21002197265625, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2719405889511108, + "rewards/margins": 3.8270509243011475, + "rewards/rejected": -2.555110216140747, + "step": 1310 + }, + { + "epoch": 1.67, + "learning_rate": 6.889431518130673e-09, + "logits/chosen": -3.2273807525634766, + "logits/rejected": -3.11136794090271, + "logps/chosen": -279.7635803222656, + "logps/rejected": -688.14404296875, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1718506813049316, + "rewards/margins": 4.983217239379883, + "rewards/rejected": -3.811366558074951, + "step": 1311 + }, + { + "epoch": 1.67, + "learning_rate": 6.837175952121305e-09, + "logits/chosen": -3.2329044342041016, + "logits/rejected": -3.1851439476013184, + "logps/chosen": -268.8985900878906, + "logps/rejected": -425.3516845703125, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2419815063476562, + "rewards/margins": 3.3760986328125, + "rewards/rejected": -2.1341171264648438, + "step": 1312 + }, + { + "epoch": 1.67, + "learning_rate": 6.785104769702032e-09, + "logits/chosen": -3.2899374961853027, + "logits/rejected": -3.1508431434631348, + "logps/chosen": -236.97482299804688, + "logps/rejected": -816.31689453125, + "loss": 0.2668, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3672211170196533, + "rewards/margins": 5.474261283874512, + "rewards/rejected": -4.1070404052734375, + "step": 1313 + }, + { + "epoch": 1.67, + "learning_rate": 6.733218193311291e-09, + "logits/chosen": -3.240077018737793, + "logits/rejected": -3.175853729248047, + "logps/chosen": -246.57168579101562, + "logps/rejected": -764.7481079101562, + "loss": 0.2866, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2848496437072754, + "rewards/margins": 5.161314487457275, + "rewards/rejected": -3.87646484375, + "step": 1314 + }, + { + "epoch": 1.68, + "learning_rate": 6.681516444598934e-09, + "logits/chosen": -3.264092445373535, + "logits/rejected": -3.0584287643432617, + "logps/chosen": -304.783447265625, + "logps/rejected": -469.74151611328125, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3040618896484375, + "rewards/margins": 3.672562599182129, + "rewards/rejected": -2.3685004711151123, + "step": 1315 + }, + { + "epoch": 1.68, + "learning_rate": 6.629999744425236e-09, + "logits/chosen": -3.25431489944458, + "logits/rejected": -3.199479579925537, + "logps/chosen": -273.69781494140625, + "logps/rejected": -659.2876586914062, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9668350219726562, + "rewards/margins": 5.043307781219482, + "rewards/rejected": -4.076472759246826, + "step": 1316 + }, + { + "epoch": 1.68, + "learning_rate": 6.5786683128600255e-09, + "logits/chosen": -3.2701334953308105, + "logits/rejected": -3.1920149326324463, + "logps/chosen": -265.1613464355469, + "logps/rejected": -689.66748046875, + "loss": 0.3168, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4197449684143066, + "rewards/margins": 5.072485446929932, + "rewards/rejected": -3.652740478515625, + "step": 1317 + }, + { + "epoch": 1.68, + "learning_rate": 6.527522369181654e-09, + "logits/chosen": -3.1779990196228027, + "logits/rejected": -3.139164447784424, + "logps/chosen": -286.11993408203125, + "logps/rejected": -623.6638793945312, + "loss": 0.2921, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1377930641174316, + "rewards/margins": 4.306147575378418, + "rewards/rejected": -3.1683549880981445, + "step": 1318 + }, + { + "epoch": 1.68, + "learning_rate": 6.476562131876129e-09, + "logits/chosen": -3.278987169265747, + "logits/rejected": -3.062328577041626, + "logps/chosen": -237.97605895996094, + "logps/rejected": -725.3135375976562, + "loss": 0.2534, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2310622930526733, + "rewards/margins": 5.293785095214844, + "rewards/rejected": -4.062723159790039, + "step": 1319 + }, + { + "epoch": 1.68, + "learning_rate": 6.42578781863613e-09, + "logits/chosen": -3.267767906188965, + "logits/rejected": -3.1776766777038574, + "logps/chosen": -253.8202362060547, + "logps/rejected": -550.380126953125, + "loss": 0.2919, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4420685768127441, + "rewards/margins": 4.844886779785156, + "rewards/rejected": -3.402818441390991, + "step": 1320 + }, + { + "epoch": 1.68, + "learning_rate": 6.3751996463601406e-09, + "logits/chosen": -3.243865489959717, + "logits/rejected": -3.1917428970336914, + "logps/chosen": -269.54290771484375, + "logps/rejected": -506.78619384765625, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4431641101837158, + "rewards/margins": 4.732975959777832, + "rewards/rejected": -3.289811611175537, + "step": 1321 + }, + { + "epoch": 1.69, + "learning_rate": 6.324797831151452e-09, + "logits/chosen": -3.276057720184326, + "logits/rejected": -3.097895860671997, + "logps/chosen": -292.8162841796875, + "logps/rejected": -791.7017822265625, + "loss": 0.2821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.322035312652588, + "rewards/margins": 5.067326545715332, + "rewards/rejected": -3.745291233062744, + "step": 1322 + }, + { + "epoch": 1.69, + "learning_rate": 6.27458258831729e-09, + "logits/chosen": -3.274653911590576, + "logits/rejected": -3.1523687839508057, + "logps/chosen": -295.142822265625, + "logps/rejected": -997.0252685546875, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.349897861480713, + "rewards/margins": 6.381045341491699, + "rewards/rejected": -5.031147480010986, + "step": 1323 + }, + { + "epoch": 1.69, + "learning_rate": 6.22455413236786e-09, + "logits/chosen": -3.2752084732055664, + "logits/rejected": -3.1779541969299316, + "logps/chosen": -274.2307434082031, + "logps/rejected": -667.47314453125, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3615700006484985, + "rewards/margins": 5.343825340270996, + "rewards/rejected": -3.982255697250366, + "step": 1324 + }, + { + "epoch": 1.69, + "learning_rate": 6.174712677015459e-09, + "logits/chosen": -3.174286365509033, + "logits/rejected": -3.0812671184539795, + "logps/chosen": -289.9533996582031, + "logps/rejected": -596.6944580078125, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.227320909500122, + "rewards/margins": 4.833284378051758, + "rewards/rejected": -3.6059632301330566, + "step": 1325 + }, + { + "epoch": 1.69, + "learning_rate": 6.125058435173569e-09, + "logits/chosen": -3.2556753158569336, + "logits/rejected": -3.1978583335876465, + "logps/chosen": -264.4896240234375, + "logps/rejected": -680.3768310546875, + "loss": 0.2969, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.255139946937561, + "rewards/margins": 4.929511070251465, + "rewards/rejected": -3.6743712425231934, + "step": 1326 + }, + { + "epoch": 1.69, + "learning_rate": 6.075591618955906e-09, + "logits/chosen": -3.210890769958496, + "logits/rejected": -3.155506134033203, + "logps/chosen": -236.57012939453125, + "logps/rejected": -570.8838500976562, + "loss": 0.2557, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.093397617340088, + "rewards/margins": 4.387730598449707, + "rewards/rejected": -3.294332981109619, + "step": 1327 + }, + { + "epoch": 1.69, + "learning_rate": 6.026312439675552e-09, + "logits/chosen": -3.225667953491211, + "logits/rejected": -3.093071460723877, + "logps/chosen": -223.6138916015625, + "logps/rejected": -428.6092834472656, + "loss": 0.2631, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3414771556854248, + "rewards/margins": 3.5688562393188477, + "rewards/rejected": -2.2273788452148438, + "step": 1328 + }, + { + "epoch": 1.69, + "learning_rate": 5.977221107844016e-09, + "logits/chosen": -3.283350944519043, + "logits/rejected": -3.089630365371704, + "logps/chosen": -272.9775695800781, + "logps/rejected": -561.951171875, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0607185363769531, + "rewards/margins": 4.231067657470703, + "rewards/rejected": -3.17034912109375, + "step": 1329 + }, + { + "epoch": 1.7, + "learning_rate": 5.928317833170393e-09, + "logits/chosen": -3.2850964069366455, + "logits/rejected": -3.177361011505127, + "logps/chosen": -254.78573608398438, + "logps/rejected": -584.3109130859375, + "loss": 0.2862, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3463013172149658, + "rewards/margins": 4.601925849914551, + "rewards/rejected": -3.255624294281006, + "step": 1330 + }, + { + "epoch": 1.7, + "learning_rate": 5.879602824560415e-09, + "logits/chosen": -3.2126786708831787, + "logits/rejected": -3.0369062423706055, + "logps/chosen": -255.25259399414062, + "logps/rejected": -223.04782104492188, + "loss": 0.3097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.458038330078125, + "rewards/margins": 2.2901666164398193, + "rewards/rejected": -0.8321281671524048, + "step": 1331 + }, + { + "epoch": 1.7, + "learning_rate": 5.831076290115572e-09, + "logits/chosen": -3.2525763511657715, + "logits/rejected": -3.1702699661254883, + "logps/chosen": -292.6619567871094, + "logps/rejected": -683.9617919921875, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.413915991783142, + "rewards/margins": 5.181036472320557, + "rewards/rejected": -3.767120361328125, + "step": 1332 + }, + { + "epoch": 1.7, + "learning_rate": 5.782738437132223e-09, + "logits/chosen": -3.1735901832580566, + "logits/rejected": -3.0816659927368164, + "logps/chosen": -305.0018005371094, + "logps/rejected": -574.6405639648438, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2891113758087158, + "rewards/margins": 3.995697021484375, + "rewards/rejected": -2.7065858840942383, + "step": 1333 + }, + { + "epoch": 1.7, + "learning_rate": 5.734589472100737e-09, + "logits/chosen": -3.2506775856018066, + "logits/rejected": -3.1524698734283447, + "logps/chosen": -262.0668029785156, + "logps/rejected": -611.0958251953125, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0649994611740112, + "rewards/margins": 4.674936294555664, + "rewards/rejected": -3.6099367141723633, + "step": 1334 + }, + { + "epoch": 1.7, + "learning_rate": 5.6866296007045735e-09, + "logits/chosen": -3.252415657043457, + "logits/rejected": -3.2285449504852295, + "logps/chosen": -249.11863708496094, + "logps/rejected": -661.9228515625, + "loss": 0.2753, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2659194469451904, + "rewards/margins": 5.597379684448242, + "rewards/rejected": -4.331460475921631, + "step": 1335 + }, + { + "epoch": 1.7, + "learning_rate": 5.638859027819409e-09, + "logits/chosen": -3.2930855751037598, + "logits/rejected": -3.096012592315674, + "logps/chosen": -260.46636962890625, + "logps/rejected": -783.5240478515625, + "loss": 0.2925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.935650646686554, + "rewards/margins": 4.163717746734619, + "rewards/rejected": -3.228066921234131, + "step": 1336 + }, + { + "epoch": 1.7, + "learning_rate": 5.591277957512286e-09, + "logits/chosen": -3.2475228309631348, + "logits/rejected": -3.1765971183776855, + "logps/chosen": -280.794677734375, + "logps/rejected": -1249.025634765625, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2726960182189941, + "rewards/margins": 6.67050838470459, + "rewards/rejected": -5.397812366485596, + "step": 1337 + }, + { + "epoch": 1.71, + "learning_rate": 5.543886593040736e-09, + "logits/chosen": -3.212573528289795, + "logits/rejected": -3.147658109664917, + "logps/chosen": -266.1935119628906, + "logps/rejected": -314.56005859375, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4940696954727173, + "rewards/margins": 3.1214241981506348, + "rewards/rejected": -1.6273545026779175, + "step": 1338 + }, + { + "epoch": 1.71, + "learning_rate": 5.4966851368518865e-09, + "logits/chosen": -3.247865676879883, + "logits/rejected": -3.1397509574890137, + "logps/chosen": -279.754638671875, + "logps/rejected": -749.3187255859375, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4323440790176392, + "rewards/margins": 5.7877607345581055, + "rewards/rejected": -4.355416774749756, + "step": 1339 + }, + { + "epoch": 1.71, + "learning_rate": 5.44967379058161e-09, + "logits/chosen": -3.244717597961426, + "logits/rejected": -3.1744439601898193, + "logps/chosen": -240.656982421875, + "logps/rejected": -833.0377807617188, + "loss": 0.2664, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.079724907875061, + "rewards/margins": 6.094480037689209, + "rewards/rejected": -5.0147552490234375, + "step": 1340 + }, + { + "epoch": 1.71, + "learning_rate": 5.402852755053671e-09, + "logits/chosen": -3.210392475128174, + "logits/rejected": -3.097780227661133, + "logps/chosen": -239.86151123046875, + "logps/rejected": -609.6925048828125, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2019188404083252, + "rewards/margins": 4.969228744506836, + "rewards/rejected": -3.7673096656799316, + "step": 1341 + }, + { + "epoch": 1.71, + "learning_rate": 5.356222230278856e-09, + "logits/chosen": -3.2691352367401123, + "logits/rejected": -3.132572650909424, + "logps/chosen": -271.2646789550781, + "logps/rejected": -519.534423828125, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1531692743301392, + "rewards/margins": 4.135502815246582, + "rewards/rejected": -2.9823334217071533, + "step": 1342 + }, + { + "epoch": 1.71, + "learning_rate": 5.30978241545415e-09, + "logits/chosen": -3.2891440391540527, + "logits/rejected": -3.1761908531188965, + "logps/chosen": -241.04933166503906, + "logps/rejected": -661.093505859375, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.25653076171875, + "rewards/margins": 5.166861057281494, + "rewards/rejected": -3.910330295562744, + "step": 1343 + }, + { + "epoch": 1.71, + "learning_rate": 5.263533508961826e-09, + "logits/chosen": -3.260127544403076, + "logits/rejected": -3.0253186225891113, + "logps/chosen": -239.24496459960938, + "logps/rejected": -1221.6788330078125, + "loss": 0.3019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2964591979980469, + "rewards/margins": 7.066915512084961, + "rewards/rejected": -5.770456314086914, + "step": 1344 + }, + { + "epoch": 1.71, + "learning_rate": 5.2174757083686515e-09, + "logits/chosen": -3.1759731769561768, + "logits/rejected": -3.1627769470214844, + "logps/chosen": -292.95196533203125, + "logps/rejected": -738.2971801757812, + "loss": 0.3189, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3454742431640625, + "rewards/margins": 4.569920539855957, + "rewards/rejected": -3.2244462966918945, + "step": 1345 + }, + { + "epoch": 1.72, + "learning_rate": 5.17160921042501e-09, + "logits/chosen": -3.2930407524108887, + "logits/rejected": -3.238699197769165, + "logps/chosen": -270.8834533691406, + "logps/rejected": -1066.7652587890625, + "loss": 0.3094, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.447261095046997, + "rewards/margins": 6.836268901824951, + "rewards/rejected": -5.389007568359375, + "step": 1346 + }, + { + "epoch": 1.72, + "learning_rate": 5.1259342110641036e-09, + "logits/chosen": -3.3003828525543213, + "logits/rejected": -3.0388851165771484, + "logps/chosen": -265.911865234375, + "logps/rejected": -625.85791015625, + "loss": 0.3141, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3541412353515625, + "rewards/margins": 4.1390275955200195, + "rewards/rejected": -2.784886360168457, + "step": 1347 + }, + { + "epoch": 1.72, + "learning_rate": 5.080450905401057e-09, + "logits/chosen": -3.3148279190063477, + "logits/rejected": -3.1790590286254883, + "logps/chosen": -242.75860595703125, + "logps/rejected": -891.3555297851562, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5471465587615967, + "rewards/margins": 5.9185471534729, + "rewards/rejected": -4.371400833129883, + "step": 1348 + }, + { + "epoch": 1.72, + "learning_rate": 5.035159487732133e-09, + "logits/chosen": -3.204906463623047, + "logits/rejected": -3.110980987548828, + "logps/chosen": -338.23675537109375, + "logps/rejected": -609.135498046875, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3533493280410767, + "rewards/margins": 4.495337009429932, + "rewards/rejected": -3.1419878005981445, + "step": 1349 + }, + { + "epoch": 1.72, + "learning_rate": 4.9900601515338705e-09, + "logits/chosen": -3.2471747398376465, + "logits/rejected": -3.1347711086273193, + "logps/chosen": -273.3313293457031, + "logps/rejected": -571.9300537109375, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3758834600448608, + "rewards/margins": 4.839075088500977, + "rewards/rejected": -3.463191270828247, + "step": 1350 + }, + { + "epoch": 1.72, + "learning_rate": 4.945153089462295e-09, + "logits/chosen": -3.228422164916992, + "logits/rejected": -3.1306183338165283, + "logps/chosen": -280.76739501953125, + "logps/rejected": -541.4971923828125, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5955734252929688, + "rewards/margins": 4.76309061050415, + "rewards/rejected": -3.1675171852111816, + "step": 1351 + }, + { + "epoch": 1.72, + "learning_rate": 4.9004384933520545e-09, + "logits/chosen": -3.272963523864746, + "logits/rejected": -3.185720920562744, + "logps/chosen": -239.23892211914062, + "logps/rejected": -675.8472900390625, + "loss": 0.3041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4088913202285767, + "rewards/margins": 5.3405656814575195, + "rewards/rejected": -3.931674003601074, + "step": 1352 + }, + { + "epoch": 1.72, + "learning_rate": 4.8559165542156164e-09, + "logits/chosen": -3.2363646030426025, + "logits/rejected": -3.126807689666748, + "logps/chosen": -278.67498779296875, + "logps/rejected": -456.1668701171875, + "loss": 0.2858, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.05804443359375, + "rewards/margins": 3.8116226196289062, + "rewards/rejected": -2.7535781860351562, + "step": 1353 + }, + { + "epoch": 1.73, + "learning_rate": 4.811587462242461e-09, + "logits/chosen": -3.2726926803588867, + "logits/rejected": -3.219695568084717, + "logps/chosen": -280.04620361328125, + "logps/rejected": -605.6968383789062, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4239234924316406, + "rewards/margins": 5.176635265350342, + "rewards/rejected": -3.752711772918701, + "step": 1354 + }, + { + "epoch": 1.73, + "learning_rate": 4.767451406798251e-09, + "logits/chosen": -3.234760284423828, + "logits/rejected": -3.0853166580200195, + "logps/chosen": -234.3835906982422, + "logps/rejected": -538.7238159179688, + "loss": 0.2791, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3347183465957642, + "rewards/margins": 4.15740966796875, + "rewards/rejected": -2.8226914405822754, + "step": 1355 + }, + { + "epoch": 1.73, + "learning_rate": 4.7235085764240625e-09, + "logits/chosen": -3.2596335411071777, + "logits/rejected": -3.1311516761779785, + "logps/chosen": -282.6192626953125, + "logps/rejected": -486.21826171875, + "loss": 0.2918, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.602940320968628, + "rewards/margins": 4.250312805175781, + "rewards/rejected": -2.6473724842071533, + "step": 1356 + }, + { + "epoch": 1.73, + "learning_rate": 4.679759158835511e-09, + "logits/chosen": -3.2891926765441895, + "logits/rejected": -3.022812604904175, + "logps/chosen": -220.17941284179688, + "logps/rejected": -1248.412109375, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4957726001739502, + "rewards/margins": 7.034956932067871, + "rewards/rejected": -5.5391845703125, + "step": 1357 + }, + { + "epoch": 1.73, + "learning_rate": 4.636203340922007e-09, + "logits/chosen": -3.3558621406555176, + "logits/rejected": -3.253469944000244, + "logps/chosen": -246.02035522460938, + "logps/rejected": -530.2371826171875, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3494278192520142, + "rewards/margins": 4.802206516265869, + "rewards/rejected": -3.4527785778045654, + "step": 1358 + }, + { + "epoch": 1.73, + "learning_rate": 4.592841308745932e-09, + "logits/chosen": -3.2762343883514404, + "logits/rejected": -3.1036596298217773, + "logps/chosen": -257.8388977050781, + "logps/rejected": -627.8114624023438, + "loss": 0.2823, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2925605773925781, + "rewards/margins": 4.357945919036865, + "rewards/rejected": -3.065385341644287, + "step": 1359 + }, + { + "epoch": 1.73, + "learning_rate": 4.549673247541874e-09, + "logits/chosen": -3.2105865478515625, + "logits/rejected": -3.1588265895843506, + "logps/chosen": -249.53732299804688, + "logps/rejected": -866.7916259765625, + "loss": 0.2755, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2433288097381592, + "rewards/margins": 6.60284423828125, + "rewards/rejected": -5.35951566696167, + "step": 1360 + }, + { + "epoch": 1.73, + "learning_rate": 4.5066993417157865e-09, + "logits/chosen": -3.2520687580108643, + "logits/rejected": -3.1622960567474365, + "logps/chosen": -238.22299194335938, + "logps/rejected": -843.8521118164062, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0102813243865967, + "rewards/margins": 5.608334541320801, + "rewards/rejected": -4.598052978515625, + "step": 1361 + }, + { + "epoch": 1.74, + "learning_rate": 4.463919774844233e-09, + "logits/chosen": -3.2477777004241943, + "logits/rejected": -3.1399807929992676, + "logps/chosen": -239.39076232910156, + "logps/rejected": -391.8152160644531, + "loss": 0.3079, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0325523614883423, + "rewards/margins": 3.090561628341675, + "rewards/rejected": -2.058009386062622, + "step": 1362 + }, + { + "epoch": 1.74, + "learning_rate": 4.421334729673593e-09, + "logits/chosen": -3.242182731628418, + "logits/rejected": -3.1590476036071777, + "logps/chosen": -253.85581970214844, + "logps/rejected": -782.27978515625, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0697990655899048, + "rewards/margins": 5.43367862701416, + "rewards/rejected": -4.363879203796387, + "step": 1363 + }, + { + "epoch": 1.74, + "learning_rate": 4.37894438811931e-09, + "logits/chosen": -3.2829902172088623, + "logits/rejected": -3.2298431396484375, + "logps/chosen": -306.5185241699219, + "logps/rejected": -790.7815551757812, + "loss": 0.2966, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1004669666290283, + "rewards/margins": 6.220813274383545, + "rewards/rejected": -5.1203460693359375, + "step": 1364 + }, + { + "epoch": 1.74, + "learning_rate": 4.336748931265066e-09, + "logits/chosen": -3.2054505348205566, + "logits/rejected": -3.1949119567871094, + "logps/chosen": -286.120849609375, + "logps/rejected": -963.7283935546875, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.554931640625, + "rewards/margins": 6.489584445953369, + "rewards/rejected": -4.934652805328369, + "step": 1365 + }, + { + "epoch": 1.74, + "learning_rate": 4.294748539362031e-09, + "logits/chosen": -3.203388214111328, + "logits/rejected": -3.1544768810272217, + "logps/chosen": -274.362060546875, + "logps/rejected": -442.97772216796875, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4656891822814941, + "rewards/margins": 3.878645420074463, + "rewards/rejected": -2.4129562377929688, + "step": 1366 + }, + { + "epoch": 1.74, + "learning_rate": 4.252943391828107e-09, + "logits/chosen": -3.2060084342956543, + "logits/rejected": -3.023780345916748, + "logps/chosen": -237.9934844970703, + "logps/rejected": -686.250244140625, + "loss": 0.2731, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2993751764297485, + "rewards/margins": 4.0044732093811035, + "rewards/rejected": -2.7050979137420654, + "step": 1367 + }, + { + "epoch": 1.74, + "learning_rate": 4.2113336672471245e-09, + "logits/chosen": -3.1783604621887207, + "logits/rejected": -3.087055206298828, + "logps/chosen": -276.8348388671875, + "logps/rejected": -387.4041748046875, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.400202989578247, + "rewards/margins": 3.3266844749450684, + "rewards/rejected": -1.9264817237854004, + "step": 1368 + }, + { + "epoch": 1.75, + "learning_rate": 4.16991954336815e-09, + "logits/chosen": -3.274130344390869, + "logits/rejected": -3.1535518169403076, + "logps/chosen": -265.3199157714844, + "logps/rejected": -451.0856018066406, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4251831769943237, + "rewards/margins": 4.089991569519043, + "rewards/rejected": -2.664808750152588, + "step": 1369 + }, + { + "epoch": 1.75, + "learning_rate": 4.128701197104628e-09, + "logits/chosen": -3.2377474308013916, + "logits/rejected": -3.166386127471924, + "logps/chosen": -254.43453979492188, + "logps/rejected": -563.6064453125, + "loss": 0.2706, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4116027355194092, + "rewards/margins": 5.49029541015625, + "rewards/rejected": -4.078692436218262, + "step": 1370 + }, + { + "epoch": 1.75, + "learning_rate": 4.087678804533712e-09, + "logits/chosen": -3.2301361560821533, + "logits/rejected": -3.1338109970092773, + "logps/chosen": -295.97412109375, + "logps/rejected": -957.6768798828125, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3961243629455566, + "rewards/margins": 6.2666168212890625, + "rewards/rejected": -4.870492935180664, + "step": 1371 + }, + { + "epoch": 1.75, + "learning_rate": 4.0468525408954454e-09, + "logits/chosen": -3.208578586578369, + "logits/rejected": -3.1405880451202393, + "logps/chosen": -272.8677978515625, + "logps/rejected": -733.5648803710938, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2459626197814941, + "rewards/margins": 5.160104751586914, + "rewards/rejected": -3.914141893386841, + "step": 1372 + }, + { + "epoch": 1.75, + "learning_rate": 4.006222580592089e-09, + "logits/chosen": -3.2351737022399902, + "logits/rejected": -3.083331346511841, + "logps/chosen": -309.07391357421875, + "logps/rejected": -1235.8594970703125, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2685363292694092, + "rewards/margins": 6.977121353149414, + "rewards/rejected": -5.708584785461426, + "step": 1373 + }, + { + "epoch": 1.75, + "learning_rate": 3.9657890971873e-09, + "logits/chosen": -3.216728448867798, + "logits/rejected": -3.12680721282959, + "logps/chosen": -243.64682006835938, + "logps/rejected": -700.6415405273438, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3351707458496094, + "rewards/margins": 4.5968194007873535, + "rewards/rejected": -3.261648654937744, + "step": 1374 + }, + { + "epoch": 1.75, + "learning_rate": 3.925552263405424e-09, + "logits/chosen": -3.2892532348632812, + "logits/rejected": -3.0836031436920166, + "logps/chosen": -240.35977172851562, + "logps/rejected": -1422.771240234375, + "loss": 0.2804, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.437703013420105, + "rewards/margins": 7.085542678833008, + "rewards/rejected": -5.647839546203613, + "step": 1375 + }, + { + "epoch": 1.75, + "learning_rate": 3.8855122511307626e-09, + "logits/chosen": -3.2145943641662598, + "logits/rejected": -3.1468210220336914, + "logps/chosen": -276.519287109375, + "logps/rejected": -4299.9521484375, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3278671503067017, + "rewards/margins": 5.579986572265625, + "rewards/rejected": -4.252119541168213, + "step": 1376 + }, + { + "epoch": 1.76, + "learning_rate": 3.845669231406845e-09, + "logits/chosen": -3.269918441772461, + "logits/rejected": -3.122570514678955, + "logps/chosen": -254.4256134033203, + "logps/rejected": -735.65478515625, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4481338262557983, + "rewards/margins": 5.63974142074585, + "rewards/rejected": -4.191607475280762, + "step": 1377 + }, + { + "epoch": 1.76, + "learning_rate": 3.8060233744356625e-09, + "logits/chosen": -3.2889516353607178, + "logits/rejected": -3.1139349937438965, + "logps/chosen": -273.6466064453125, + "logps/rejected": -1321.7718505859375, + "loss": 0.2991, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4681510925292969, + "rewards/margins": 7.674291610717773, + "rewards/rejected": -6.206140518188477, + "step": 1378 + }, + { + "epoch": 1.76, + "learning_rate": 3.766574849576976e-09, + "logits/chosen": -3.305312395095825, + "logits/rejected": -3.259568214416504, + "logps/chosen": -252.76034545898438, + "logps/rejected": -665.082763671875, + "loss": 0.2917, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.556176781654358, + "rewards/margins": 5.044421195983887, + "rewards/rejected": -3.4882447719573975, + "step": 1379 + }, + { + "epoch": 1.76, + "learning_rate": 3.727323825347578e-09, + "logits/chosen": -3.214681386947632, + "logits/rejected": -3.006826877593994, + "logps/chosen": -281.22808837890625, + "logps/rejected": -835.8363647460938, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2944656610488892, + "rewards/margins": 4.3540849685668945, + "rewards/rejected": -3.059619188308716, + "step": 1380 + }, + { + "epoch": 1.76, + "learning_rate": 3.6882704694205624e-09, + "logits/chosen": -3.175670623779297, + "logits/rejected": -3.088498830795288, + "logps/chosen": -279.0, + "logps/rejected": -475.3082580566406, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.289027452468872, + "rewards/margins": 3.4290056228637695, + "rewards/rejected": -2.1399781703948975, + "step": 1381 + }, + { + "epoch": 1.76, + "learning_rate": 3.649414948624652e-09, + "logits/chosen": -3.1925008296966553, + "logits/rejected": -3.0441315174102783, + "logps/chosen": -287.559814453125, + "logps/rejected": -592.2987060546875, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4167206287384033, + "rewards/margins": 4.461688041687012, + "rewards/rejected": -3.0449676513671875, + "step": 1382 + }, + { + "epoch": 1.76, + "learning_rate": 3.6107574289434285e-09, + "logits/chosen": -3.1949894428253174, + "logits/rejected": -3.0197136402130127, + "logps/chosen": -271.74981689453125, + "logps/rejected": -289.9399108886719, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2858765125274658, + "rewards/margins": 2.4626970291137695, + "rewards/rejected": -1.1768203973770142, + "step": 1383 + }, + { + "epoch": 1.76, + "learning_rate": 3.5722980755146515e-09, + "logits/chosen": -3.274247407913208, + "logits/rejected": -3.168148994445801, + "logps/chosen": -253.99423217773438, + "logps/rejected": -815.9724731445312, + "loss": 0.3006, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2354271411895752, + "rewards/margins": 5.930092811584473, + "rewards/rejected": -4.694665431976318, + "step": 1384 + }, + { + "epoch": 1.77, + "learning_rate": 3.534037052629546e-09, + "logits/chosen": -3.2550439834594727, + "logits/rejected": -3.157261371612549, + "logps/chosen": -224.27542114257812, + "logps/rejected": -1206.01171875, + "loss": 0.2662, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2214065790176392, + "rewards/margins": 8.201581954956055, + "rewards/rejected": -6.980175971984863, + "step": 1385 + }, + { + "epoch": 1.77, + "learning_rate": 3.4959745237321427e-09, + "logits/chosen": -3.227649211883545, + "logits/rejected": -3.0780158042907715, + "logps/chosen": -247.96224975585938, + "logps/rejected": -412.9747619628906, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3302048444747925, + "rewards/margins": 3.679121494293213, + "rewards/rejected": -2.348916530609131, + "step": 1386 + }, + { + "epoch": 1.77, + "learning_rate": 3.4581106514184944e-09, + "logits/chosen": -3.244807243347168, + "logits/rejected": -3.1358048915863037, + "logps/chosen": -302.32904052734375, + "logps/rejected": -412.8042297363281, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4359543323516846, + "rewards/margins": 3.593928575515747, + "rewards/rejected": -2.1579742431640625, + "step": 1387 + }, + { + "epoch": 1.77, + "learning_rate": 3.4204455974360556e-09, + "logits/chosen": -3.175457000732422, + "logits/rejected": -3.125798225402832, + "logps/chosen": -297.52020263671875, + "logps/rejected": -660.6435546875, + "loss": 0.2774, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4553115367889404, + "rewards/margins": 5.335456848144531, + "rewards/rejected": -3.880145311355591, + "step": 1388 + }, + { + "epoch": 1.77, + "learning_rate": 3.382979522682955e-09, + "logits/chosen": -3.2274537086486816, + "logits/rejected": -3.165987014770508, + "logps/chosen": -305.463623046875, + "logps/rejected": -563.8214721679688, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.277867078781128, + "rewards/margins": 4.513910293579102, + "rewards/rejected": -3.2360429763793945, + "step": 1389 + }, + { + "epoch": 1.77, + "learning_rate": 3.3457125872073388e-09, + "logits/chosen": -3.2747347354888916, + "logits/rejected": -3.0628299713134766, + "logps/chosen": -235.9931640625, + "logps/rejected": -828.4932861328125, + "loss": 0.2708, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2699570655822754, + "rewards/margins": 5.415410041809082, + "rewards/rejected": -4.145452976226807, + "step": 1390 + }, + { + "epoch": 1.77, + "learning_rate": 3.3086449502066514e-09, + "logits/chosen": -3.238192081451416, + "logits/rejected": -3.183577537536621, + "logps/chosen": -290.5245666503906, + "logps/rejected": -971.1666870117188, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2589995861053467, + "rewards/margins": 7.083984375, + "rewards/rejected": -5.824985027313232, + "step": 1391 + }, + { + "epoch": 1.77, + "learning_rate": 3.2717767700269627e-09, + "logits/chosen": -3.268106460571289, + "logits/rejected": -3.074680805206299, + "logps/chosen": -244.69253540039062, + "logps/rejected": -1549.5347900390625, + "loss": 0.2656, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.340692162513733, + "rewards/margins": 8.192633628845215, + "rewards/rejected": -6.851941108703613, + "step": 1392 + }, + { + "epoch": 1.78, + "learning_rate": 3.2351082041623123e-09, + "logits/chosen": -3.268418788909912, + "logits/rejected": -3.304553508758545, + "logps/chosen": -243.1018829345703, + "logps/rejected": -4088.85205078125, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.305903673171997, + "rewards/margins": 4.93801736831665, + "rewards/rejected": -3.632113456726074, + "step": 1393 + }, + { + "epoch": 1.78, + "learning_rate": 3.198639409254017e-09, + "logits/chosen": -3.260511875152588, + "logits/rejected": -3.1410436630249023, + "logps/chosen": -277.12921142578125, + "logps/rejected": -828.878662109375, + "loss": 0.2868, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485987901687622, + "rewards/margins": 5.344768047332764, + "rewards/rejected": -3.8587799072265625, + "step": 1394 + }, + { + "epoch": 1.78, + "learning_rate": 3.1623705410900314e-09, + "logits/chosen": -3.16475510597229, + "logits/rejected": -3.110339403152466, + "logps/chosen": -302.0893249511719, + "logps/rejected": -301.0153503417969, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.485345482826233, + "rewards/margins": 3.216418504714966, + "rewards/rejected": -1.7310731410980225, + "step": 1395 + }, + { + "epoch": 1.78, + "learning_rate": 3.1263017546042324e-09, + "logits/chosen": -3.255809783935547, + "logits/rejected": -3.1222407817840576, + "logps/chosen": -263.9443054199219, + "logps/rejected": -771.8114013671875, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2216278314590454, + "rewards/margins": 5.751940727233887, + "rewards/rejected": -4.530313014984131, + "step": 1396 + }, + { + "epoch": 1.78, + "learning_rate": 3.0904332038757976e-09, + "logits/chosen": -3.3107643127441406, + "logits/rejected": -3.2011051177978516, + "logps/chosen": -286.39117431640625, + "logps/rejected": -572.5445556640625, + "loss": 0.3099, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.310943603515625, + "rewards/margins": 4.5931077003479, + "rewards/rejected": -3.2821640968322754, + "step": 1397 + }, + { + "epoch": 1.78, + "learning_rate": 3.054765042128521e-09, + "logits/chosen": -3.2589221000671387, + "logits/rejected": -3.123837471008301, + "logps/chosen": -290.04364013671875, + "logps/rejected": -791.8497314453125, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4887192249298096, + "rewards/margins": 6.048594951629639, + "rewards/rejected": -4.55987548828125, + "step": 1398 + }, + { + "epoch": 1.78, + "learning_rate": 3.0192974217302048e-09, + "logits/chosen": -3.2315475940704346, + "logits/rejected": -3.189451217651367, + "logps/chosen": -266.7807922363281, + "logps/rejected": -694.9759521484375, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4177277088165283, + "rewards/margins": 5.445452690124512, + "rewards/rejected": -4.0277252197265625, + "step": 1399 + }, + { + "epoch": 1.78, + "learning_rate": 2.9840304941919412e-09, + "logits/chosen": -3.285889148712158, + "logits/rejected": -3.0325515270233154, + "logps/chosen": -249.6952362060547, + "logps/rejected": -947.7554931640625, + "loss": 0.277, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.582776665687561, + "rewards/margins": 5.604711532592773, + "rewards/rejected": -4.021934509277344, + "step": 1400 + }, + { + "epoch": 1.79, + "learning_rate": 2.948964410167509e-09, + "logits/chosen": -3.287749767303467, + "logits/rejected": -3.0696897506713867, + "logps/chosen": -205.76132202148438, + "logps/rejected": -461.9718322753906, + "loss": 0.2792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3917427062988281, + "rewards/margins": 3.2781381607055664, + "rewards/rejected": -1.8863953351974487, + "step": 1401 + }, + { + "epoch": 1.79, + "learning_rate": 2.9140993194527286e-09, + "logits/chosen": -3.225745677947998, + "logits/rejected": -3.08948016166687, + "logps/chosen": -265.206298828125, + "logps/rejected": -1607.268310546875, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1177444458007812, + "rewards/margins": 8.720369338989258, + "rewards/rejected": -7.602624893188477, + "step": 1402 + }, + { + "epoch": 1.79, + "learning_rate": 2.8794353709848172e-09, + "logits/chosen": -3.217707872390747, + "logits/rejected": -3.0888028144836426, + "logps/chosen": -293.8306884765625, + "logps/rejected": -705.4567260742188, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2691559791564941, + "rewards/margins": 5.074986457824707, + "rewards/rejected": -3.805830478668213, + "step": 1403 + }, + { + "epoch": 1.79, + "learning_rate": 2.8449727128417366e-09, + "logits/chosen": -3.178694725036621, + "logits/rejected": -3.1418471336364746, + "logps/chosen": -272.8133239746094, + "logps/rejected": -925.0048217773438, + "loss": 0.2686, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3539161682128906, + "rewards/margins": 6.063223838806152, + "rewards/rejected": -4.709307670593262, + "step": 1404 + }, + { + "epoch": 1.79, + "learning_rate": 2.81071149224158e-09, + "logits/chosen": -3.259030818939209, + "logits/rejected": -3.178356170654297, + "logps/chosen": -238.30245971679688, + "logps/rejected": -662.1305541992188, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.318037509918213, + "rewards/margins": 5.393567085266113, + "rewards/rejected": -4.0755295753479, + "step": 1405 + }, + { + "epoch": 1.79, + "learning_rate": 2.7766518555419394e-09, + "logits/chosen": -3.2694857120513916, + "logits/rejected": -3.162674903869629, + "logps/chosen": -258.3941955566406, + "logps/rejected": -418.4207458496094, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4608116149902344, + "rewards/margins": 3.722667694091797, + "rewards/rejected": -2.2618560791015625, + "step": 1406 + }, + { + "epoch": 1.79, + "learning_rate": 2.742793948239258e-09, + "logits/chosen": -3.1907799243927, + "logits/rejected": -3.052908420562744, + "logps/chosen": -243.3566436767578, + "logps/rejected": -1069.71240234375, + "loss": 0.2792, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1158294677734375, + "rewards/margins": 6.133069038391113, + "rewards/rejected": -5.017239570617676, + "step": 1407 + }, + { + "epoch": 1.79, + "learning_rate": 2.709137914968268e-09, + "logits/chosen": -3.2028603553771973, + "logits/rejected": -3.0360512733459473, + "logps/chosen": -318.004638671875, + "logps/rejected": -787.713134765625, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4642410278320312, + "rewards/margins": 4.3272905349731445, + "rewards/rejected": -2.863049268722534, + "step": 1408 + }, + { + "epoch": 1.8, + "learning_rate": 2.675683899501302e-09, + "logits/chosen": -3.25363826751709, + "logits/rejected": -3.1601691246032715, + "logps/chosen": -253.89910888671875, + "logps/rejected": -512.0510864257812, + "loss": 0.2766, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3341248035430908, + "rewards/margins": 4.2084455490112305, + "rewards/rejected": -2.8743209838867188, + "step": 1409 + }, + { + "epoch": 1.8, + "learning_rate": 2.642432044747711e-09, + "logits/chosen": -3.2408084869384766, + "logits/rejected": -3.1667871475219727, + "logps/chosen": -267.4140625, + "logps/rejected": -763.494140625, + "loss": 0.2935, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2532395124435425, + "rewards/margins": 6.1804704666137695, + "rewards/rejected": -4.9272308349609375, + "step": 1410 + }, + { + "epoch": 1.8, + "learning_rate": 2.609382492753265e-09, + "logits/chosen": -3.2774887084960938, + "logits/rejected": -3.1654062271118164, + "logps/chosen": -257.05279541015625, + "logps/rejected": -688.492919921875, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5037651062011719, + "rewards/margins": 5.321201324462891, + "rewards/rejected": -3.8174362182617188, + "step": 1411 + }, + { + "epoch": 1.8, + "learning_rate": 2.57653538469953e-09, + "logits/chosen": -3.185164451599121, + "logits/rejected": -3.1367673873901367, + "logps/chosen": -264.8451843261719, + "logps/rejected": -684.6116333007812, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3810906410217285, + "rewards/margins": 5.010046005249023, + "rewards/rejected": -3.628955125808716, + "step": 1412 + }, + { + "epoch": 1.8, + "learning_rate": 2.5438908609032693e-09, + "logits/chosen": -3.2159688472747803, + "logits/rejected": -3.169858455657959, + "logps/chosen": -290.5953063964844, + "logps/rejected": -633.2196044921875, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9563385248184204, + "rewards/margins": 4.2186431884765625, + "rewards/rejected": -3.2623047828674316, + "step": 1413 + }, + { + "epoch": 1.8, + "learning_rate": 2.51144906081584e-09, + "logits/chosen": -3.213127613067627, + "logits/rejected": -3.055083990097046, + "logps/chosen": -249.43125915527344, + "logps/rejected": -647.9185791015625, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2460448741912842, + "rewards/margins": 3.9700803756713867, + "rewards/rejected": -2.7240357398986816, + "step": 1414 + }, + { + "epoch": 1.8, + "learning_rate": 2.479210123022596e-09, + "logits/chosen": -3.242098569869995, + "logits/rejected": -3.2017276287078857, + "logps/chosen": -281.9034118652344, + "logps/rejected": -531.2098388671875, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9525588750839233, + "rewards/margins": 3.699749708175659, + "rewards/rejected": -2.7471909523010254, + "step": 1415 + }, + { + "epoch": 1.8, + "learning_rate": 2.4471741852423233e-09, + "logits/chosen": -3.262096881866455, + "logits/rejected": -3.133486747741699, + "logps/chosen": -270.31317138671875, + "logps/rejected": -529.7406005859375, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5921471118927002, + "rewards/margins": 4.764856815338135, + "rewards/rejected": -3.1727097034454346, + "step": 1416 + }, + { + "epoch": 1.81, + "learning_rate": 2.415341384326608e-09, + "logits/chosen": -3.2562389373779297, + "logits/rejected": -3.0979480743408203, + "logps/chosen": -287.88739013671875, + "logps/rejected": -691.7872314453125, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7147353887557983, + "rewards/margins": 5.332612991333008, + "rewards/rejected": -3.617877244949341, + "step": 1417 + }, + { + "epoch": 1.81, + "learning_rate": 2.3837118562592794e-09, + "logits/chosen": -3.3230104446411133, + "logits/rejected": -3.1468281745910645, + "logps/chosen": -243.8573760986328, + "logps/rejected": -399.3331298828125, + "loss": 0.303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1783554553985596, + "rewards/margins": 3.013510227203369, + "rewards/rejected": -1.8351547718048096, + "step": 1418 + }, + { + "epoch": 1.81, + "learning_rate": 2.3522857361558237e-09, + "logits/chosen": -3.319322347640991, + "logits/rejected": -3.146756410598755, + "logps/chosen": -266.5150146484375, + "logps/rejected": -467.7215270996094, + "loss": 0.2848, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4218552112579346, + "rewards/margins": 4.0186767578125, + "rewards/rejected": -2.5968217849731445, + "step": 1419 + }, + { + "epoch": 1.81, + "learning_rate": 2.3210631582627928e-09, + "logits/chosen": -3.2356176376342773, + "logits/rejected": -3.026653528213501, + "logps/chosen": -257.99774169921875, + "logps/rejected": -906.7539672851562, + "loss": 0.287, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2321395874023438, + "rewards/margins": 5.175337791442871, + "rewards/rejected": -3.943197727203369, + "step": 1420 + }, + { + "epoch": 1.81, + "learning_rate": 2.2900442559572797e-09, + "logits/chosen": -3.277571201324463, + "logits/rejected": -3.115875720977783, + "logps/chosen": -274.2532043457031, + "logps/rejected": -1258.874267578125, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1609947681427002, + "rewards/margins": 5.394804954528809, + "rewards/rejected": -4.2338104248046875, + "step": 1421 + }, + { + "epoch": 1.81, + "learning_rate": 2.259229161746279e-09, + "logits/chosen": -3.147901773452759, + "logits/rejected": -2.9558186531066895, + "logps/chosen": -263.0162658691406, + "logps/rejected": -814.405029296875, + "loss": 0.3275, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1386017799377441, + "rewards/margins": 4.229526042938232, + "rewards/rejected": -3.0909242630004883, + "step": 1422 + }, + { + "epoch": 1.81, + "learning_rate": 2.22861800726617e-09, + "logits/chosen": -3.291133403778076, + "logits/rejected": -3.2269253730773926, + "logps/chosen": -286.1787109375, + "logps/rejected": -803.93994140625, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4490982294082642, + "rewards/margins": 6.430067539215088, + "rewards/rejected": -4.980969429016113, + "step": 1423 + }, + { + "epoch": 1.82, + "learning_rate": 2.198210923282118e-09, + "logits/chosen": -3.2579245567321777, + "logits/rejected": -3.1000490188598633, + "logps/chosen": -250.44842529296875, + "logps/rejected": -389.9983215332031, + "loss": 0.2971, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4570221900939941, + "rewards/margins": 3.5889344215393066, + "rewards/rejected": -2.1319122314453125, + "step": 1424 + }, + { + "epoch": 1.82, + "learning_rate": 2.1680080396875745e-09, + "logits/chosen": -3.297912120819092, + "logits/rejected": -3.156129837036133, + "logps/chosen": -271.36688232421875, + "logps/rejected": -398.915771484375, + "loss": 0.2883, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.386505126953125, + "rewards/margins": 3.406620979309082, + "rewards/rejected": -2.020115613937378, + "step": 1425 + }, + { + "epoch": 1.82, + "learning_rate": 2.1380094855036614e-09, + "logits/chosen": -3.239912986755371, + "logits/rejected": -3.221177816390991, + "logps/chosen": -263.128662109375, + "logps/rejected": -585.03271484375, + "loss": 0.2903, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.307830810546875, + "rewards/margins": 4.491072177886963, + "rewards/rejected": -3.183241367340088, + "step": 1426 + }, + { + "epoch": 1.82, + "learning_rate": 2.108215388878637e-09, + "logits/chosen": -3.2097110748291016, + "logits/rejected": -3.1123695373535156, + "logps/chosen": -232.6932373046875, + "logps/rejected": -449.36700439453125, + "loss": 0.2821, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.308079481124878, + "rewards/margins": 3.64251708984375, + "rewards/rejected": -2.334437608718872, + "step": 1427 + }, + { + "epoch": 1.82, + "learning_rate": 2.0786258770873644e-09, + "logits/chosen": -3.1781582832336426, + "logits/rejected": -3.1679847240448, + "logps/chosen": -271.48443603515625, + "logps/rejected": -3643.83154296875, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1845413446426392, + "rewards/margins": 8.667948722839355, + "rewards/rejected": -7.483407497406006, + "step": 1428 + }, + { + "epoch": 1.82, + "learning_rate": 2.0492410765307666e-09, + "logits/chosen": -3.2958993911743164, + "logits/rejected": -3.077244758605957, + "logps/chosen": -246.21340942382812, + "logps/rejected": -1169.84765625, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5495002269744873, + "rewards/margins": 7.2500410079956055, + "rewards/rejected": -5.700540542602539, + "step": 1429 + }, + { + "epoch": 1.82, + "learning_rate": 2.020061112735266e-09, + "logits/chosen": -3.3099255561828613, + "logits/rejected": -3.083230972290039, + "logps/chosen": -221.3106689453125, + "logps/rejected": -1330.922607421875, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1467567682266235, + "rewards/margins": 6.881473541259766, + "rewards/rejected": -5.734716892242432, + "step": 1430 + }, + { + "epoch": 1.82, + "learning_rate": 1.991086110352264e-09, + "logits/chosen": -3.2725818157196045, + "logits/rejected": -3.132322072982788, + "logps/chosen": -256.8466491699219, + "logps/rejected": -592.7978515625, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3021888732910156, + "rewards/margins": 4.012622833251953, + "rewards/rejected": -2.7104339599609375, + "step": 1431 + }, + { + "epoch": 1.83, + "learning_rate": 1.9623161931575925e-09, + "logits/chosen": -3.2834157943725586, + "logits/rejected": -3.1928811073303223, + "logps/chosen": -248.6999969482422, + "logps/rejected": -820.5260620117188, + "loss": 0.2832, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.144317626953125, + "rewards/margins": 5.220278739929199, + "rewards/rejected": -4.075961112976074, + "step": 1432 + }, + { + "epoch": 1.83, + "learning_rate": 1.933751484051027e-09, + "logits/chosen": -3.2149178981781006, + "logits/rejected": -3.0149669647216797, + "logps/chosen": -283.52880859375, + "logps/rejected": -1765.9884033203125, + "loss": 0.2787, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.423187255859375, + "rewards/margins": 8.330669403076172, + "rewards/rejected": -6.907483100891113, + "step": 1433 + }, + { + "epoch": 1.83, + "learning_rate": 1.905392105055703e-09, + "logits/chosen": -3.2469279766082764, + "logits/rejected": -3.1188502311706543, + "logps/chosen": -255.812255859375, + "logps/rejected": -686.8544921875, + "loss": 0.2805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2525222301483154, + "rewards/margins": 5.0444841384887695, + "rewards/rejected": -3.791961908340454, + "step": 1434 + }, + { + "epoch": 1.83, + "learning_rate": 1.8772381773176416e-09, + "logits/chosen": -3.264902114868164, + "logits/rejected": -3.1408448219299316, + "logps/chosen": -251.3064727783203, + "logps/rejected": -320.42333984375, + "loss": 0.2732, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2472282648086548, + "rewards/margins": 3.182147979736328, + "rewards/rejected": -1.934919834136963, + "step": 1435 + }, + { + "epoch": 1.83, + "learning_rate": 1.8492898211051989e-09, + "logits/chosen": -3.161987066268921, + "logits/rejected": -3.108494281768799, + "logps/chosen": -248.93313598632812, + "logps/rejected": -353.72393798828125, + "loss": 0.2747, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3800156116485596, + "rewards/margins": 3.2075514793395996, + "rewards/rejected": -1.8275359869003296, + "step": 1436 + }, + { + "epoch": 1.83, + "learning_rate": 1.8215471558085838e-09, + "logits/chosen": -3.305980682373047, + "logits/rejected": -3.1867899894714355, + "logps/chosen": -261.5182800292969, + "logps/rejected": -789.6019287109375, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2126686573028564, + "rewards/margins": 5.240024566650391, + "rewards/rejected": -4.027356147766113, + "step": 1437 + }, + { + "epoch": 1.83, + "learning_rate": 1.7940102999393193e-09, + "logits/chosen": -3.265796184539795, + "logits/rejected": -3.1608352661132812, + "logps/chosen": -272.400634765625, + "logps/rejected": -298.10736083984375, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1720092296600342, + "rewards/margins": 2.653111457824707, + "rewards/rejected": -1.4811019897460938, + "step": 1438 + }, + { + "epoch": 1.83, + "learning_rate": 1.7666793711297646e-09, + "logits/chosen": -3.1948463916778564, + "logits/rejected": -3.061565399169922, + "logps/chosen": -243.087158203125, + "logps/rejected": -641.3599853515625, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4191887378692627, + "rewards/margins": 4.785935401916504, + "rewards/rejected": -3.366746664047241, + "step": 1439 + }, + { + "epoch": 1.84, + "learning_rate": 1.7395544861325716e-09, + "logits/chosen": -3.2693300247192383, + "logits/rejected": -3.201962947845459, + "logps/chosen": -272.277099609375, + "logps/rejected": -568.7134399414062, + "loss": 0.2867, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2283813953399658, + "rewards/margins": 4.373181343078613, + "rewards/rejected": -3.1447997093200684, + "step": 1440 + }, + { + "epoch": 1.84, + "learning_rate": 1.7126357608202246e-09, + "logits/chosen": -3.1860828399658203, + "logits/rejected": -3.0157527923583984, + "logps/chosen": -286.3592529296875, + "logps/rejected": -805.2037353515625, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3062942028045654, + "rewards/margins": 5.0003767013549805, + "rewards/rejected": -3.694082736968994, + "step": 1441 + }, + { + "epoch": 1.84, + "learning_rate": 1.6859233101845506e-09, + "logits/chosen": -3.2533843517303467, + "logits/rejected": -3.1765878200531006, + "logps/chosen": -287.9850158691406, + "logps/rejected": -864.7113037109375, + "loss": 0.3107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9677002429962158, + "rewards/margins": 5.314401149749756, + "rewards/rejected": -4.346701145172119, + "step": 1442 + }, + { + "epoch": 1.84, + "learning_rate": 1.6594172483361756e-09, + "logits/chosen": -3.2949070930480957, + "logits/rejected": -3.1912219524383545, + "logps/chosen": -257.81109619140625, + "logps/rejected": -885.7154541015625, + "loss": 0.3002, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2076020240783691, + "rewards/margins": 6.833462715148926, + "rewards/rejected": -5.625860691070557, + "step": 1443 + }, + { + "epoch": 1.84, + "learning_rate": 1.6331176885040876e-09, + "logits/chosen": -3.2270097732543945, + "logits/rejected": -3.004728317260742, + "logps/chosen": -307.475341796875, + "logps/rejected": -607.591552734375, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3647339344024658, + "rewards/margins": 4.694517612457275, + "rewards/rejected": -3.3297836780548096, + "step": 1444 + }, + { + "epoch": 1.84, + "learning_rate": 1.6070247430351347e-09, + "logits/chosen": -3.2093873023986816, + "logits/rejected": -2.9986085891723633, + "logps/chosen": -291.2041320800781, + "logps/rejected": -325.9241943359375, + "loss": 0.3161, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6324234008789062, + "rewards/margins": 2.714648485183716, + "rewards/rejected": -1.0822250843048096, + "step": 1445 + }, + { + "epoch": 1.84, + "learning_rate": 1.5811385233935548e-09, + "logits/chosen": -3.2225401401519775, + "logits/rejected": -3.136192798614502, + "logps/chosen": -348.7550354003906, + "logps/rejected": -427.10699462890625, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2457878589630127, + "rewards/margins": 3.388932228088379, + "rewards/rejected": -2.143144130706787, + "step": 1446 + }, + { + "epoch": 1.84, + "learning_rate": 1.5554591401604811e-09, + "logits/chosen": -3.1258206367492676, + "logits/rejected": -3.132155656814575, + "logps/chosen": -269.3127136230469, + "logps/rejected": -768.6680297851562, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.723017930984497, + "rewards/margins": 6.183659553527832, + "rewards/rejected": -4.460641860961914, + "step": 1447 + }, + { + "epoch": 1.85, + "learning_rate": 1.5299867030334812e-09, + "logits/chosen": -3.2575550079345703, + "logits/rejected": -3.0605483055114746, + "logps/chosen": -292.0943298339844, + "logps/rejected": -671.97021484375, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3587455749511719, + "rewards/margins": 5.4292826652526855, + "rewards/rejected": -4.0705366134643555, + "step": 1448 + }, + { + "epoch": 1.85, + "learning_rate": 1.5047213208260913e-09, + "logits/chosen": -3.263901710510254, + "logits/rejected": -3.123321771621704, + "logps/chosen": -246.34661865234375, + "logps/rejected": -815.3726806640625, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0806610584259033, + "rewards/margins": 4.90240478515625, + "rewards/rejected": -3.821743965148926, + "step": 1449 + }, + { + "epoch": 1.85, + "learning_rate": 1.4796631014673322e-09, + "logits/chosen": -3.1429126262664795, + "logits/rejected": -3.103240728378296, + "logps/chosen": -257.8070068359375, + "logps/rejected": -717.6773681640625, + "loss": 0.2982, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3409790992736816, + "rewards/margins": 5.725189208984375, + "rewards/rejected": -4.384210109710693, + "step": 1450 + }, + { + "epoch": 1.85, + "learning_rate": 1.4548121520012946e-09, + "logits/chosen": -3.2341208457946777, + "logits/rejected": -3.1174557209014893, + "logps/chosen": -237.44866943359375, + "logps/rejected": -605.70263671875, + "loss": 0.2682, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.302098035812378, + "rewards/margins": 4.203831672668457, + "rewards/rejected": -2.9017333984375, + "step": 1451 + }, + { + "epoch": 1.85, + "learning_rate": 1.4301685785866214e-09, + "logits/chosen": -3.227786064147949, + "logits/rejected": -3.113485336303711, + "logps/chosen": -287.26678466796875, + "logps/rejected": -664.9212036132812, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1532516479492188, + "rewards/margins": 4.4546098709106445, + "rewards/rejected": -3.301358222961426, + "step": 1452 + }, + { + "epoch": 1.85, + "learning_rate": 1.4057324864960972e-09, + "logits/chosen": -3.171782970428467, + "logits/rejected": -3.1155595779418945, + "logps/chosen": -258.12689208984375, + "logps/rejected": -928.9437866210938, + "loss": 0.283, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2510429620742798, + "rewards/margins": 5.707024574279785, + "rewards/rejected": -4.455981254577637, + "step": 1453 + }, + { + "epoch": 1.85, + "learning_rate": 1.3815039801161721e-09, + "logits/chosen": -3.246108055114746, + "logits/rejected": -3.1621437072753906, + "logps/chosen": -293.9975280761719, + "logps/rejected": -661.4432373046875, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4483063220977783, + "rewards/margins": 5.390084743499756, + "rewards/rejected": -3.9417786598205566, + "step": 1454 + }, + { + "epoch": 1.85, + "learning_rate": 1.3574831629465488e-09, + "logits/chosen": -3.169877767562866, + "logits/rejected": -3.1730692386627197, + "logps/chosen": -282.85833740234375, + "logps/rejected": -619.2151489257812, + "loss": 0.2699, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3041412830352783, + "rewards/margins": 4.753195285797119, + "rewards/rejected": -3.449054002761841, + "step": 1455 + }, + { + "epoch": 1.86, + "learning_rate": 1.3336701375997127e-09, + "logits/chosen": -3.151163339614868, + "logits/rejected": -3.064894199371338, + "logps/chosen": -257.00238037109375, + "logps/rejected": -714.5089111328125, + "loss": 0.2773, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3615753650665283, + "rewards/margins": 5.218115329742432, + "rewards/rejected": -3.8565402030944824, + "step": 1456 + }, + { + "epoch": 1.86, + "learning_rate": 1.3100650058004869e-09, + "logits/chosen": -3.306147575378418, + "logits/rejected": -3.1988768577575684, + "logps/chosen": -289.9561767578125, + "logps/rejected": -470.1412658691406, + "loss": 0.3037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3544731140136719, + "rewards/margins": 3.7560906410217285, + "rewards/rejected": -2.4016175270080566, + "step": 1457 + }, + { + "epoch": 1.86, + "learning_rate": 1.2866678683856268e-09, + "logits/chosen": -3.2692837715148926, + "logits/rejected": -3.043654441833496, + "logps/chosen": -267.9700012207031, + "logps/rejected": -522.3203735351562, + "loss": 0.2966, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3573105335235596, + "rewards/margins": 3.761898994445801, + "rewards/rejected": -2.404588222503662, + "step": 1458 + }, + { + "epoch": 1.86, + "learning_rate": 1.2634788253033713e-09, + "logits/chosen": -3.2490310668945312, + "logits/rejected": -3.0900049209594727, + "logps/chosen": -259.40740966796875, + "logps/rejected": -572.4739379882812, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.235476016998291, + "rewards/margins": 4.259771347045898, + "rewards/rejected": -3.0242953300476074, + "step": 1459 + }, + { + "epoch": 1.86, + "learning_rate": 1.240497975613014e-09, + "logits/chosen": -3.224440574645996, + "logits/rejected": -3.0232715606689453, + "logps/chosen": -273.1068115234375, + "logps/rejected": -388.5733642578125, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4114547967910767, + "rewards/margins": 3.3924057483673096, + "rewards/rejected": -1.980950951576233, + "step": 1460 + }, + { + "epoch": 1.86, + "learning_rate": 1.2177254174844943e-09, + "logits/chosen": -3.240663766860962, + "logits/rejected": -3.1087052822113037, + "logps/chosen": -261.20587158203125, + "logps/rejected": -460.9732666015625, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3569245338439941, + "rewards/margins": 4.081010818481445, + "rewards/rejected": -2.724086046218872, + "step": 1461 + }, + { + "epoch": 1.86, + "learning_rate": 1.1951612481979567e-09, + "logits/chosen": -3.2653651237487793, + "logits/rejected": -3.1225991249084473, + "logps/chosen": -275.46722412109375, + "logps/rejected": -1016.906494140625, + "loss": 0.2806, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.309483289718628, + "rewards/margins": 6.058497428894043, + "rewards/rejected": -4.749014377593994, + "step": 1462 + }, + { + "epoch": 1.86, + "learning_rate": 1.1728055641433587e-09, + "logits/chosen": -3.2891135215759277, + "logits/rejected": -3.1876237392425537, + "logps/chosen": -269.1441650390625, + "logps/rejected": -858.0169677734375, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3635170459747314, + "rewards/margins": 6.781623363494873, + "rewards/rejected": -5.4181060791015625, + "step": 1463 + }, + { + "epoch": 1.87, + "learning_rate": 1.1506584608200365e-09, + "logits/chosen": -3.306908369064331, + "logits/rejected": -3.1614131927490234, + "logps/chosen": -312.1925354003906, + "logps/rejected": -522.6229858398438, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.547088623046875, + "rewards/margins": 4.038487434387207, + "rewards/rejected": -2.491398811340332, + "step": 1464 + }, + { + "epoch": 1.87, + "learning_rate": 1.1287200328363222e-09, + "logits/chosen": -3.3267464637756348, + "logits/rejected": -3.1933059692382812, + "logps/chosen": -276.4043273925781, + "logps/rejected": -432.2123718261719, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4550186395645142, + "rewards/margins": 4.0329179763793945, + "rewards/rejected": -2.577899217605591, + "step": 1465 + }, + { + "epoch": 1.87, + "learning_rate": 1.1069903739091002e-09, + "logits/chosen": -3.2563672065734863, + "logits/rejected": -3.1326920986175537, + "logps/chosen": -301.53131103515625, + "logps/rejected": -1375.703125, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0182983875274658, + "rewards/margins": 7.7199249267578125, + "rewards/rejected": -6.701626777648926, + "step": 1466 + }, + { + "epoch": 1.87, + "learning_rate": 1.085469576863468e-09, + "logits/chosen": -3.257808208465576, + "logits/rejected": -3.0435309410095215, + "logps/chosen": -288.62188720703125, + "logps/rejected": -642.81640625, + "loss": 0.2977, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.45928955078125, + "rewards/margins": 4.768719673156738, + "rewards/rejected": -3.309429883956909, + "step": 1467 + }, + { + "epoch": 1.87, + "learning_rate": 1.064157733632276e-09, + "logits/chosen": -3.22269344329834, + "logits/rejected": -3.131711483001709, + "logps/chosen": -272.4757080078125, + "logps/rejected": -408.91180419921875, + "loss": 0.3055, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1597352027893066, + "rewards/margins": 3.234788656234741, + "rewards/rejected": -2.0750534534454346, + "step": 1468 + }, + { + "epoch": 1.87, + "learning_rate": 1.0430549352557717e-09, + "logits/chosen": -3.3028006553649902, + "logits/rejected": -3.163534641265869, + "logps/chosen": -287.18890380859375, + "logps/rejected": -550.47216796875, + "loss": 0.2886, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3771209716796875, + "rewards/margins": 4.084012031555176, + "rewards/rejected": -2.706890821456909, + "step": 1469 + }, + { + "epoch": 1.87, + "learning_rate": 1.0221612718812e-09, + "logits/chosen": -3.245055675506592, + "logits/rejected": -3.112717866897583, + "logps/chosen": -280.5664978027344, + "logps/rejected": -590.515625, + "loss": 0.3021, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.005724310874939, + "rewards/margins": 4.805507659912109, + "rewards/rejected": -3.79978346824646, + "step": 1470 + }, + { + "epoch": 1.88, + "learning_rate": 1.0014768327624212e-09, + "logits/chosen": -3.2095515727996826, + "logits/rejected": -3.133162498474121, + "logps/chosen": -254.222900390625, + "logps/rejected": -1223.3411865234375, + "loss": 0.2983, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3604530096054077, + "rewards/margins": 7.7434492111206055, + "rewards/rejected": -6.38299560546875, + "step": 1471 + }, + { + "epoch": 1.88, + "learning_rate": 9.81001706259532e-10, + "logits/chosen": -3.2925877571105957, + "logits/rejected": -3.1214842796325684, + "logps/chosen": -248.54470825195312, + "logps/rejected": -414.8829345703125, + "loss": 0.3088, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3649742603302002, + "rewards/margins": 3.1838555335998535, + "rewards/rejected": -1.8188812732696533, + "step": 1472 + }, + { + "epoch": 1.88, + "learning_rate": 9.607359798384785e-10, + "logits/chosen": -3.273256301879883, + "logits/rejected": -3.1201605796813965, + "logps/chosen": -267.02777099609375, + "logps/rejected": -782.1982421875, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5436394214630127, + "rewards/margins": 5.089206218719482, + "rewards/rejected": -3.5455667972564697, + "step": 1473 + }, + { + "epoch": 1.88, + "learning_rate": 9.40679740070688e-10, + "logits/chosen": -3.2066397666931152, + "logits/rejected": -3.0857105255126953, + "logps/chosen": -295.2703857421875, + "logps/rejected": -460.83984375, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.189184546470642, + "rewards/margins": 3.932995557785034, + "rewards/rejected": -2.7438111305236816, + "step": 1474 + }, + { + "epoch": 1.88, + "learning_rate": 9.208330726327041e-10, + "logits/chosen": -3.285616636276245, + "logits/rejected": -3.169185161590576, + "logps/chosen": -226.23362731933594, + "logps/rejected": -729.1372680664062, + "loss": 0.2751, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4029731750488281, + "rewards/margins": 4.348140716552734, + "rewards/rejected": -2.9451677799224854, + "step": 1475 + }, + { + "epoch": 1.88, + "learning_rate": 9.011960623058201e-10, + "logits/chosen": -3.174180507659912, + "logits/rejected": -3.175609588623047, + "logps/chosen": -293.297607421875, + "logps/rejected": -997.8772583007812, + "loss": 0.2771, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3637443780899048, + "rewards/margins": 7.197112083435059, + "rewards/rejected": -5.833367824554443, + "step": 1476 + }, + { + "epoch": 1.88, + "learning_rate": 8.817687929756901e-10, + "logits/chosen": -3.213315963745117, + "logits/rejected": -3.031466484069824, + "logps/chosen": -260.760498046875, + "logps/rejected": -771.358642578125, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2656081914901733, + "rewards/margins": 4.835914611816406, + "rewards/rejected": -3.5703063011169434, + "step": 1477 + }, + { + "epoch": 1.88, + "learning_rate": 8.625513476320289e-10, + "logits/chosen": -3.2441563606262207, + "logits/rejected": -3.1307058334350586, + "logps/chosen": -310.7197265625, + "logps/rejected": -772.855712890625, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1569290161132812, + "rewards/margins": 4.59707498550415, + "rewards/rejected": -3.440145969390869, + "step": 1478 + }, + { + "epoch": 1.89, + "learning_rate": 8.435438083681967e-10, + "logits/chosen": -3.273026466369629, + "logits/rejected": -3.2245876789093018, + "logps/chosen": -264.2095947265625, + "logps/rejected": -4269.95849609375, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2779603004455566, + "rewards/margins": 11.433551788330078, + "rewards/rejected": -10.155591011047363, + "step": 1479 + }, + { + "epoch": 1.89, + "learning_rate": 8.247462563808816e-10, + "logits/chosen": -3.2835304737091064, + "logits/rejected": -3.176999568939209, + "logps/chosen": -239.96575927734375, + "logps/rejected": -702.1493530273438, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.081305742263794, + "rewards/margins": 4.481969833374023, + "rewards/rejected": -3.4006638526916504, + "step": 1480 + }, + { + "epoch": 1.89, + "learning_rate": 8.061587719697394e-10, + "logits/chosen": -3.230445384979248, + "logits/rejected": -3.2224936485290527, + "logps/chosen": -252.43115234375, + "logps/rejected": -807.7454833984375, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2012542486190796, + "rewards/margins": 5.856622695922852, + "rewards/rejected": -4.655368328094482, + "step": 1481 + }, + { + "epoch": 1.89, + "learning_rate": 7.877814345370715e-10, + "logits/chosen": -3.2019550800323486, + "logits/rejected": -3.115664482116699, + "logps/chosen": -271.3164978027344, + "logps/rejected": -475.0960693359375, + "loss": 0.2974, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4217689037322998, + "rewards/margins": 3.90987491607666, + "rewards/rejected": -2.4881057739257812, + "step": 1482 + }, + { + "epoch": 1.89, + "learning_rate": 7.696143225874474e-10, + "logits/chosen": -3.281118631362915, + "logits/rejected": -3.1058349609375, + "logps/chosen": -275.08123779296875, + "logps/rejected": -602.0118408203125, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1890366077423096, + "rewards/margins": 4.165245056152344, + "rewards/rejected": -2.976208448410034, + "step": 1483 + }, + { + "epoch": 1.89, + "learning_rate": 7.516575137274162e-10, + "logits/chosen": -3.193390130996704, + "logits/rejected": -3.1442184448242188, + "logps/chosen": -262.07769775390625, + "logps/rejected": -659.8839111328125, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3583741188049316, + "rewards/margins": 5.172647476196289, + "rewards/rejected": -3.8142733573913574, + "step": 1484 + }, + { + "epoch": 1.89, + "learning_rate": 7.33911084665123e-10, + "logits/chosen": -3.29404354095459, + "logits/rejected": -3.1614108085632324, + "logps/chosen": -237.7342529296875, + "logps/rejected": -1373.92431640625, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2108818292617798, + "rewards/margins": 7.105361461639404, + "rewards/rejected": -5.894479751586914, + "step": 1485 + }, + { + "epoch": 1.89, + "learning_rate": 7.163751112100435e-10, + "logits/chosen": -3.2300124168395996, + "logits/rejected": -3.195866584777832, + "logps/chosen": -259.9776611328125, + "logps/rejected": -733.5213012695312, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1120620965957642, + "rewards/margins": 4.6836957931518555, + "rewards/rejected": -3.571633815765381, + "step": 1486 + }, + { + "epoch": 1.9, + "learning_rate": 6.990496682726055e-10, + "logits/chosen": -3.2481608390808105, + "logits/rejected": -3.1489813327789307, + "logps/chosen": -215.7327880859375, + "logps/rejected": -505.95965576171875, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.279760718345642, + "rewards/margins": 3.6651201248168945, + "rewards/rejected": -2.385359287261963, + "step": 1487 + }, + { + "epoch": 1.9, + "learning_rate": 6.819348298638839e-10, + "logits/chosen": -3.266307830810547, + "logits/rejected": -3.125911235809326, + "logps/chosen": -251.40164184570312, + "logps/rejected": -277.8345031738281, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2163323163986206, + "rewards/margins": 2.8474268913269043, + "rewards/rejected": -1.6310944557189941, + "step": 1488 + }, + { + "epoch": 1.9, + "learning_rate": 6.650306690953011e-10, + "logits/chosen": -3.2776899337768555, + "logits/rejected": -3.171602964401245, + "logps/chosen": -272.2469787597656, + "logps/rejected": -712.0775146484375, + "loss": 0.2695, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.461646318435669, + "rewards/margins": 5.276363372802734, + "rewards/rejected": -3.8147172927856445, + "step": 1489 + }, + { + "epoch": 1.9, + "learning_rate": 6.483372581783054e-10, + "logits/chosen": -3.2495625019073486, + "logits/rejected": -3.122180461883545, + "logps/chosen": -279.76312255859375, + "logps/rejected": -652.1004638671875, + "loss": 0.2699, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.389216661453247, + "rewards/margins": 4.352717876434326, + "rewards/rejected": -2.9635009765625, + "step": 1490 + }, + { + "epoch": 1.9, + "learning_rate": 6.318546684240533e-10, + "logits/chosen": -3.2336230278015137, + "logits/rejected": -3.074410915374756, + "logps/chosen": -246.0321044921875, + "logps/rejected": -659.5819091796875, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5047180652618408, + "rewards/margins": 5.440917015075684, + "rewards/rejected": -3.9361987113952637, + "step": 1491 + }, + { + "epoch": 1.9, + "learning_rate": 6.15582970243117e-10, + "logits/chosen": -3.277926445007324, + "logits/rejected": -3.1878745555877686, + "logps/chosen": -231.85301208496094, + "logps/rejected": -568.7720336914062, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3549095392227173, + "rewards/margins": 4.80277156829834, + "rewards/rejected": -3.447862148284912, + "step": 1492 + }, + { + "epoch": 1.9, + "learning_rate": 5.995222331451721e-10, + "logits/chosen": -3.258850574493408, + "logits/rejected": -3.1609745025634766, + "logps/chosen": -273.5921325683594, + "logps/rejected": -882.23095703125, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.464015245437622, + "rewards/margins": 6.695750713348389, + "rewards/rejected": -5.2317352294921875, + "step": 1493 + }, + { + "epoch": 1.9, + "learning_rate": 5.83672525738721e-10, + "logits/chosen": -3.2735109329223633, + "logits/rejected": -3.0922603607177734, + "logps/chosen": -279.01959228515625, + "logps/rejected": -701.4261474609375, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3487564325332642, + "rewards/margins": 4.618139743804932, + "rewards/rejected": -3.269383192062378, + "step": 1494 + }, + { + "epoch": 1.91, + "learning_rate": 5.680339157307756e-10, + "logits/chosen": -3.150282144546509, + "logits/rejected": -3.091416358947754, + "logps/chosen": -247.68251037597656, + "logps/rejected": -319.34405517578125, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2775070667266846, + "rewards/margins": 2.840550422668457, + "rewards/rejected": -1.563043236732483, + "step": 1495 + }, + { + "epoch": 1.91, + "learning_rate": 5.526064699265753e-10, + "logits/chosen": -3.2654519081115723, + "logits/rejected": -3.139537811279297, + "logps/chosen": -238.53802490234375, + "logps/rejected": -1033.5472412109375, + "loss": 0.2798, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1800849437713623, + "rewards/margins": 6.489789962768555, + "rewards/rejected": -5.309704780578613, + "step": 1496 + }, + { + "epoch": 1.91, + "learning_rate": 5.373902542293196e-10, + "logits/chosen": -3.277993679046631, + "logits/rejected": -3.1882998943328857, + "logps/chosen": -233.9102325439453, + "logps/rejected": -440.10894775390625, + "loss": 0.2672, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.297701358795166, + "rewards/margins": 3.8993752002716064, + "rewards/rejected": -2.6016738414764404, + "step": 1497 + }, + { + "epoch": 1.91, + "learning_rate": 5.223853336398632e-10, + "logits/chosen": -3.247002124786377, + "logits/rejected": -3.13321590423584, + "logps/chosen": -254.7838897705078, + "logps/rejected": -417.480712890625, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4762436151504517, + "rewards/margins": 3.9129250049591064, + "rewards/rejected": -2.4366812705993652, + "step": 1498 + }, + { + "epoch": 1.91, + "learning_rate": 5.075917722564382e-10, + "logits/chosen": -3.290731191635132, + "logits/rejected": -3.1554617881774902, + "logps/chosen": -237.10879516601562, + "logps/rejected": -628.3169555664062, + "loss": 0.2703, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3222930431365967, + "rewards/margins": 5.059412002563477, + "rewards/rejected": -3.737118721008301, + "step": 1499 + }, + { + "epoch": 1.91, + "learning_rate": 4.930096332744105e-10, + "logits/chosen": -3.2770490646362305, + "logits/rejected": -3.206366539001465, + "logps/chosen": -270.19915771484375, + "logps/rejected": -413.931640625, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3194358348846436, + "rewards/margins": 3.4273338317871094, + "rewards/rejected": -2.107897996902466, + "step": 1500 + }, + { + "epoch": 1.91, + "learning_rate": 4.786389789859735e-10, + "logits/chosen": -3.3393681049346924, + "logits/rejected": -3.1255109310150146, + "logps/chosen": -279.94049072265625, + "logps/rejected": -925.2492065429688, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.215785264968872, + "rewards/margins": 6.299223899841309, + "rewards/rejected": -5.083438396453857, + "step": 1501 + }, + { + "epoch": 1.91, + "learning_rate": 4.644798707798936e-10, + "logits/chosen": -3.2521591186523438, + "logits/rejected": -3.1385209560394287, + "logps/chosen": -272.7848815917969, + "logps/rejected": -630.48779296875, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1395546197891235, + "rewards/margins": 5.052338600158691, + "rewards/rejected": -3.9127840995788574, + "step": 1502 + }, + { + "epoch": 1.92, + "learning_rate": 4.5053236914127103e-10, + "logits/chosen": -3.283534526824951, + "logits/rejected": -3.0803160667419434, + "logps/chosen": -233.64683532714844, + "logps/rejected": -821.84912109375, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3672211170196533, + "rewards/margins": 6.142068386077881, + "rewards/rejected": -4.774847507476807, + "step": 1503 + }, + { + "epoch": 1.92, + "learning_rate": 4.3679653365124024e-10, + "logits/chosen": -3.206284523010254, + "logits/rejected": -3.1062753200531006, + "logps/chosen": -269.978271484375, + "logps/rejected": -402.3857421875, + "loss": 0.2875, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4988830089569092, + "rewards/margins": 3.1681411266326904, + "rewards/rejected": -1.6692581176757812, + "step": 1504 + }, + { + "epoch": 1.92, + "learning_rate": 4.2327242298674793e-10, + "logits/chosen": -3.2399215698242188, + "logits/rejected": -3.049748420715332, + "logps/chosen": -241.04649353027344, + "logps/rejected": -467.991455078125, + "loss": 0.2962, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2614402770996094, + "rewards/margins": 3.6669838428497314, + "rewards/rejected": -2.405543565750122, + "step": 1505 + }, + { + "epoch": 1.92, + "learning_rate": 4.0996009492029195e-10, + "logits/chosen": -3.2405142784118652, + "logits/rejected": -3.0694093704223633, + "logps/chosen": -254.30300903320312, + "logps/rejected": -745.37060546875, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.246490478515625, + "rewards/margins": 5.572241306304932, + "rewards/rejected": -4.325750827789307, + "step": 1506 + }, + { + "epoch": 1.92, + "learning_rate": 3.9685960631967166e-10, + "logits/chosen": -3.254805564880371, + "logits/rejected": -3.1446638107299805, + "logps/chosen": -272.103759765625, + "logps/rejected": -405.4335021972656, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3584396839141846, + "rewards/margins": 3.7758119106292725, + "rewards/rejected": -2.417372226715088, + "step": 1507 + }, + { + "epoch": 1.92, + "learning_rate": 3.8397101314774914e-10, + "logits/chosen": -3.1799559593200684, + "logits/rejected": -3.0854756832122803, + "logps/chosen": -292.8008728027344, + "logps/rejected": -746.0073852539062, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2807540893554688, + "rewards/margins": 4.692268371582031, + "rewards/rejected": -3.4115142822265625, + "step": 1508 + }, + { + "epoch": 1.92, + "learning_rate": 3.7129437046220515e-10, + "logits/chosen": -3.255784273147583, + "logits/rejected": -3.1485495567321777, + "logps/chosen": -213.79222106933594, + "logps/rejected": -720.5537109375, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3113312721252441, + "rewards/margins": 6.037973403930664, + "rewards/rejected": -4.72664213180542, + "step": 1509 + }, + { + "epoch": 1.92, + "learning_rate": 3.588297324153056e-10, + "logits/chosen": -3.2227020263671875, + "logits/rejected": -3.135313034057617, + "logps/chosen": -257.2558288574219, + "logps/rejected": -892.5264892578125, + "loss": 0.276, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2557350397109985, + "rewards/margins": 6.253512382507324, + "rewards/rejected": -4.997776985168457, + "step": 1510 + }, + { + "epoch": 1.93, + "learning_rate": 3.4657715225368535e-10, + "logits/chosen": -3.2615904808044434, + "logits/rejected": -3.0458617210388184, + "logps/chosen": -257.61529541015625, + "logps/rejected": -457.31317138671875, + "loss": 0.2859, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3809959888458252, + "rewards/margins": 4.110378265380859, + "rewards/rejected": -2.7293825149536133, + "step": 1511 + }, + { + "epoch": 1.93, + "learning_rate": 3.345366823180928e-10, + "logits/chosen": -3.245112180709839, + "logits/rejected": -3.156461477279663, + "logps/chosen": -235.0436553955078, + "logps/rejected": -887.9248046875, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.433007001876831, + "rewards/margins": 5.794261932373047, + "rewards/rejected": -4.361254692077637, + "step": 1512 + }, + { + "epoch": 1.93, + "learning_rate": 3.2270837404318464e-10, + "logits/chosen": -3.195859670639038, + "logits/rejected": -3.1135082244873047, + "logps/chosen": -266.4774475097656, + "logps/rejected": -423.54864501953125, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3852570056915283, + "rewards/margins": 3.4820146560668945, + "rewards/rejected": -2.096757411956787, + "step": 1513 + }, + { + "epoch": 1.93, + "learning_rate": 3.110922779573033e-10, + "logits/chosen": -3.2432055473327637, + "logits/rejected": -3.1643991470336914, + "logps/chosen": -249.60765075683594, + "logps/rejected": -571.7303466796875, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2519714832305908, + "rewards/margins": 5.031569004058838, + "rewards/rejected": -3.779597520828247, + "step": 1514 + }, + { + "epoch": 1.93, + "learning_rate": 2.9968844368225556e-10, + "logits/chosen": -3.2498860359191895, + "logits/rejected": -3.102879047393799, + "logps/chosen": -260.5882568359375, + "logps/rejected": -527.501220703125, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.270991563796997, + "rewards/margins": 3.991816997528076, + "rewards/rejected": -2.7208251953125, + "step": 1515 + }, + { + "epoch": 1.93, + "learning_rate": 2.8849691993311776e-10, + "logits/chosen": -3.2370548248291016, + "logits/rejected": -3.20145583152771, + "logps/chosen": -283.37396240234375, + "logps/rejected": -601.753173828125, + "loss": 0.2802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5258262157440186, + "rewards/margins": 4.652446746826172, + "rewards/rejected": -3.1266205310821533, + "step": 1516 + }, + { + "epoch": 1.93, + "learning_rate": 2.7751775451800296e-10, + "logits/chosen": -3.2738170623779297, + "logits/rejected": -3.0701332092285156, + "logps/chosen": -250.86160278320312, + "logps/rejected": -813.8865966796875, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3483169078826904, + "rewards/margins": 5.806677341461182, + "rewards/rejected": -4.45836067199707, + "step": 1517 + }, + { + "epoch": 1.93, + "learning_rate": 2.667509943378721e-10, + "logits/chosen": -3.258025884628296, + "logits/rejected": -3.1708931922912598, + "logps/chosen": -266.04986572265625, + "logps/rejected": -625.3038940429688, + "loss": 0.2909, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2410645484924316, + "rewards/margins": 4.3033599853515625, + "rewards/rejected": -3.062295436859131, + "step": 1518 + }, + { + "epoch": 1.94, + "learning_rate": 2.561966853863284e-10, + "logits/chosen": -3.268192768096924, + "logits/rejected": -3.2031564712524414, + "logps/chosen": -274.5616455078125, + "logps/rejected": -444.8482666015625, + "loss": 0.2996, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1114165782928467, + "rewards/margins": 3.6727890968322754, + "rewards/rejected": -2.5613722801208496, + "step": 1519 + }, + { + "epoch": 1.94, + "learning_rate": 2.4585487274942915e-10, + "logits/chosen": -3.239515781402588, + "logits/rejected": -3.1250081062316895, + "logps/chosen": -267.32635498046875, + "logps/rejected": -470.1201171875, + "loss": 0.3054, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.283464789390564, + "rewards/margins": 4.166750431060791, + "rewards/rejected": -2.8832855224609375, + "step": 1520 + }, + { + "epoch": 1.94, + "learning_rate": 2.3572560060547973e-10, + "logits/chosen": -3.246915817260742, + "logits/rejected": -3.1208791732788086, + "logps/chosen": -272.6876525878906, + "logps/rejected": -446.4232177734375, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2766700983047485, + "rewards/margins": 3.8291375637054443, + "rewards/rejected": -2.5524673461914062, + "step": 1521 + }, + { + "epoch": 1.94, + "learning_rate": 2.2580891222485632e-10, + "logits/chosen": -3.2786123752593994, + "logits/rejected": -3.1436069011688232, + "logps/chosen": -250.5052490234375, + "logps/rejected": -488.5313720703125, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.277130126953125, + "rewards/margins": 3.905705451965332, + "rewards/rejected": -2.628575325012207, + "step": 1522 + }, + { + "epoch": 1.94, + "learning_rate": 2.1610484996981148e-10, + "logits/chosen": -3.3154988288879395, + "logits/rejected": -3.129624366760254, + "logps/chosen": -242.22659301757812, + "logps/rejected": -551.5438232421875, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4245589971542358, + "rewards/margins": 4.1177263259887695, + "rewards/rejected": -2.693167209625244, + "step": 1523 + }, + { + "epoch": 1.94, + "learning_rate": 2.0661345529430774e-10, + "logits/chosen": -3.2937684059143066, + "logits/rejected": -3.186664342880249, + "logps/chosen": -277.8470458984375, + "logps/rejected": -955.5345458984375, + "loss": 0.2857, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2847000360488892, + "rewards/margins": 6.994633674621582, + "rewards/rejected": -5.709933280944824, + "step": 1524 + }, + { + "epoch": 1.94, + "learning_rate": 1.973347687438176e-10, + "logits/chosen": -3.220999240875244, + "logits/rejected": -3.1128149032592773, + "logps/chosen": -309.35784912109375, + "logps/rejected": -1282.5916748046875, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.38360595703125, + "rewards/margins": 7.709301948547363, + "rewards/rejected": -6.325695991516113, + "step": 1525 + }, + { + "epoch": 1.95, + "learning_rate": 1.8826882995517934e-10, + "logits/chosen": -3.3314101696014404, + "logits/rejected": -3.0740156173706055, + "logps/chosen": -262.03875732421875, + "logps/rejected": -1094.6875, + "loss": 0.2735, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2781829833984375, + "rewards/margins": 5.684329032897949, + "rewards/rejected": -4.40614652633667, + "step": 1526 + }, + { + "epoch": 1.95, + "learning_rate": 1.794156776563971e-10, + "logits/chosen": -3.1656992435455322, + "logits/rejected": -3.063523292541504, + "logps/chosen": -282.28570556640625, + "logps/rejected": -692.889404296875, + "loss": 0.3104, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4817535877227783, + "rewards/margins": 5.164968967437744, + "rewards/rejected": -3.683215618133545, + "step": 1527 + }, + { + "epoch": 1.95, + "learning_rate": 1.7077534966650763e-10, + "logits/chosen": -3.2668261528015137, + "logits/rejected": -3.1791257858276367, + "logps/chosen": -233.8459014892578, + "logps/rejected": -708.3490600585938, + "loss": 0.2643, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5787436962127686, + "rewards/margins": 5.170929908752441, + "rewards/rejected": -3.592186212539673, + "step": 1528 + }, + { + "epoch": 1.95, + "learning_rate": 1.6234788289538059e-10, + "logits/chosen": -3.2529547214508057, + "logits/rejected": -3.116119384765625, + "logps/chosen": -241.41094970703125, + "logps/rejected": -3350.662353515625, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3846282958984375, + "rewards/margins": 11.071786880493164, + "rewards/rejected": -9.687158584594727, + "step": 1529 + }, + { + "epoch": 1.95, + "learning_rate": 1.541333133436018e-10, + "logits/chosen": -3.2685885429382324, + "logits/rejected": -3.2413876056671143, + "logps/chosen": -280.92376708984375, + "logps/rejected": -848.5652465820312, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2723067998886108, + "rewards/margins": 6.00114631652832, + "rewards/rejected": -4.72883939743042, + "step": 1530 + }, + { + "epoch": 1.95, + "learning_rate": 1.461316761022846e-10, + "logits/chosen": -3.278655529022217, + "logits/rejected": -3.1604344844818115, + "logps/chosen": -240.32749938964844, + "logps/rejected": -494.8128662109375, + "loss": 0.2765, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6086288690567017, + "rewards/margins": 4.058799743652344, + "rewards/rejected": -2.4501709938049316, + "step": 1531 + }, + { + "epoch": 1.95, + "learning_rate": 1.3834300535294218e-10, + "logits/chosen": -3.2151436805725098, + "logits/rejected": -3.122138500213623, + "logps/chosen": -276.89007568359375, + "logps/rejected": -609.3504028320312, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.22125244140625, + "rewards/margins": 3.737506151199341, + "rewards/rejected": -2.516253709793091, + "step": 1532 + }, + { + "epoch": 1.95, + "learning_rate": 1.307673343673432e-10, + "logits/chosen": -3.273322582244873, + "logits/rejected": -3.1282715797424316, + "logps/chosen": -242.2506561279297, + "logps/rejected": -1020.3701171875, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0929031372070312, + "rewards/margins": 6.0725908279418945, + "rewards/rejected": -4.979687690734863, + "step": 1533 + }, + { + "epoch": 1.96, + "learning_rate": 1.2340469550733423e-10, + "logits/chosen": -3.2710890769958496, + "logits/rejected": -3.084029197692871, + "logps/chosen": -263.35626220703125, + "logps/rejected": -1067.330078125, + "loss": 0.3095, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4478546380996704, + "rewards/margins": 6.855725288391113, + "rewards/rejected": -5.407870292663574, + "step": 1534 + }, + { + "epoch": 1.96, + "learning_rate": 1.1625512022476191e-10, + "logits/chosen": -3.2286293506622314, + "logits/rejected": -3.0363454818725586, + "logps/chosen": -264.1512756347656, + "logps/rejected": -808.44091796875, + "loss": 0.2956, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.421014428138733, + "rewards/margins": 5.812524795532227, + "rewards/rejected": -4.391510009765625, + "step": 1535 + }, + { + "epoch": 1.96, + "learning_rate": 1.0931863906127325e-10, + "logits/chosen": -3.2097954750061035, + "logits/rejected": -3.150928020477295, + "logps/chosen": -288.9313659667969, + "logps/rejected": -769.9844970703125, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.614903211593628, + "rewards/margins": 5.8943376541137695, + "rewards/rejected": -4.2794342041015625, + "step": 1536 + }, + { + "epoch": 1.96, + "learning_rate": 1.0259528164823783e-10, + "logits/chosen": -3.2487454414367676, + "logits/rejected": -3.13883376121521, + "logps/chosen": -322.75439453125, + "logps/rejected": -396.4626159667969, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3935775756835938, + "rewards/margins": 3.3344955444335938, + "rewards/rejected": -1.94091796875, + "step": 1537 + }, + { + "epoch": 1.96, + "learning_rate": 9.608507670659238e-11, + "logits/chosen": -3.216054916381836, + "logits/rejected": -3.124455690383911, + "logps/chosen": -250.83843994140625, + "logps/rejected": -571.4317626953125, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2445487976074219, + "rewards/margins": 4.3027472496032715, + "rewards/rejected": -3.0581984519958496, + "step": 1538 + }, + { + "epoch": 1.96, + "learning_rate": 8.978805204672979e-11, + "logits/chosen": -3.2219629287719727, + "logits/rejected": -3.1119449138641357, + "logps/chosen": -261.81060791015625, + "logps/rejected": -690.43994140625, + "loss": 0.2979, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3895034790039062, + "rewards/margins": 5.571159362792969, + "rewards/rejected": -4.1816558837890625, + "step": 1539 + }, + { + "epoch": 1.96, + "learning_rate": 8.370423456837139e-11, + "logits/chosen": -3.167104959487915, + "logits/rejected": -3.123950481414795, + "logps/chosen": -222.54324340820312, + "logps/rejected": -361.7930603027344, + "loss": 0.2696, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2366294860839844, + "rewards/margins": 3.2351326942443848, + "rewards/rejected": -1.9985032081604004, + "step": 1540 + }, + { + "epoch": 1.96, + "learning_rate": 7.783365026045597e-11, + "logits/chosen": -3.2144665718078613, + "logits/rejected": -3.1623775959014893, + "logps/chosen": -263.54864501953125, + "logps/rejected": -902.395263671875, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1331467628479004, + "rewards/margins": 6.466050624847412, + "rewards/rejected": -5.332903861999512, + "step": 1541 + }, + { + "epoch": 1.97, + "learning_rate": 7.217632420102871e-11, + "logits/chosen": -3.2067506313323975, + "logits/rejected": -3.096717357635498, + "logps/chosen": -278.14398193359375, + "logps/rejected": -618.0510864257812, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3116607666015625, + "rewards/margins": 4.579000949859619, + "rewards/rejected": -3.2673401832580566, + "step": 1542 + }, + { + "epoch": 1.97, + "learning_rate": 6.673228055715241e-11, + "logits/chosen": -3.2103984355926514, + "logits/rejected": -3.1518092155456543, + "logps/chosen": -239.66400146484375, + "logps/rejected": -444.3773193359375, + "loss": 0.2834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3428733348846436, + "rewards/margins": 3.709279775619507, + "rewards/rejected": -2.3664064407348633, + "step": 1543 + }, + { + "epoch": 1.97, + "learning_rate": 6.150154258476314e-11, + "logits/chosen": -3.2532639503479004, + "logits/rejected": -3.1835403442382812, + "logps/chosen": -264.2086486816406, + "logps/rejected": -749.2546997070312, + "loss": 0.2898, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4475960731506348, + "rewards/margins": 5.42907190322876, + "rewards/rejected": -3.981475830078125, + "step": 1544 + }, + { + "epoch": 1.97, + "learning_rate": 5.648413262860363e-11, + "logits/chosen": -3.2854108810424805, + "logits/rejected": -3.242312431335449, + "logps/chosen": -281.41748046875, + "logps/rejected": -767.4666137695312, + "loss": 0.2855, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3096039295196533, + "rewards/margins": 6.104907512664795, + "rewards/rejected": -4.7953033447265625, + "step": 1545 + }, + { + "epoch": 1.97, + "learning_rate": 5.168007212212333e-11, + "logits/chosen": -3.1873064041137695, + "logits/rejected": -3.0998215675354004, + "logps/chosen": -250.500732421875, + "logps/rejected": -716.4097900390625, + "loss": 0.2922, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1716827154159546, + "rewards/margins": 4.742953300476074, + "rewards/rejected": -3.5712709426879883, + "step": 1546 + }, + { + "epoch": 1.97, + "learning_rate": 4.708938158737852e-11, + "logits/chosen": -3.2042441368103027, + "logits/rejected": -3.090395212173462, + "logps/chosen": -254.81967163085938, + "logps/rejected": -553.2528076171875, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.241784691810608, + "rewards/margins": 3.913987874984741, + "rewards/rejected": -2.6722030639648438, + "step": 1547 + }, + { + "epoch": 1.97, + "learning_rate": 4.271208063494902e-11, + "logits/chosen": -3.1876871585845947, + "logits/rejected": -3.089053153991699, + "logps/chosen": -258.9316711425781, + "logps/rejected": -1330.8033447265625, + "loss": 0.2803, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3861031532287598, + "rewards/margins": 7.959812164306641, + "rewards/rejected": -6.573709011077881, + "step": 1548 + }, + { + "epoch": 1.97, + "learning_rate": 3.854818796385495e-11, + "logits/chosen": -3.2338919639587402, + "logits/rejected": -3.038193702697754, + "logps/chosen": -279.88031005859375, + "logps/rejected": -1144.54931640625, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3902785778045654, + "rewards/margins": 6.274269104003906, + "rewards/rejected": -4.88399076461792, + "step": 1549 + }, + { + "epoch": 1.98, + "learning_rate": 3.459772136146788e-11, + "logits/chosen": -3.3080310821533203, + "logits/rejected": -3.1713428497314453, + "logps/chosen": -234.09063720703125, + "logps/rejected": -1585.86572265625, + "loss": 0.2879, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4896018505096436, + "rewards/margins": 9.87358283996582, + "rewards/rejected": -8.383981704711914, + "step": 1550 + }, + { + "epoch": 1.98, + "learning_rate": 3.0860697703460894e-11, + "logits/chosen": -3.2577168941497803, + "logits/rejected": -3.278022289276123, + "logps/chosen": -249.36984252929688, + "logps/rejected": -905.3717651367188, + "loss": 0.2733, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.343540906906128, + "rewards/margins": 6.598826885223389, + "rewards/rejected": -5.255285739898682, + "step": 1551 + }, + { + "epoch": 1.98, + "learning_rate": 2.733713295369755e-11, + "logits/chosen": -3.2907485961914062, + "logits/rejected": -3.1987013816833496, + "logps/chosen": -265.5960693359375, + "logps/rejected": -1133.901611328125, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2362244129180908, + "rewards/margins": 7.471527099609375, + "rewards/rejected": -6.235302925109863, + "step": 1552 + }, + { + "epoch": 1.98, + "learning_rate": 2.4027042164198597e-11, + "logits/chosen": -3.217435836791992, + "logits/rejected": -3.1749424934387207, + "logps/chosen": -260.8542785644531, + "logps/rejected": -691.30517578125, + "loss": 0.2805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3889031410217285, + "rewards/margins": 5.639379501342773, + "rewards/rejected": -4.250475883483887, + "step": 1553 + }, + { + "epoch": 1.98, + "learning_rate": 2.093043947505868e-11, + "logits/chosen": -3.2911078929901123, + "logits/rejected": -3.1203866004943848, + "logps/chosen": -270.6746520996094, + "logps/rejected": -534.666748046875, + "loss": 0.2829, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.273956298828125, + "rewards/margins": 4.010583400726318, + "rewards/rejected": -2.7366271018981934, + "step": 1554 + }, + { + "epoch": 1.98, + "learning_rate": 1.80473381143853e-11, + "logits/chosen": -3.2723960876464844, + "logits/rejected": -3.204866409301758, + "logps/chosen": -259.61962890625, + "logps/rejected": -468.594970703125, + "loss": 0.2789, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1158615350723267, + "rewards/margins": 3.654318332672119, + "rewards/rejected": -2.538456916809082, + "step": 1555 + }, + { + "epoch": 1.98, + "learning_rate": 1.53777503982655e-11, + "logits/chosen": -3.2055201530456543, + "logits/rejected": -3.1811156272888184, + "logps/chosen": -270.93914794921875, + "logps/rejected": -813.3565673828125, + "loss": 0.2677, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1786636114120483, + "rewards/margins": 6.624018669128418, + "rewards/rejected": -5.445355415344238, + "step": 1556 + }, + { + "epoch": 1.98, + "learning_rate": 1.2921687730671481e-11, + "logits/chosen": -3.247751235961914, + "logits/rejected": -3.159031867980957, + "logps/chosen": -230.68780517578125, + "logps/rejected": -1145.7183837890625, + "loss": 0.2615, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2488434314727783, + "rewards/margins": 6.454555034637451, + "rewards/rejected": -5.205711364746094, + "step": 1557 + }, + { + "epoch": 1.99, + "learning_rate": 1.0679160603449533e-11, + "logits/chosen": -3.2718377113342285, + "logits/rejected": -3.058849811553955, + "logps/chosen": -238.1702117919922, + "logps/rejected": -587.04541015625, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1504249572753906, + "rewards/margins": 3.877957344055176, + "rewards/rejected": -2.727532148361206, + "step": 1558 + }, + { + "epoch": 1.99, + "learning_rate": 8.65017859626449e-12, + "logits/chosen": -3.161247968673706, + "logits/rejected": -3.038239002227783, + "logps/chosen": -265.9278564453125, + "logps/rejected": -524.273193359375, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3576819896697998, + "rewards/margins": 3.562699317932129, + "rewards/rejected": -2.20501708984375, + "step": 1559 + }, + { + "epoch": 1.99, + "learning_rate": 6.834750376549792e-12, + "logits/chosen": -3.277928352355957, + "logits/rejected": -3.1692378520965576, + "logps/chosen": -284.18658447265625, + "logps/rejected": -749.90869140625, + "loss": 0.3031, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4931793212890625, + "rewards/margins": 5.769735813140869, + "rewards/rejected": -4.276556491851807, + "step": 1560 + }, + { + "epoch": 1.99, + "learning_rate": 5.232883699485269e-12, + "logits/chosen": -3.1907424926757812, + "logits/rejected": -3.0535812377929688, + "logps/chosen": -250.10009765625, + "logps/rejected": -686.5662841796875, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0566871166229248, + "rewards/margins": 4.766633033752441, + "rewards/rejected": -3.7099456787109375, + "step": 1561 + }, + { + "epoch": 1.99, + "learning_rate": 3.844585407936085e-12, + "logits/chosen": -3.3234448432922363, + "logits/rejected": -3.1618947982788086, + "logps/chosen": -263.37127685546875, + "logps/rejected": -764.1851806640625, + "loss": 0.2853, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4015305042266846, + "rewards/margins": 6.0342912673950195, + "rewards/rejected": -4.632761001586914, + "step": 1562 + }, + { + "epoch": 1.99, + "learning_rate": 2.669861432463838e-12, + "logits/chosen": -3.2575526237487793, + "logits/rejected": -3.1451010704040527, + "logps/chosen": -270.1014404296875, + "logps/rejected": -288.0342712402344, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4920120239257812, + "rewards/margins": 2.941952705383301, + "rewards/rejected": -1.44994056224823, + "step": 1563 + }, + { + "epoch": 1.99, + "learning_rate": 1.7087167912710475e-12, + "logits/chosen": -3.2748539447784424, + "logits/rejected": -3.1885275840759277, + "logps/chosen": -236.0965118408203, + "logps/rejected": -643.237548828125, + "loss": 0.2816, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2581512928009033, + "rewards/margins": 5.151315689086914, + "rewards/rejected": -3.8931641578674316, + "step": 1564 + }, + { + "epoch": 1.99, + "learning_rate": 9.611555901845036e-13, + "logits/chosen": -3.2058024406433105, + "logits/rejected": -3.230799674987793, + "logps/chosen": -258.579345703125, + "logps/rejected": -746.22802734375, + "loss": 0.2794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0986114740371704, + "rewards/margins": 6.169012546539307, + "rewards/rejected": -5.070401191711426, + "step": 1565 + }, + { + "epoch": 2.0, + "learning_rate": 4.271810226552652e-13, + "logits/chosen": -3.2594006061553955, + "logits/rejected": -3.163715362548828, + "logps/chosen": -246.6331024169922, + "logps/rejected": -659.12890625, + "loss": 0.2818, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1596648693084717, + "rewards/margins": 4.786562919616699, + "rewards/rejected": -3.6268982887268066, + "step": 1566 + }, + { + "epoch": 2.0, + "learning_rate": 1.0679536971425207e-13, + "logits/chosen": -3.2487845420837402, + "logits/rejected": -3.148239850997925, + "logps/chosen": -244.36744689941406, + "logps/rejected": -656.89111328125, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5079383850097656, + "rewards/margins": 4.51041316986084, + "rewards/rejected": -3.0024750232696533, + "step": 1567 + }, + { + "epoch": 2.0, + "learning_rate": 0.0, + "logits/chosen": -3.218925952911377, + "logits/rejected": -3.1105458736419678, + "logps/chosen": -249.16905212402344, + "logps/rejected": -787.853515625, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3143889904022217, + "rewards/margins": 6.26446533203125, + "rewards/rejected": -4.950076103210449, + "step": 1568 + }, + { + "epoch": 2.0, + "step": 1568, + "total_flos": 0.0, + "train_loss": 0.36147386069428555, + "train_runtime": 4230.9552, + "train_samples_per_second": 2.966, + "train_steps_per_second": 0.371 + } + ], + "logging_steps": 1.0, + "max_steps": 1568, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..00aa4a90c40082def1ece51fb10946feb58638c3 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "up_proj", + "v_proj", + "gate_proj", + "down_proj", + "o_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46fd032e818b08a0b6dfcb8cb29ff0585777ce05 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c1f44415569e7076147c962edba6b9166c8895e5c9d81f2ef8c76787788c2d4 +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf836a6ce3d0c3ee740fab12bd4a5d84fa61999 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-Iter2_lora/trainer_state.json @@ -0,0 +1,11020 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 785, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.166666666666666e-09, + "logits/chosen": -3.223076820373535, + "logits/rejected": -3.039369583129883, + "logps/chosen": -302.1958923339844, + "logps/rejected": -658.036376953125, + "loss": 1.006, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04084014892578125, + "rewards/margins": -0.09608916938304901, + "rewards/rejected": 0.05524902418255806, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -3.2066359519958496, + "logits/rejected": -3.055814743041992, + "logps/chosen": -262.7005615234375, + "logps/rejected": -286.9210205078125, + "loss": 0.9902, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.00834808312356472, + "rewards/margins": -0.00834808312356472, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-08, + "logits/chosen": -3.198420524597168, + "logits/rejected": -3.1507694721221924, + "logps/chosen": -234.94073486328125, + "logps/rejected": -682.7703857421875, + "loss": 0.9559, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.05233611911535263, + "rewards/margins": -0.08609466254711151, + "rewards/rejected": 0.03375854715704918, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -3.197138786315918, + "logits/rejected": -3.0649666786193848, + "logps/chosen": -273.46551513671875, + "logps/rejected": -412.6242980957031, + "loss": 0.9799, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0019104003440588713, + "rewards/margins": -0.00374526996165514, + "rewards/rejected": 0.0056556700728833675, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333335e-08, + "logits/chosen": -3.1463894844055176, + "logits/rejected": -3.0807230472564697, + "logps/chosen": -252.8007354736328, + "logps/rejected": -357.8232421875, + "loss": 0.936, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00300445593893528, + "rewards/margins": -0.0014923103153705597, + "rewards/rejected": 0.0044967662543058395, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-08, + "logits/chosen": -3.232449531555176, + "logits/rejected": -3.136561393737793, + "logps/chosen": -232.880859375, + "logps/rejected": -671.6913452148438, + "loss": 0.9844, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0041252137161791325, + "rewards/margins": -0.031914521008729935, + "rewards/rejected": 0.03603973612189293, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.9166666666666666e-08, + "logits/chosen": -3.1608972549438477, + "logits/rejected": -3.0180768966674805, + "logps/chosen": -280.5039978027344, + "logps/rejected": -801.955322265625, + "loss": 0.9748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001507568173110485, + "rewards/margins": 0.06836700439453125, + "rewards/rejected": -0.06685943901538849, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -3.1237308979034424, + "logits/rejected": -3.0870022773742676, + "logps/chosen": -314.3907470703125, + "logps/rejected": -567.3743896484375, + "loss": 0.9759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03558655083179474, + "rewards/margins": 0.014642334543168545, + "rewards/rejected": 0.02094421535730362, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 3.75e-08, + "logits/chosen": -3.187898635864258, + "logits/rejected": -3.119971990585327, + "logps/chosen": -288.55377197265625, + "logps/rejected": -563.0985717773438, + "loss": 0.9888, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.012759399600327015, + "rewards/margins": 0.04354553297162056, + "rewards/rejected": -0.056304931640625, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -3.159487724304199, + "logits/rejected": -3.1265716552734375, + "logps/chosen": -283.5948181152344, + "logps/rejected": -586.64599609375, + "loss": 0.9637, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02445526421070099, + "rewards/margins": -0.03331604599952698, + "rewards/rejected": 0.008860781788825989, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.583333333333333e-08, + "logits/chosen": -3.2147293090820312, + "logits/rejected": -3.076552152633667, + "logps/chosen": -261.25048828125, + "logps/rejected": -660.6452026367188, + "loss": 0.9399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02673950232565403, + "rewards/margins": 0.11229552328586578, + "rewards/rejected": -0.0855560302734375, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 5e-08, + "logits/chosen": -3.235034465789795, + "logits/rejected": -3.055673122406006, + "logps/chosen": -239.13333129882812, + "logps/rejected": -606.3372192382812, + "loss": 0.9781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01911010779440403, + "rewards/margins": 0.08505554497241974, + "rewards/rejected": -0.06594543159008026, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 5.416666666666666e-08, + "logits/chosen": -3.165511131286621, + "logits/rejected": -3.0948314666748047, + "logps/chosen": -275.40582275390625, + "logps/rejected": -911.6729736328125, + "loss": 0.9777, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0237274169921875, + "rewards/margins": -0.05711975321173668, + "rewards/rejected": 0.03339233249425888, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -3.2220683097839355, + "logits/rejected": -3.032884120941162, + "logps/chosen": -268.0560302734375, + "logps/rejected": -441.7408142089844, + "loss": 0.9697, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0008972170762717724, + "rewards/margins": 0.00715484656393528, + "rewards/rejected": -0.00625762902200222, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 6.25e-08, + "logits/chosen": -3.1670706272125244, + "logits/rejected": -3.097599983215332, + "logps/chosen": -284.78741455078125, + "logps/rejected": -389.54638671875, + "loss": 0.9552, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05275116115808487, + "rewards/margins": 0.0012817373499274254, + "rewards/rejected": -0.05403289571404457, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -3.1468987464904785, + "logits/rejected": -3.0954442024230957, + "logps/chosen": -305.884033203125, + "logps/rejected": -569.9644775390625, + "loss": 0.9556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018280029296875, + "rewards/margins": 0.03513794019818306, + "rewards/rejected": -0.05341796949505806, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 7.083333333333334e-08, + "logits/chosen": -3.1977272033691406, + "logits/rejected": -3.13728666305542, + "logps/chosen": -235.10595703125, + "logps/rejected": -315.6377258300781, + "loss": 0.9357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0274581927806139, + "rewards/margins": 0.04109954833984375, + "rewards/rejected": -0.0685577392578125, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-08, + "logits/chosen": -3.1752021312713623, + "logits/rejected": -3.048745632171631, + "logps/chosen": -265.75537109375, + "logps/rejected": -1010.5841064453125, + "loss": 0.9564, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.043997958302497864, + "rewards/margins": -0.014098359271883965, + "rewards/rejected": -0.02989959716796875, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 7.916666666666665e-08, + "logits/chosen": -3.2247776985168457, + "logits/rejected": -3.0755369663238525, + "logps/chosen": -260.9554443359375, + "logps/rejected": -645.4840087890625, + "loss": 0.9381, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02179718017578125, + "rewards/margins": 0.006414793431758881, + "rewards/rejected": -0.02821197733283043, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -3.1258833408355713, + "logits/rejected": -3.0245723724365234, + "logps/chosen": -261.0249938964844, + "logps/rejected": -1453.7095947265625, + "loss": 0.9429, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.014951324090361595, + "rewards/margins": 0.002057647332549095, + "rewards/rejected": 0.0128936767578125, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 8.75e-08, + "logits/chosen": -3.1926276683807373, + "logits/rejected": -3.138760566711426, + "logps/chosen": -237.28909301757812, + "logps/rejected": -801.627685546875, + "loss": 0.9081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.051605224609375, + "rewards/margins": 0.09879150986671448, + "rewards/rejected": -0.04718627780675888, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -3.1973962783813477, + "logits/rejected": -3.105912685394287, + "logps/chosen": -272.87603759765625, + "logps/rejected": -637.0462036132812, + "loss": 0.9258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02359771728515625, + "rewards/margins": 0.1329086422920227, + "rewards/rejected": -0.10931091755628586, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 9.583333333333334e-08, + "logits/chosen": -3.132089614868164, + "logits/rejected": -3.0427796840667725, + "logps/chosen": -262.61834716796875, + "logps/rejected": -591.9404296875, + "loss": 0.9491, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01766204833984375, + "rewards/margins": 0.0077667236328125, + "rewards/rejected": -0.02542877197265625, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -3.182462215423584, + "logits/rejected": -3.044607162475586, + "logps/chosen": -246.0254364013672, + "logps/rejected": -1161.9532470703125, + "loss": 0.9038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02161255106329918, + "rewards/margins": 0.17081299424171448, + "rewards/rejected": -0.149200439453125, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 9.999957394092149e-08, + "logits/chosen": -3.1684608459472656, + "logits/rejected": -3.1579442024230957, + "logps/chosen": -280.45013427734375, + "logps/rejected": -749.13525390625, + "loss": 0.9297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02694854885339737, + "rewards/margins": 0.13072356581687927, + "rewards/rejected": -0.1037750244140625, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 9.9998295770947e-08, + "logits/chosen": -3.2156949043273926, + "logits/rejected": -3.105149269104004, + "logps/chosen": -255.91046142578125, + "logps/rejected": -762.811279296875, + "loss": 0.878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04498291015625, + "rewards/margins": 0.07990722358226776, + "rewards/rejected": -0.12489013373851776, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 9.999616551185958e-08, + "logits/chosen": -3.204521656036377, + "logits/rejected": -3.026042938232422, + "logps/chosen": -304.34185791015625, + "logps/rejected": -441.12579345703125, + "loss": 0.9135, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013603211380541325, + "rewards/margins": 0.20514068007469177, + "rewards/rejected": -0.19153746962547302, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 9.999318319996387e-08, + "logits/chosen": -3.1620066165924072, + "logits/rejected": -3.0978848934173584, + "logps/chosen": -290.56536865234375, + "logps/rejected": -753.123291015625, + "loss": 0.9425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07219085842370987, + "rewards/margins": 0.221781924366951, + "rewards/rejected": -0.14959105849266052, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 9.998934888608552e-08, + "logits/chosen": -3.2355000972747803, + "logits/rejected": -3.039275646209717, + "logps/chosen": -261.7984619140625, + "logps/rejected": -453.1309814453125, + "loss": 0.8457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013372039422392845, + "rewards/margins": 0.11456680297851562, + "rewards/rejected": -0.12793883681297302, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 9.998466263557031e-08, + "logits/chosen": -3.2106363773345947, + "logits/rejected": -3.075246572494507, + "logps/chosen": -281.46484375, + "logps/rejected": -578.2721557617188, + "loss": 0.898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1161399856209755, + "rewards/margins": 0.2256423979997635, + "rewards/rejected": -0.109502412378788, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 9.997912452828299e-08, + "logits/chosen": -3.095472812652588, + "logits/rejected": -3.043105125427246, + "logps/chosen": -282.76666259765625, + "logps/rejected": -656.03173828125, + "loss": 0.8791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017916107550263405, + "rewards/margins": 0.25847703218460083, + "rewards/rejected": -0.2763931155204773, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 9.9972734658606e-08, + "logits/chosen": -3.1881537437438965, + "logits/rejected": -3.005635976791382, + "logps/chosen": -245.58267211914062, + "logps/rejected": -905.2758178710938, + "loss": 0.8558, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017705537378787994, + "rewards/margins": 0.2375801056623459, + "rewards/rejected": -0.2552856504917145, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 9.996549313543787e-08, + "logits/chosen": -3.2355399131774902, + "logits/rejected": -3.118703842163086, + "logps/chosen": -264.5144348144531, + "logps/rejected": -827.73876953125, + "loss": 0.8611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06565551459789276, + "rewards/margins": 0.34573060274124146, + "rewards/rejected": -0.2800750732421875, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 9.99574000821912e-08, + "logits/chosen": -3.1365790367126465, + "logits/rejected": -3.059488296508789, + "logps/chosen": -250.96231079101562, + "logps/rejected": -419.5760498046875, + "loss": 0.8045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.058927156031131744, + "rewards/margins": 0.1508079618215561, + "rewards/rejected": -0.09188079833984375, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 9.994845563679079e-08, + "logits/chosen": -3.239062786102295, + "logits/rejected": -3.1075329780578613, + "logps/chosen": -250.197021484375, + "logps/rejected": -339.70220947265625, + "loss": 0.8563, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.060927584767341614, + "rewards/margins": 0.15291520953178406, + "rewards/rejected": -0.09198760986328125, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 9.993865995167111e-08, + "logits/chosen": -3.1799068450927734, + "logits/rejected": -3.0092544555664062, + "logps/chosen": -254.4027862548828, + "logps/rejected": -611.3113403320312, + "loss": 0.8831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00373992882668972, + "rewards/margins": 0.18208616971969604, + "rewards/rejected": -0.17834624648094177, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 9.992801319377377e-08, + "logits/chosen": -3.1221165657043457, + "logits/rejected": -3.004413604736328, + "logps/chosen": -246.70489501953125, + "logps/rejected": -1270.856689453125, + "loss": 0.7865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.039228059351444244, + "rewards/margins": 0.4970130920410156, + "rewards/rejected": -0.457785040140152, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 9.991651554454472e-08, + "logits/chosen": -3.211134910583496, + "logits/rejected": -3.0929484367370605, + "logps/chosen": -274.79241943359375, + "logps/rejected": -931.576416015625, + "loss": 0.8481, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07150574028491974, + "rewards/margins": 0.38961488008499146, + "rewards/rejected": -0.3181091547012329, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 9.990416719993104e-08, + "logits/chosen": -3.180974006652832, + "logits/rejected": -3.119962453842163, + "logps/chosen": -237.73085021972656, + "logps/rejected": -501.45233154296875, + "loss": 0.8326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03742675855755806, + "rewards/margins": 0.17905426025390625, + "rewards/rejected": -0.1416275054216385, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 9.989096837037771e-08, + "logits/chosen": -3.20758056640625, + "logits/rejected": -3.059755563735962, + "logps/chosen": -254.5674591064453, + "logps/rejected": -592.4034423828125, + "loss": 0.8129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06148376315832138, + "rewards/margins": 0.3223816156387329, + "rewards/rejected": -0.26089784502983093, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 9.987691928082398e-08, + "logits/chosen": -3.193297863006592, + "logits/rejected": -3.084339141845703, + "logps/chosen": -257.961669921875, + "logps/rejected": -525.4642944335938, + "loss": 0.8526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041063692420721054, + "rewards/margins": 0.27878037095069885, + "rewards/rejected": -0.2377166748046875, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 9.986202017069955e-08, + "logits/chosen": -3.250899076461792, + "logits/rejected": -3.1354267597198486, + "logps/chosen": -261.0430908203125, + "logps/rejected": -1150.981689453125, + "loss": 0.7878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07528305053710938, + "rewards/margins": 0.5659202337265015, + "rewards/rejected": -0.4906372129917145, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 9.984627129392044e-08, + "logits/chosen": -3.101931095123291, + "logits/rejected": -3.117337226867676, + "logps/chosen": -331.6098937988281, + "logps/rejected": -680.9990844726562, + "loss": 0.7514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11174621433019638, + "rewards/margins": 0.5761383175849915, + "rewards/rejected": -0.4643920958042145, + "step": 43 + }, + { + "epoch": 0.06, + "learning_rate": 9.982967291888473e-08, + "logits/chosen": -3.1679434776306152, + "logits/rejected": -3.083965301513672, + "logps/chosen": -256.9238586425781, + "logps/rejected": -635.49951171875, + "loss": 0.8042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.041410066187381744, + "rewards/margins": 0.4220237731933594, + "rewards/rejected": -0.3806137144565582, + "step": 44 + }, + { + "epoch": 0.06, + "learning_rate": 9.981222532846798e-08, + "logits/chosen": -3.1968750953674316, + "logits/rejected": -3.0200233459472656, + "logps/chosen": -250.82745361328125, + "logps/rejected": -895.6400146484375, + "loss": 0.7339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10132065415382385, + "rewards/margins": 0.573876142501831, + "rewards/rejected": -0.472555547952652, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 9.979392882001833e-08, + "logits/chosen": -3.1436820030212402, + "logits/rejected": -2.991604804992676, + "logps/chosen": -275.6556091308594, + "logps/rejected": -1221.651123046875, + "loss": 0.7264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12853698432445526, + "rewards/margins": 0.7954437732696533, + "rewards/rejected": -0.6669067144393921, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 9.977478370535154e-08, + "logits/chosen": -3.1898298263549805, + "logits/rejected": -3.071758270263672, + "logps/chosen": -272.7769775390625, + "logps/rejected": -1817.4176025390625, + "loss": 0.76, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1108856201171875, + "rewards/margins": 1.174383521080017, + "rewards/rejected": -1.0634980201721191, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 9.975479031074562e-08, + "logits/chosen": -3.224174976348877, + "logits/rejected": -3.0970184803009033, + "logps/chosen": -302.3545837402344, + "logps/rejected": -445.9790954589844, + "loss": 0.8426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0809783935546875, + "rewards/margins": 0.3504486083984375, + "rewards/rejected": -0.26947021484375, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 9.973394897693524e-08, + "logits/chosen": -3.184636116027832, + "logits/rejected": -3.163606643676758, + "logps/chosen": -247.13436889648438, + "logps/rejected": -411.5857849121094, + "loss": 0.7697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11240005493164062, + "rewards/margins": 0.4137512445449829, + "rewards/rejected": -0.3013511598110199, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 9.971226005910596e-08, + "logits/chosen": -3.255574941635132, + "logits/rejected": -3.06014347076416, + "logps/chosen": -234.7982177734375, + "logps/rejected": -586.374755859375, + "loss": 0.7503, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0786338821053505, + "rewards/margins": 0.4293510615825653, + "rewards/rejected": -0.3507171869277954, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 9.968972392688824e-08, + "logits/chosen": -3.1631429195404053, + "logits/rejected": -3.062849283218384, + "logps/chosen": -283.9481201171875, + "logps/rejected": -297.0478210449219, + "loss": 0.7765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01022796705365181, + "rewards/margins": 0.22313843667507172, + "rewards/rejected": -0.2129104733467102, + "step": 51 + }, + { + "epoch": 0.07, + "learning_rate": 9.9666340964351e-08, + "logits/chosen": -3.174736499786377, + "logits/rejected": -3.1030001640319824, + "logps/chosen": -262.7620544433594, + "logps/rejected": -1001.947265625, + "loss": 0.6878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1342155486345291, + "rewards/margins": 0.8449150323867798, + "rewards/rejected": -0.7106994390487671, + "step": 52 + }, + { + "epoch": 0.07, + "learning_rate": 9.964211156999518e-08, + "logits/chosen": -3.16898512840271, + "logits/rejected": -3.072556495666504, + "logps/chosen": -264.182373046875, + "logps/rejected": -510.10821533203125, + "loss": 0.776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14110489189624786, + "rewards/margins": 0.4063163697719574, + "rewards/rejected": -0.2652114927768707, + "step": 53 + }, + { + "epoch": 0.07, + "learning_rate": 9.961703615674692e-08, + "logits/chosen": -3.166496992111206, + "logits/rejected": -3.0022506713867188, + "logps/chosen": -307.13812255859375, + "logps/rejected": -641.7069702148438, + "loss": 0.7036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01009216345846653, + "rewards/margins": 0.4203948974609375, + "rewards/rejected": -0.4103027582168579, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 9.959111515195054e-08, + "logits/chosen": -3.200146198272705, + "logits/rejected": -3.0633604526519775, + "logps/chosen": -294.08294677734375, + "logps/rejected": -436.69940185546875, + "loss": 0.7705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10203857719898224, + "rewards/margins": 0.4695221185684204, + "rewards/rejected": -0.367483526468277, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 9.956434899736117e-08, + "logits/chosen": -3.245816230773926, + "logits/rejected": -3.09527587890625, + "logps/chosen": -277.6791687011719, + "logps/rejected": -673.370849609375, + "loss": 0.7477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07360687106847763, + "rewards/margins": 0.6104049682617188, + "rewards/rejected": -0.5367981195449829, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 9.953673814913738e-08, + "logits/chosen": -3.2651329040527344, + "logits/rejected": -3.122976779937744, + "logps/chosen": -270.725830078125, + "logps/rejected": -430.9628601074219, + "loss": 0.7239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08566589653491974, + "rewards/margins": 0.3602661192417145, + "rewards/rejected": -0.27460020780563354, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 9.950828307783326e-08, + "logits/chosen": -3.2646303176879883, + "logits/rejected": -3.056523323059082, + "logps/chosen": -259.66986083984375, + "logps/rejected": -303.99554443359375, + "loss": 0.7053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.182200625538826, + "rewards/margins": 0.4058746099472046, + "rewards/rejected": -0.2236739993095398, + "step": 58 + }, + { + "epoch": 0.08, + "learning_rate": 9.947898426839047e-08, + "logits/chosen": -3.1488139629364014, + "logits/rejected": -3.048156261444092, + "logps/chosen": -297.4549255371094, + "logps/rejected": -382.73272705078125, + "loss": 0.8168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13459473848342896, + "rewards/margins": 0.3947242796421051, + "rewards/rejected": -0.26012957096099854, + "step": 59 + }, + { + "epoch": 0.08, + "learning_rate": 9.944884222012994e-08, + "logits/chosen": -3.209730625152588, + "logits/rejected": -3.121743679046631, + "logps/chosen": -221.03335571289062, + "logps/rejected": -746.807861328125, + "loss": 0.7196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07928009331226349, + "rewards/margins": 0.772480845451355, + "rewards/rejected": -0.6932007074356079, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 9.941785744674342e-08, + "logits/chosen": -3.110835075378418, + "logits/rejected": -3.127659320831299, + "logps/chosen": -241.95144653320312, + "logps/rejected": -708.1168212890625, + "loss": 0.7271, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11676406860351562, + "rewards/margins": 0.7540931701660156, + "rewards/rejected": -0.6373291015625, + "step": 61 + }, + { + "epoch": 0.08, + "learning_rate": 9.938603047628467e-08, + "logits/chosen": -3.1782002449035645, + "logits/rejected": -3.0626893043518066, + "logps/chosen": -295.6790771484375, + "logps/rejected": -1098.1427001953125, + "loss": 0.6648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09869156032800674, + "rewards/margins": 1.1151282787322998, + "rewards/rejected": -1.0164368152618408, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 9.935336185116046e-08, + "logits/chosen": -3.148869037628174, + "logits/rejected": -3.022831916809082, + "logps/chosen": -275.51885986328125, + "logps/rejected": -1176.318115234375, + "loss": 0.6199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1838584989309311, + "rewards/margins": 1.0431771278381348, + "rewards/rejected": -0.8593185544013977, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 9.93198521281214e-08, + "logits/chosen": -3.2151002883911133, + "logits/rejected": -3.1133337020874023, + "logps/chosen": -279.64630126953125, + "logps/rejected": -356.24237060546875, + "loss": 0.6819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1641799956560135, + "rewards/margins": 0.5020454525947571, + "rewards/rejected": -0.3378654420375824, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 9.928550187825233e-08, + "logits/chosen": -3.17185115814209, + "logits/rejected": -3.032460927963257, + "logps/chosen": -243.22329711914062, + "logps/rejected": -388.74444580078125, + "loss": 0.7024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1630859375, + "rewards/margins": 0.4629608392715454, + "rewards/rejected": -0.2998749017715454, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 9.925031168696267e-08, + "logits/chosen": -3.231627941131592, + "logits/rejected": -3.1192760467529297, + "logps/chosen": -290.7679748535156, + "logps/rejected": -608.3499755859375, + "loss": 0.6977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05772857367992401, + "rewards/margins": 0.7135162353515625, + "rewards/rejected": -0.6557877063751221, + "step": 66 + }, + { + "epoch": 0.09, + "learning_rate": 9.921428215397648e-08, + "logits/chosen": -3.2077460289001465, + "logits/rejected": -3.0078506469726562, + "logps/chosen": -258.66021728515625, + "logps/rejected": -925.7459716796875, + "loss": 0.6097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15738067030906677, + "rewards/margins": 0.9973236322402954, + "rewards/rejected": -0.839942991733551, + "step": 67 + }, + { + "epoch": 0.09, + "learning_rate": 9.917741389332211e-08, + "logits/chosen": -3.1573476791381836, + "logits/rejected": -3.0823888778686523, + "logps/chosen": -266.9556884765625, + "logps/rejected": -785.5771484375, + "loss": 0.6304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17381897568702698, + "rewards/margins": 0.8783019781112671, + "rewards/rejected": -0.7044830322265625, + "step": 68 + }, + { + "epoch": 0.09, + "learning_rate": 9.913970753332188e-08, + "logits/chosen": -3.176018238067627, + "logits/rejected": -3.180457592010498, + "logps/chosen": -269.9354248046875, + "logps/rejected": -789.6067504882812, + "loss": 0.6435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18521729111671448, + "rewards/margins": 1.0441651344299316, + "rewards/rejected": -0.85894775390625, + "step": 69 + }, + { + "epoch": 0.09, + "learning_rate": 9.910116371658122e-08, + "logits/chosen": -3.221703052520752, + "logits/rejected": -3.0668187141418457, + "logps/chosen": -260.3766174316406, + "logps/rejected": -646.7213745117188, + "loss": 0.6049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22210311889648438, + "rewards/margins": 0.860529363155365, + "rewards/rejected": -0.6384262442588806, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 9.906178309997789e-08, + "logits/chosen": -3.191098690032959, + "logits/rejected": -3.0981838703155518, + "logps/chosen": -271.6564025878906, + "logps/rejected": -398.9933776855469, + "loss": 0.691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21063232421875, + "rewards/margins": 0.6066665649414062, + "rewards/rejected": -0.39603424072265625, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 9.902156635465066e-08, + "logits/chosen": -3.2427453994750977, + "logits/rejected": -2.9689643383026123, + "logps/chosen": -286.65283203125, + "logps/rejected": -920.5968627929688, + "loss": 0.6183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11329040676355362, + "rewards/margins": 1.1821014881134033, + "rewards/rejected": -1.068811058998108, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 9.898051416598789e-08, + "logits/chosen": -3.1425633430480957, + "logits/rejected": -3.105597496032715, + "logps/chosen": -301.1470947265625, + "logps/rejected": -733.780029296875, + "loss": 0.6182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2379196286201477, + "rewards/margins": 0.9693862795829773, + "rewards/rejected": -0.7314666509628296, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 9.893862723361588e-08, + "logits/chosen": -3.227356433868408, + "logits/rejected": -3.01485538482666, + "logps/chosen": -260.4971923828125, + "logps/rejected": -619.3699951171875, + "loss": 0.6269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.208934023976326, + "rewards/margins": 0.8035354614257812, + "rewards/rejected": -0.5946014523506165, + "step": 74 + }, + { + "epoch": 0.1, + "learning_rate": 9.889590627138698e-08, + "logits/chosen": -3.1905455589294434, + "logits/rejected": -3.0944747924804688, + "logps/chosen": -274.2098693847656, + "logps/rejected": -543.66650390625, + "loss": 0.6659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15494613349437714, + "rewards/margins": 0.8486900329589844, + "rewards/rejected": -0.6937439441680908, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 9.88523520073673e-08, + "logits/chosen": -3.1820149421691895, + "logits/rejected": -3.1203038692474365, + "logps/chosen": -274.2257995605469, + "logps/rejected": -812.680419921875, + "loss": 0.6836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1324058473110199, + "rewards/margins": 1.0611076354980469, + "rewards/rejected": -0.9287017583847046, + "step": 76 + }, + { + "epoch": 0.1, + "learning_rate": 9.880796518382447e-08, + "logits/chosen": -3.213700294494629, + "logits/rejected": -3.166416645050049, + "logps/chosen": -247.4239501953125, + "logps/rejected": -586.9210815429688, + "loss": 0.6375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1363929808139801, + "rewards/margins": 0.7331809997558594, + "rewards/rejected": -0.5967880487442017, + "step": 77 + }, + { + "epoch": 0.1, + "learning_rate": 9.876274655721479e-08, + "logits/chosen": -3.1597483158111572, + "logits/rejected": -3.1571998596191406, + "logps/chosen": -255.92245483398438, + "logps/rejected": -680.7652587890625, + "loss": 0.6202, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17129822075366974, + "rewards/margins": 1.1390212774276733, + "rewards/rejected": -0.9677230715751648, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 9.871669689817057e-08, + "logits/chosen": -3.1521248817443848, + "logits/rejected": -3.048758029937744, + "logps/chosen": -267.6583251953125, + "logps/rejected": -1230.0618896484375, + "loss": 0.5617, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22182846069335938, + "rewards/margins": 1.690727949142456, + "rewards/rejected": -1.4688994884490967, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 9.866981699148681e-08, + "logits/chosen": -3.1140246391296387, + "logits/rejected": -3.0430374145507812, + "logps/chosen": -285.7283935546875, + "logps/rejected": -796.1590576171875, + "loss": 0.6762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13706664741039276, + "rewards/margins": 1.2542023658752441, + "rewards/rejected": -1.1171356439590454, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 9.86221076361079e-08, + "logits/chosen": -3.1935176849365234, + "logits/rejected": -3.144331932067871, + "logps/chosen": -255.46847534179688, + "logps/rejected": -742.5869750976562, + "loss": 0.6424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16465148329734802, + "rewards/margins": 1.0385010242462158, + "rewards/rejected": -0.8738495111465454, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 9.857356964511398e-08, + "logits/chosen": -3.2355988025665283, + "logits/rejected": -3.0954267978668213, + "logps/chosen": -301.2239990234375, + "logps/rejected": -517.2470703125, + "loss": 0.6491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19350892305374146, + "rewards/margins": 0.6421432495117188, + "rewards/rejected": -0.4486343264579773, + "step": 82 + }, + { + "epoch": 0.11, + "learning_rate": 9.852420384570717e-08, + "logits/chosen": -3.2138376235961914, + "logits/rejected": -3.085735321044922, + "logps/chosen": -263.7776184082031, + "logps/rejected": -655.81396484375, + "loss": 0.5957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1933586150407791, + "rewards/margins": 1.0592460632324219, + "rewards/rejected": -0.865887463092804, + "step": 83 + }, + { + "epoch": 0.11, + "learning_rate": 9.847401107919729e-08, + "logits/chosen": -3.172567367553711, + "logits/rejected": -3.0761873722076416, + "logps/chosen": -299.12481689453125, + "logps/rejected": -918.1046752929688, + "loss": 0.5793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1556900143623352, + "rewards/margins": 1.1897156238555908, + "rewards/rejected": -1.0340255498886108, + "step": 84 + }, + { + "epoch": 0.11, + "learning_rate": 9.842299220098773e-08, + "logits/chosen": -3.194518566131592, + "logits/rejected": -3.045139789581299, + "logps/chosen": -280.65484619140625, + "logps/rejected": -749.5618286132812, + "loss": 0.6124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14843979477882385, + "rewards/margins": 1.059644341468811, + "rewards/rejected": -0.9112045764923096, + "step": 85 + }, + { + "epoch": 0.11, + "learning_rate": 9.837114808056073e-08, + "logits/chosen": -3.238245964050293, + "logits/rejected": -3.1594550609588623, + "logps/chosen": -278.4967041015625, + "logps/rejected": -715.8245239257812, + "loss": 0.5248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15439453721046448, + "rewards/margins": 1.1996674537658691, + "rewards/rejected": -1.0452728271484375, + "step": 86 + }, + { + "epoch": 0.11, + "learning_rate": 9.831847960146262e-08, + "logits/chosen": -3.2036333084106445, + "logits/rejected": -2.9296021461486816, + "logps/chosen": -245.8565673828125, + "logps/rejected": -1370.56298828125, + "loss": 0.4649, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.207774356007576, + "rewards/margins": 2.0242233276367188, + "rewards/rejected": -1.8164489269256592, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 9.826498766128874e-08, + "logits/chosen": -3.201167106628418, + "logits/rejected": -3.042653799057007, + "logps/chosen": -244.21070861816406, + "logps/rejected": -240.9681396484375, + "loss": 0.6504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19476166367530823, + "rewards/margins": 0.34556275606155396, + "rewards/rejected": -0.15080109238624573, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 9.821067317166818e-08, + "logits/chosen": -3.1538548469543457, + "logits/rejected": -3.1135292053222656, + "logps/chosen": -283.1948547363281, + "logps/rejected": -554.5401611328125, + "loss": 0.6566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15836791694164276, + "rewards/margins": 0.8565673828125, + "rewards/rejected": -0.6981995105743408, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 9.815553705824815e-08, + "logits/chosen": -3.2128400802612305, + "logits/rejected": -3.1956870555877686, + "logps/chosen": -317.129150390625, + "logps/rejected": -704.1732177734375, + "loss": 0.6252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22450561821460724, + "rewards/margins": 1.2861419916152954, + "rewards/rejected": -1.0616363286972046, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 9.809958026067837e-08, + "logits/chosen": -3.159946918487549, + "logits/rejected": -3.0800793170928955, + "logps/chosen": -261.44805908203125, + "logps/rejected": -509.176513671875, + "loss": 0.6315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2213791012763977, + "rewards/margins": 1.0269485712051392, + "rewards/rejected": -0.8055695295333862, + "step": 91 + }, + { + "epoch": 0.12, + "learning_rate": 9.804280373259488e-08, + "logits/chosen": -3.16618013381958, + "logits/rejected": -3.162106513977051, + "logps/chosen": -263.606689453125, + "logps/rejected": -643.2205200195312, + "loss": 0.5359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2625289857387543, + "rewards/margins": 1.205230712890625, + "rewards/rejected": -0.9427016973495483, + "step": 92 + }, + { + "epoch": 0.12, + "learning_rate": 9.798520844160388e-08, + "logits/chosen": -3.2510385513305664, + "logits/rejected": -3.1396141052246094, + "logps/chosen": -271.22406005859375, + "logps/rejected": -872.64208984375, + "loss": 0.5662, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23353728652000427, + "rewards/margins": 1.522740125656128, + "rewards/rejected": -1.2892029285430908, + "step": 93 + }, + { + "epoch": 0.12, + "learning_rate": 9.792679536926524e-08, + "logits/chosen": -3.2386436462402344, + "logits/rejected": -2.9828691482543945, + "logps/chosen": -278.4083251953125, + "logps/rejected": -1424.1182861328125, + "loss": 0.5218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2777557373046875, + "rewards/margins": 2.293881416320801, + "rewards/rejected": -2.016125440597534, + "step": 94 + }, + { + "epoch": 0.12, + "learning_rate": 9.786756551107578e-08, + "logits/chosen": -3.2125141620635986, + "logits/rejected": -3.026275396347046, + "logps/chosen": -223.73849487304688, + "logps/rejected": -1350.96630859375, + "loss": 0.4974, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22761839628219604, + "rewards/margins": 1.9398009777069092, + "rewards/rejected": -1.7121827602386475, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 9.780751987645221e-08, + "logits/chosen": -3.218113899230957, + "logits/rejected": -3.0869789123535156, + "logps/chosen": -264.7038879394531, + "logps/rejected": -757.8746337890625, + "loss": 0.6166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21227797865867615, + "rewards/margins": 1.4423835277557373, + "rewards/rejected": -1.2301056385040283, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 9.774665948871408e-08, + "logits/chosen": -3.1717796325683594, + "logits/rejected": -3.0863308906555176, + "logps/chosen": -291.37646484375, + "logps/rejected": -775.892333984375, + "loss": 0.5456, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11745910346508026, + "rewards/margins": 1.381591796875, + "rewards/rejected": -1.2641327381134033, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 9.768498538506617e-08, + "logits/chosen": -3.227421283721924, + "logits/rejected": -3.111281394958496, + "logps/chosen": -278.4837951660156, + "logps/rejected": -417.10302734375, + "loss": 0.5765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24097977578639984, + "rewards/margins": 0.8101997375488281, + "rewards/rejected": -0.5692200064659119, + "step": 98 + }, + { + "epoch": 0.13, + "learning_rate": 9.762249861658099e-08, + "logits/chosen": -3.2620139122009277, + "logits/rejected": -3.136842727661133, + "logps/chosen": -252.4404754638672, + "logps/rejected": -681.6051025390625, + "loss": 0.5566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20847778022289276, + "rewards/margins": 1.2991669178009033, + "rewards/rejected": -1.0906890630722046, + "step": 99 + }, + { + "epoch": 0.13, + "learning_rate": 9.755920024818072e-08, + "logits/chosen": -3.191105365753174, + "logits/rejected": -3.1122264862060547, + "logps/chosen": -260.6890869140625, + "logps/rejected": -637.921875, + "loss": 0.5155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2596855163574219, + "rewards/margins": 1.2754921913146973, + "rewards/rejected": -1.0158065557479858, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 9.749509135861916e-08, + "logits/chosen": -3.2023327350616455, + "logits/rejected": -3.120014190673828, + "logps/chosen": -252.36080932617188, + "logps/rejected": -430.3523254394531, + "loss": 0.512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1627601683139801, + "rewards/margins": 0.8160896301269531, + "rewards/rejected": -0.6533294916152954, + "step": 101 + }, + { + "epoch": 0.13, + "learning_rate": 9.743017304046327e-08, + "logits/chosen": -3.2270140647888184, + "logits/rejected": -3.0671257972717285, + "logps/chosen": -251.53106689453125, + "logps/rejected": -679.5632934570312, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28220444917678833, + "rewards/margins": 1.361890435218811, + "rewards/rejected": -1.0796860456466675, + "step": 102 + }, + { + "epoch": 0.13, + "learning_rate": 9.736444640007461e-08, + "logits/chosen": -3.1556220054626465, + "logits/rejected": -3.110731601715088, + "logps/chosen": -251.99371337890625, + "logps/rejected": -799.9169921875, + "loss": 0.5229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13392791152000427, + "rewards/margins": 1.678492784500122, + "rewards/rejected": -1.5445648431777954, + "step": 103 + }, + { + "epoch": 0.13, + "learning_rate": 9.729791255759045e-08, + "logits/chosen": -3.2568795680999756, + "logits/rejected": -3.08252215385437, + "logps/chosen": -260.31182861328125, + "logps/rejected": -635.7320556640625, + "loss": 0.6165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22326813638210297, + "rewards/margins": 0.9363632202148438, + "rewards/rejected": -0.7130950689315796, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 9.723057264690469e-08, + "logits/chosen": -3.238161563873291, + "logits/rejected": -3.0837156772613525, + "logps/chosen": -303.37713623046875, + "logps/rejected": -596.8211669921875, + "loss": 0.5145, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.252532958984375, + "rewards/margins": 1.1266357898712158, + "rewards/rejected": -0.874102771282196, + "step": 105 + }, + { + "epoch": 0.14, + "learning_rate": 9.716242781564854e-08, + "logits/chosen": -3.243218183517456, + "logits/rejected": -3.1227712631225586, + "logps/chosen": -266.5194091796875, + "logps/rejected": -461.1973876953125, + "loss": 0.5368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2078704833984375, + "rewards/margins": 0.9181258678436279, + "rewards/rejected": -0.7102554440498352, + "step": 106 + }, + { + "epoch": 0.14, + "learning_rate": 9.709347922517099e-08, + "logits/chosen": -3.179899215698242, + "logits/rejected": -3.0377731323242188, + "logps/chosen": -257.87701416015625, + "logps/rejected": -516.263427734375, + "loss": 0.538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2843307554721832, + "rewards/margins": 1.0232895612716675, + "rewards/rejected": -0.7389587759971619, + "step": 107 + }, + { + "epoch": 0.14, + "learning_rate": 9.702372805051891e-08, + "logits/chosen": -3.2026143074035645, + "logits/rejected": -3.0887811183929443, + "logps/chosen": -334.0399169921875, + "logps/rejected": -243.12130737304688, + "loss": 0.5725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28722381591796875, + "rewards/margins": 0.5264725089073181, + "rewards/rejected": -0.23924866318702698, + "step": 108 + }, + { + "epoch": 0.14, + "learning_rate": 9.695317548041719e-08, + "logits/chosen": -3.197269916534424, + "logits/rejected": -3.0767698287963867, + "logps/chosen": -257.7247619628906, + "logps/rejected": -439.71246337890625, + "loss": 0.5526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33890610933303833, + "rewards/margins": 1.0757637023925781, + "rewards/rejected": -0.7368576526641846, + "step": 109 + }, + { + "epoch": 0.14, + "learning_rate": 9.688182271724833e-08, + "logits/chosen": -3.2633066177368164, + "logits/rejected": -3.0828347206115723, + "logps/chosen": -250.9093475341797, + "logps/rejected": -820.91796875, + "loss": 0.4915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2983047664165497, + "rewards/margins": 1.7410110235214233, + "rewards/rejected": -1.4427063465118408, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 9.680967097703203e-08, + "logits/chosen": -3.228881359100342, + "logits/rejected": -3.078465461730957, + "logps/chosen": -275.4618835449219, + "logps/rejected": -307.90045166015625, + "loss": 0.557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31545183062553406, + "rewards/margins": 0.8337638974189758, + "rewards/rejected": -0.5183120965957642, + "step": 111 + }, + { + "epoch": 0.14, + "learning_rate": 9.673672148940445e-08, + "logits/chosen": -3.135953903198242, + "logits/rejected": -3.146289825439453, + "logps/chosen": -277.59161376953125, + "logps/rejected": -944.2274780273438, + "loss": 0.5427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36097872257232666, + "rewards/margins": 2.051034450531006, + "rewards/rejected": -1.6900558471679688, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 9.666297549759726e-08, + "logits/chosen": -3.281543731689453, + "logits/rejected": -3.11496639251709, + "logps/chosen": -280.8348388671875, + "logps/rejected": -1003.9251708984375, + "loss": 0.4928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36419984698295593, + "rewards/margins": 2.470532178878784, + "rewards/rejected": -2.106332302093506, + "step": 113 + }, + { + "epoch": 0.15, + "learning_rate": 9.65884342584164e-08, + "logits/chosen": -3.1269402503967285, + "logits/rejected": -2.990598678588867, + "logps/chosen": -293.01348876953125, + "logps/rejected": -1148.6622314453125, + "loss": 0.4581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2702987790107727, + "rewards/margins": 2.0775375366210938, + "rewards/rejected": -1.8072388172149658, + "step": 114 + }, + { + "epoch": 0.15, + "learning_rate": 9.651309904222078e-08, + "logits/chosen": -3.2423791885375977, + "logits/rejected": -3.1098580360412598, + "logps/chosen": -256.99786376953125, + "logps/rejected": -672.1112670898438, + "loss": 0.5179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2046409547328949, + "rewards/margins": 1.2454826831817627, + "rewards/rejected": -1.0408416986465454, + "step": 115 + }, + { + "epoch": 0.15, + "learning_rate": 9.643697113290051e-08, + "logits/chosen": -3.1883420944213867, + "logits/rejected": -3.0837364196777344, + "logps/chosen": -275.5028076171875, + "logps/rejected": -692.0013427734375, + "loss": 0.4904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.330953985452652, + "rewards/margins": 1.3984544277191162, + "rewards/rejected": -1.067500352859497, + "step": 116 + }, + { + "epoch": 0.15, + "learning_rate": 9.6360051827855e-08, + "logits/chosen": -3.2222557067871094, + "logits/rejected": -3.119480609893799, + "logps/chosen": -228.82159423828125, + "logps/rejected": -508.6033935546875, + "loss": 0.4504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2783950865268707, + "rewards/margins": 1.228947401046753, + "rewards/rejected": -0.9505523443222046, + "step": 117 + }, + { + "epoch": 0.15, + "learning_rate": 9.628234243797106e-08, + "logits/chosen": -3.172647476196289, + "logits/rejected": -3.0887198448181152, + "logps/chosen": -279.63812255859375, + "logps/rejected": -996.97607421875, + "loss": 0.5193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30589449405670166, + "rewards/margins": 2.0497238636016846, + "rewards/rejected": -1.743829369544983, + "step": 118 + }, + { + "epoch": 0.15, + "learning_rate": 9.620384428760029e-08, + "logits/chosen": -3.136875629425049, + "logits/rejected": -3.119985580444336, + "logps/chosen": -300.7896728515625, + "logps/rejected": -807.3178100585938, + "loss": 0.514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24753418564796448, + "rewards/margins": 1.7031844854354858, + "rewards/rejected": -1.4556503295898438, + "step": 119 + }, + { + "epoch": 0.15, + "learning_rate": 9.612455871453669e-08, + "logits/chosen": -3.2029643058776855, + "logits/rejected": -2.9941248893737793, + "logps/chosen": -246.60369873046875, + "logps/rejected": -893.73974609375, + "loss": 0.5421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22825774550437927, + "rewards/margins": 1.6941574811935425, + "rewards/rejected": -1.4658997058868408, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 9.604448706999378e-08, + "logits/chosen": -3.229733467102051, + "logits/rejected": -3.0020053386688232, + "logps/chosen": -237.27587890625, + "logps/rejected": -986.417236328125, + "loss": 0.4754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24705886840820312, + "rewards/margins": 2.1934595108032227, + "rewards/rejected": -1.94640052318573, + "step": 121 + }, + { + "epoch": 0.16, + "learning_rate": 9.59636307185816e-08, + "logits/chosen": -3.2045376300811768, + "logits/rejected": -3.180051326751709, + "logps/chosen": -312.288818359375, + "logps/rejected": -459.1131591796875, + "loss": 0.6461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18702088296413422, + "rewards/margins": 1.050201416015625, + "rewards/rejected": -0.8631805777549744, + "step": 122 + }, + { + "epoch": 0.16, + "learning_rate": 9.588199103828345e-08, + "logits/chosen": -3.2150626182556152, + "logits/rejected": -3.131837844848633, + "logps/chosen": -278.1380310058594, + "logps/rejected": -631.116455078125, + "loss": 0.5088, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24539795517921448, + "rewards/margins": 1.543701171875, + "rewards/rejected": -1.298303246498108, + "step": 123 + }, + { + "epoch": 0.16, + "learning_rate": 9.579956942043242e-08, + "logits/chosen": -3.1884818077087402, + "logits/rejected": -3.0181937217712402, + "logps/chosen": -257.4538269042969, + "logps/rejected": -835.2711181640625, + "loss": 0.4792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30469590425491333, + "rewards/margins": 2.078514814376831, + "rewards/rejected": -1.7738189697265625, + "step": 124 + }, + { + "epoch": 0.16, + "learning_rate": 9.571636726968766e-08, + "logits/chosen": -3.293717384338379, + "logits/rejected": -3.086698532104492, + "logps/chosen": -269.9416809082031, + "logps/rejected": -737.4856567382812, + "loss": 0.496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2578277587890625, + "rewards/margins": 1.6447205543518066, + "rewards/rejected": -1.3868927955627441, + "step": 125 + }, + { + "epoch": 0.16, + "learning_rate": 9.563238600401041e-08, + "logits/chosen": -3.1779212951660156, + "logits/rejected": -3.148211717605591, + "logps/chosen": -277.1113586425781, + "logps/rejected": -605.111572265625, + "loss": 0.592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3063186705112457, + "rewards/margins": 1.2802581787109375, + "rewards/rejected": -0.9739395380020142, + "step": 126 + }, + { + "epoch": 0.16, + "learning_rate": 9.554762705463992e-08, + "logits/chosen": -3.2508440017700195, + "logits/rejected": -3.09360933303833, + "logps/chosen": -268.811767578125, + "logps/rejected": -346.638671875, + "loss": 0.4755, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28869858384132385, + "rewards/margins": 0.8173393607139587, + "rewards/rejected": -0.5286407470703125, + "step": 127 + }, + { + "epoch": 0.16, + "learning_rate": 9.546209186606898e-08, + "logits/chosen": -3.2202720642089844, + "logits/rejected": -3.1030807495117188, + "logps/chosen": -312.3057861328125, + "logps/rejected": -940.1329956054688, + "loss": 0.3992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3403030335903168, + "rewards/margins": 2.332249402999878, + "rewards/rejected": -1.9919464588165283, + "step": 128 + }, + { + "epoch": 0.16, + "learning_rate": 9.537578189601933e-08, + "logits/chosen": -3.2270679473876953, + "logits/rejected": -3.138296604156494, + "logps/chosen": -238.21621704101562, + "logps/rejected": -622.474609375, + "loss": 0.4757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25400620698928833, + "rewards/margins": 1.6745476722717285, + "rewards/rejected": -1.4205414056777954, + "step": 129 + }, + { + "epoch": 0.17, + "learning_rate": 9.528869861541682e-08, + "logits/chosen": -3.1625423431396484, + "logits/rejected": -3.1049513816833496, + "logps/chosen": -248.64353942871094, + "logps/rejected": -684.012451171875, + "loss": 0.5199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21230240166187286, + "rewards/margins": 1.934638261795044, + "rewards/rejected": -1.7223358154296875, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 9.520084350836635e-08, + "logits/chosen": -3.204989194869995, + "logits/rejected": -3.2090210914611816, + "logps/chosen": -268.6278991699219, + "logps/rejected": -715.8641357421875, + "loss": 0.4564, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4009689390659332, + "rewards/margins": 1.9989091157913208, + "rewards/rejected": -1.59794020652771, + "step": 131 + }, + { + "epoch": 0.17, + "learning_rate": 9.511221807212653e-08, + "logits/chosen": -3.2362537384033203, + "logits/rejected": -3.052773952484131, + "logps/chosen": -242.73367309570312, + "logps/rejected": -420.5086975097656, + "loss": 0.4979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30641403794288635, + "rewards/margins": 1.0698295831680298, + "rewards/rejected": -0.763415515422821, + "step": 132 + }, + { + "epoch": 0.17, + "learning_rate": 9.502282381708428e-08, + "logits/chosen": -3.164997100830078, + "logits/rejected": -3.150294542312622, + "logps/chosen": -256.1204528808594, + "logps/rejected": -656.7142333984375, + "loss": 0.5006, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4610198736190796, + "rewards/margins": 2.0258195400238037, + "rewards/rejected": -1.5647995471954346, + "step": 133 + }, + { + "epoch": 0.17, + "learning_rate": 9.493266226672891e-08, + "logits/chosen": -3.181918144226074, + "logits/rejected": -3.1111059188842773, + "logps/chosen": -266.79620361328125, + "logps/rejected": -537.0488891601562, + "loss": 0.4623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2787612974643707, + "rewards/margins": 1.2865097522735596, + "rewards/rejected": -1.0077484846115112, + "step": 134 + }, + { + "epoch": 0.17, + "learning_rate": 9.484173495762633e-08, + "logits/chosen": -3.2281413078308105, + "logits/rejected": -3.046518087387085, + "logps/chosen": -243.65345764160156, + "logps/rejected": -243.04986572265625, + "loss": 0.487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33231812715530396, + "rewards/margins": 0.7207168936729431, + "rewards/rejected": -0.3883987367153168, + "step": 135 + }, + { + "epoch": 0.17, + "learning_rate": 9.475004343939275e-08, + "logits/chosen": -3.2216453552246094, + "logits/rejected": -3.049669027328491, + "logps/chosen": -239.78466796875, + "logps/rejected": -923.7952270507812, + "loss": 0.4865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3145942687988281, + "rewards/margins": 2.1393165588378906, + "rewards/rejected": -1.8247222900390625, + "step": 136 + }, + { + "epoch": 0.17, + "learning_rate": 9.465758927466832e-08, + "logits/chosen": -3.195037364959717, + "logits/rejected": -3.046823501586914, + "logps/chosen": -282.1971130371094, + "logps/rejected": -883.6561279296875, + "loss": 0.4698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3580032289028168, + "rewards/margins": 2.2876663208007812, + "rewards/rejected": -1.929663062095642, + "step": 137 + }, + { + "epoch": 0.18, + "learning_rate": 9.456437403909048e-08, + "logits/chosen": -3.2358083724975586, + "logits/rejected": -3.0674924850463867, + "logps/chosen": -258.6293029785156, + "logps/rejected": -886.3447875976562, + "loss": 0.5586, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.333343505859375, + "rewards/margins": 2.233978271484375, + "rewards/rejected": -1.900634765625, + "step": 138 + }, + { + "epoch": 0.18, + "learning_rate": 9.447039932126716e-08, + "logits/chosen": -3.246428966522217, + "logits/rejected": -3.0922842025756836, + "logps/chosen": -284.79437255859375, + "logps/rejected": -515.1822509765625, + "loss": 0.4894, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3681541681289673, + "rewards/margins": 1.409752607345581, + "rewards/rejected": -1.0415985584259033, + "step": 139 + }, + { + "epoch": 0.18, + "learning_rate": 9.437566672274959e-08, + "logits/chosen": -3.153373956680298, + "logits/rejected": -3.0502734184265137, + "logps/chosen": -251.0087890625, + "logps/rejected": -435.6750183105469, + "loss": 0.4582, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36455079913139343, + "rewards/margins": 1.2134308815002441, + "rewards/rejected": -0.8488800525665283, + "step": 140 + }, + { + "epoch": 0.18, + "learning_rate": 9.428017785800512e-08, + "logits/chosen": -3.2096242904663086, + "logits/rejected": -2.996295928955078, + "logps/chosen": -274.2810974121094, + "logps/rejected": -2548.95849609375, + "loss": 0.4285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20963898301124573, + "rewards/margins": 4.303006172180176, + "rewards/rejected": -4.093367099761963, + "step": 141 + }, + { + "epoch": 0.18, + "learning_rate": 9.41839343543897e-08, + "logits/chosen": -3.206796646118164, + "logits/rejected": -3.1113641262054443, + "logps/chosen": -278.80419921875, + "logps/rejected": -419.61761474609375, + "loss": 0.5476, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3592514097690582, + "rewards/margins": 1.0209320783615112, + "rewards/rejected": -0.6616806387901306, + "step": 142 + }, + { + "epoch": 0.18, + "learning_rate": 9.408693785212001e-08, + "logits/chosen": -3.203113079071045, + "logits/rejected": -3.0526115894317627, + "logps/chosen": -260.4986267089844, + "logps/rejected": -268.36553955078125, + "loss": 0.4843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3788261413574219, + "rewards/margins": 0.732469916343689, + "rewards/rejected": -0.3536438047885895, + "step": 143 + }, + { + "epoch": 0.18, + "learning_rate": 9.398919000424568e-08, + "logits/chosen": -3.2813944816589355, + "logits/rejected": -3.193938732147217, + "logps/chosen": -233.734375, + "logps/rejected": -334.469482421875, + "loss": 0.4834, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40195387601852417, + "rewards/margins": 0.9450874328613281, + "rewards/rejected": -0.543133556842804, + "step": 144 + }, + { + "epoch": 0.18, + "learning_rate": 9.389069247662106e-08, + "logits/chosen": -3.2548928260803223, + "logits/rejected": -3.084261894226074, + "logps/chosen": -285.93951416015625, + "logps/rejected": -810.722900390625, + "loss": 0.4754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38745272159576416, + "rewards/margins": 2.1580307483673096, + "rewards/rejected": -1.7705779075622559, + "step": 145 + }, + { + "epoch": 0.19, + "learning_rate": 9.379144694787676e-08, + "logits/chosen": -3.2531628608703613, + "logits/rejected": -3.1168715953826904, + "logps/chosen": -229.6959991455078, + "logps/rejected": -852.9217529296875, + "loss": 0.4257, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3079727292060852, + "rewards/margins": 2.2054338455200195, + "rewards/rejected": -1.8974609375, + "step": 146 + }, + { + "epoch": 0.19, + "learning_rate": 9.369145510939113e-08, + "logits/chosen": -3.238938808441162, + "logits/rejected": -3.1122405529022217, + "logps/chosen": -235.4759521484375, + "logps/rejected": -608.984375, + "loss": 0.4454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3737541437149048, + "rewards/margins": 1.947023868560791, + "rewards/rejected": -1.5732696056365967, + "step": 147 + }, + { + "epoch": 0.19, + "learning_rate": 9.359071866526139e-08, + "logits/chosen": -3.1695480346679688, + "logits/rejected": -3.1162242889404297, + "logps/chosen": -266.3382568359375, + "logps/rejected": -529.27734375, + "loss": 0.4547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4390518367290497, + "rewards/margins": 1.417921543121338, + "rewards/rejected": -0.9788696765899658, + "step": 148 + }, + { + "epoch": 0.19, + "learning_rate": 9.348923933227459e-08, + "logits/chosen": -3.2500557899475098, + "logits/rejected": -3.0549232959747314, + "logps/chosen": -269.807861328125, + "logps/rejected": -418.61248779296875, + "loss": 0.4555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49683380126953125, + "rewards/margins": 1.275712490081787, + "rewards/rejected": -0.7788788080215454, + "step": 149 + }, + { + "epoch": 0.19, + "learning_rate": 9.338701883987838e-08, + "logits/chosen": -3.26505184173584, + "logits/rejected": -3.151846408843994, + "logps/chosen": -227.4384765625, + "logps/rejected": -589.3878173828125, + "loss": 0.3796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36612167954444885, + "rewards/margins": 1.7015373706817627, + "rewards/rejected": -1.3354157209396362, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 9.328405893015154e-08, + "logits/chosen": -3.184156894683838, + "logits/rejected": -3.073212146759033, + "logps/chosen": -260.486083984375, + "logps/rejected": -394.641845703125, + "loss": 0.4814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34951552748680115, + "rewards/margins": 1.1310112476348877, + "rewards/rejected": -0.7814956903457642, + "step": 151 + }, + { + "epoch": 0.19, + "learning_rate": 9.31803613577742e-08, + "logits/chosen": -3.2038612365722656, + "logits/rejected": -3.0518569946289062, + "logps/chosen": -286.0539855957031, + "logps/rejected": -298.28857421875, + "loss": 0.5069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5011001825332642, + "rewards/margins": 0.9079315662384033, + "rewards/rejected": -0.40683138370513916, + "step": 152 + }, + { + "epoch": 0.19, + "learning_rate": 9.307592788999808e-08, + "logits/chosen": -3.2326483726501465, + "logits/rejected": -3.07513689994812, + "logps/chosen": -290.81219482421875, + "logps/rejected": -529.403564453125, + "loss": 0.5193, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36326295137405396, + "rewards/margins": 1.3858932256698608, + "rewards/rejected": -1.0226303339004517, + "step": 153 + }, + { + "epoch": 0.2, + "learning_rate": 9.297076030661621e-08, + "logits/chosen": -3.2540106773376465, + "logits/rejected": -3.101107358932495, + "logps/chosen": -280.65179443359375, + "logps/rejected": -387.185546875, + "loss": 0.4826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5250053405761719, + "rewards/margins": 1.2513405084609985, + "rewards/rejected": -0.7263351678848267, + "step": 154 + }, + { + "epoch": 0.2, + "learning_rate": 9.28648603999328e-08, + "logits/chosen": -3.2072412967681885, + "logits/rejected": -3.0776214599609375, + "logps/chosen": -258.58917236328125, + "logps/rejected": -649.0457763671875, + "loss": 0.4276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31289827823638916, + "rewards/margins": 1.843763828277588, + "rewards/rejected": -1.5308655500411987, + "step": 155 + }, + { + "epoch": 0.2, + "learning_rate": 9.275822997473248e-08, + "logits/chosen": -3.222892999649048, + "logits/rejected": -3.142955780029297, + "logps/chosen": -247.41513061523438, + "logps/rejected": -911.1489868164062, + "loss": 0.4329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4041946530342102, + "rewards/margins": 2.489192247390747, + "rewards/rejected": -2.0849976539611816, + "step": 156 + }, + { + "epoch": 0.2, + "learning_rate": 9.265087084824969e-08, + "logits/chosen": -3.242703437805176, + "logits/rejected": -3.15677547454834, + "logps/chosen": -293.03118896484375, + "logps/rejected": -567.2324829101562, + "loss": 0.4258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3046371638774872, + "rewards/margins": 1.662251353263855, + "rewards/rejected": -1.3576141595840454, + "step": 157 + }, + { + "epoch": 0.2, + "learning_rate": 9.254278485013763e-08, + "logits/chosen": -3.240649461746216, + "logits/rejected": -3.1050732135772705, + "logps/chosen": -221.806884765625, + "logps/rejected": -931.3372802734375, + "loss": 0.4101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27136993408203125, + "rewards/margins": 2.8252272605895996, + "rewards/rejected": -2.5538573265075684, + "step": 158 + }, + { + "epoch": 0.2, + "learning_rate": 9.243397382243716e-08, + "logits/chosen": -3.1791062355041504, + "logits/rejected": -3.183838367462158, + "logps/chosen": -233.36570739746094, + "logps/rejected": -736.770751953125, + "loss": 0.4113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3962394595146179, + "rewards/margins": 2.432903289794922, + "rewards/rejected": -2.036663770675659, + "step": 159 + }, + { + "epoch": 0.2, + "learning_rate": 9.23244396195453e-08, + "logits/chosen": -3.232722282409668, + "logits/rejected": -3.067598819732666, + "logps/chosen": -246.45372009277344, + "logps/rejected": -643.5903930664062, + "loss": 0.4882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4458114504814148, + "rewards/margins": 1.6544692516326904, + "rewards/rejected": -1.2086578607559204, + "step": 160 + }, + { + "epoch": 0.21, + "learning_rate": 9.221418410818374e-08, + "logits/chosen": -3.150550127029419, + "logits/rejected": -3.0208749771118164, + "logps/chosen": -265.9141540527344, + "logps/rejected": -540.4219360351562, + "loss": 0.4615, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37449875473976135, + "rewards/margins": 1.628446102142334, + "rewards/rejected": -1.253947377204895, + "step": 161 + }, + { + "epoch": 0.21, + "learning_rate": 9.210320916736691e-08, + "logits/chosen": -3.2465405464172363, + "logits/rejected": -3.1358253955841064, + "logps/chosen": -272.438232421875, + "logps/rejected": -440.01904296875, + "loss": 0.4543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36916810274124146, + "rewards/margins": 1.2627227306365967, + "rewards/rejected": -0.8935546875, + "step": 162 + }, + { + "epoch": 0.21, + "learning_rate": 9.199151668837008e-08, + "logits/chosen": -3.204798698425293, + "logits/rejected": -3.1973981857299805, + "logps/chosen": -252.67169189453125, + "logps/rejected": -4108.79248046875, + "loss": 0.4217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4663528800010681, + "rewards/margins": 5.112245559692383, + "rewards/rejected": -4.64589262008667, + "step": 163 + }, + { + "epoch": 0.21, + "learning_rate": 9.187910857469702e-08, + "logits/chosen": -3.150951862335205, + "logits/rejected": -3.021536350250244, + "logps/chosen": -260.623291015625, + "logps/rejected": -694.2340698242188, + "loss": 0.4425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28003770112991333, + "rewards/margins": 2.038567543029785, + "rewards/rejected": -1.7585296630859375, + "step": 164 + }, + { + "epoch": 0.21, + "learning_rate": 9.176598674204765e-08, + "logits/chosen": -3.1878397464752197, + "logits/rejected": -3.0770130157470703, + "logps/chosen": -286.9486083984375, + "logps/rejected": -525.04296875, + "loss": 0.4168, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37277525663375854, + "rewards/margins": 1.5854873657226562, + "rewards/rejected": -1.2127121686935425, + "step": 165 + }, + { + "epoch": 0.21, + "learning_rate": 9.16521531182853e-08, + "logits/chosen": -3.1633498668670654, + "logits/rejected": -3.170494556427002, + "logps/chosen": -270.62078857421875, + "logps/rejected": -681.103515625, + "loss": 0.3738, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45832520723342896, + "rewards/margins": 2.258261203765869, + "rewards/rejected": -1.7999359369277954, + "step": 166 + }, + { + "epoch": 0.21, + "learning_rate": 9.153760964340389e-08, + "logits/chosen": -3.289674997329712, + "logits/rejected": -3.1169209480285645, + "logps/chosen": -242.41200256347656, + "logps/rejected": -984.0115966796875, + "loss": 0.4248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39962464570999146, + "rewards/margins": 2.798718214035034, + "rewards/rejected": -2.3990936279296875, + "step": 167 + }, + { + "epoch": 0.21, + "learning_rate": 9.142235826949497e-08, + "logits/chosen": -3.1916027069091797, + "logits/rejected": -3.1550545692443848, + "logps/chosen": -298.8336181640625, + "logps/rejected": -755.726318359375, + "loss": 0.4378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3725578188896179, + "rewards/margins": 2.2425498962402344, + "rewards/rejected": -1.8699921369552612, + "step": 168 + }, + { + "epoch": 0.22, + "learning_rate": 9.130640096071428e-08, + "logits/chosen": -3.1944987773895264, + "logits/rejected": -3.1028521060943604, + "logps/chosen": -245.4756622314453, + "logps/rejected": -1180.396728515625, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4078766107559204, + "rewards/margins": 3.2585511207580566, + "rewards/rejected": -2.850674629211426, + "step": 169 + }, + { + "epoch": 0.22, + "learning_rate": 9.118973969324839e-08, + "logits/chosen": -3.2128264904022217, + "logits/rejected": -2.9887537956237793, + "logps/chosen": -228.69435119628906, + "logps/rejected": -493.3900146484375, + "loss": 0.5181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3617599606513977, + "rewards/margins": 1.5102546215057373, + "rewards/rejected": -1.1484947204589844, + "step": 170 + }, + { + "epoch": 0.22, + "learning_rate": 9.107237645528099e-08, + "logits/chosen": -3.2159857749938965, + "logits/rejected": -3.1818106174468994, + "logps/chosen": -259.3443603515625, + "logps/rejected": -552.5289306640625, + "loss": 0.4559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2971740663051605, + "rewards/margins": 1.6061828136444092, + "rewards/rejected": -1.3090088367462158, + "step": 171 + }, + { + "epoch": 0.22, + "learning_rate": 9.095431324695899e-08, + "logits/chosen": -3.1985769271850586, + "logits/rejected": -3.1013665199279785, + "logps/chosen": -268.3742370605469, + "logps/rejected": -520.94091796875, + "loss": 0.3761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5236008167266846, + "rewards/margins": 1.790867567062378, + "rewards/rejected": -1.267266869544983, + "step": 172 + }, + { + "epoch": 0.22, + "learning_rate": 9.083555208035846e-08, + "logits/chosen": -3.1920535564422607, + "logits/rejected": -3.1590843200683594, + "logps/chosen": -262.38470458984375, + "logps/rejected": -516.03466796875, + "loss": 0.4097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2838706970214844, + "rewards/margins": 1.5002846717834473, + "rewards/rejected": -1.216413974761963, + "step": 173 + }, + { + "epoch": 0.22, + "learning_rate": 9.071609497945035e-08, + "logits/chosen": -3.162108898162842, + "logits/rejected": -3.130139112472534, + "logps/chosen": -288.027099609375, + "logps/rejected": -551.6715087890625, + "loss": 0.4379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5049331784248352, + "rewards/margins": 1.6779892444610596, + "rewards/rejected": -1.1730561256408691, + "step": 174 + }, + { + "epoch": 0.22, + "learning_rate": 9.059594398006592e-08, + "logits/chosen": -3.2844748497009277, + "logits/rejected": -3.1619319915771484, + "logps/chosen": -245.99496459960938, + "logps/rejected": -605.8023681640625, + "loss": 0.3683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47226792573928833, + "rewards/margins": 2.4894280433654785, + "rewards/rejected": -2.017160177230835, + "step": 175 + }, + { + "epoch": 0.22, + "learning_rate": 9.047510112986216e-08, + "logits/chosen": -3.2162508964538574, + "logits/rejected": -3.187875747680664, + "logps/chosen": -290.54388427734375, + "logps/rejected": -821.8953857421875, + "loss": 0.4489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4580551087856293, + "rewards/margins": 2.664555311203003, + "rewards/rejected": -2.206500291824341, + "step": 176 + }, + { + "epoch": 0.23, + "learning_rate": 9.035356848828679e-08, + "logits/chosen": -3.2703123092651367, + "logits/rejected": -3.1456198692321777, + "logps/chosen": -245.0301971435547, + "logps/rejected": -656.9476318359375, + "loss": 0.4385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.444113165140152, + "rewards/margins": 2.2398407459259033, + "rewards/rejected": -1.7957276105880737, + "step": 177 + }, + { + "epoch": 0.23, + "learning_rate": 9.023134812654324e-08, + "logits/chosen": -3.203815221786499, + "logits/rejected": -3.1089401245117188, + "logps/chosen": -303.5981750488281, + "logps/rejected": -389.4306640625, + "loss": 0.4585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4637817442417145, + "rewards/margins": 1.4135711193084717, + "rewards/rejected": -0.9497894048690796, + "step": 178 + }, + { + "epoch": 0.23, + "learning_rate": 9.010844212755528e-08, + "logits/chosen": -3.2496695518493652, + "logits/rejected": -3.144810438156128, + "logps/chosen": -251.60601806640625, + "logps/rejected": -732.1996459960938, + "loss": 0.383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3652504086494446, + "rewards/margins": 2.3523201942443848, + "rewards/rejected": -1.9870697259902954, + "step": 179 + }, + { + "epoch": 0.23, + "learning_rate": 8.998485258593157e-08, + "logits/chosen": -3.245692491531372, + "logits/rejected": -3.142960786819458, + "logps/chosen": -244.49813842773438, + "logps/rejected": -945.3111572265625, + "loss": 0.4385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4981674551963806, + "rewards/margins": 2.7275466918945312, + "rewards/rejected": -2.229379415512085, + "step": 180 + }, + { + "epoch": 0.23, + "learning_rate": 8.986058160792997e-08, + "logits/chosen": -3.1786603927612305, + "logits/rejected": -3.112433433532715, + "logps/chosen": -265.2529602050781, + "logps/rejected": -539.9921875, + "loss": 0.3911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3614051938056946, + "rewards/margins": 1.8699135780334473, + "rewards/rejected": -1.508508324623108, + "step": 181 + }, + { + "epoch": 0.23, + "learning_rate": 8.973563131142163e-08, + "logits/chosen": -3.2056150436401367, + "logits/rejected": -3.112265110015869, + "logps/chosen": -255.99856567382812, + "logps/rejected": -635.2487182617188, + "loss": 0.4324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4511260986328125, + "rewards/margins": 2.0510239601135254, + "rewards/rejected": -1.599897861480713, + "step": 182 + }, + { + "epoch": 0.23, + "learning_rate": 8.961000382585486e-08, + "logits/chosen": -3.2654099464416504, + "logits/rejected": -3.0412256717681885, + "logps/chosen": -256.8570556640625, + "logps/rejected": -366.1820373535156, + "loss": 0.4273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35671311616897583, + "rewards/margins": 1.2593848705291748, + "rewards/rejected": -0.9026718139648438, + "step": 183 + }, + { + "epoch": 0.23, + "learning_rate": 8.94837012922189e-08, + "logits/chosen": -3.2071781158447266, + "logits/rejected": -3.0798416137695312, + "logps/chosen": -265.418701171875, + "logps/rejected": -790.5769653320312, + "loss": 0.4509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5574508905410767, + "rewards/margins": 2.3524749279022217, + "rewards/rejected": -1.7950241565704346, + "step": 184 + }, + { + "epoch": 0.24, + "learning_rate": 8.935672586300736e-08, + "logits/chosen": -3.221679210662842, + "logits/rejected": -3.048048973083496, + "logps/chosen": -273.675048828125, + "logps/rejected": -742.5791015625, + "loss": 0.4325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3422508239746094, + "rewards/margins": 1.838285207748413, + "rewards/rejected": -1.4960342645645142, + "step": 185 + }, + { + "epoch": 0.24, + "learning_rate": 8.922907970218166e-08, + "logits/chosen": -3.122047185897827, + "logits/rejected": -3.0945184230804443, + "logps/chosen": -267.8146057128906, + "logps/rejected": -398.21923828125, + "loss": 0.4387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3821670413017273, + "rewards/margins": 1.320369005203247, + "rewards/rejected": -0.938201904296875, + "step": 186 + }, + { + "epoch": 0.24, + "learning_rate": 8.910076498513401e-08, + "logits/chosen": -3.164684772491455, + "logits/rejected": -3.0402700901031494, + "logps/chosen": -251.24176025390625, + "logps/rejected": -399.8775634765625, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5014732480049133, + "rewards/margins": 1.4305946826934814, + "rewards/rejected": -0.9291214346885681, + "step": 187 + }, + { + "epoch": 0.24, + "learning_rate": 8.897178389865041e-08, + "logits/chosen": -3.221512794494629, + "logits/rejected": -3.075716495513916, + "logps/chosen": -264.9689636230469, + "logps/rejected": -978.8206176757812, + "loss": 0.4243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4482986330986023, + "rewards/margins": 2.7807908058166504, + "rewards/rejected": -2.3324921131134033, + "step": 188 + }, + { + "epoch": 0.24, + "learning_rate": 8.884213864087338e-08, + "logits/chosen": -3.2351186275482178, + "logits/rejected": -3.1471943855285645, + "logps/chosen": -272.26690673828125, + "logps/rejected": -1331.097412109375, + "loss": 0.4156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5360015630722046, + "rewards/margins": 4.338812351226807, + "rewards/rejected": -3.8028106689453125, + "step": 189 + }, + { + "epoch": 0.24, + "learning_rate": 8.871183142126445e-08, + "logits/chosen": -3.2438161373138428, + "logits/rejected": -3.048952102661133, + "logps/chosen": -262.193603515625, + "logps/rejected": -1842.337646484375, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4557251036167145, + "rewards/margins": 5.221234321594238, + "rewards/rejected": -4.765509128570557, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 8.858086446056663e-08, + "logits/chosen": -3.238229513168335, + "logits/rejected": -2.9604640007019043, + "logps/chosen": -289.41741943359375, + "logps/rejected": -1446.9671630859375, + "loss": 0.411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3606552183628082, + "rewards/margins": 3.678056240081787, + "rewards/rejected": -3.317401170730591, + "step": 191 + }, + { + "epoch": 0.24, + "learning_rate": 8.844923999076639e-08, + "logits/chosen": -3.208956480026245, + "logits/rejected": -2.8914074897766113, + "logps/chosen": -245.79071044921875, + "logps/rejected": -1647.966064453125, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33461302518844604, + "rewards/margins": 4.411657810211182, + "rewards/rejected": -4.077044486999512, + "step": 192 + }, + { + "epoch": 0.25, + "learning_rate": 8.831696025505577e-08, + "logits/chosen": -3.1297593116760254, + "logits/rejected": -3.154938220977783, + "logps/chosen": -245.9024200439453, + "logps/rejected": -526.7095947265625, + "loss": 0.3737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6381645202636719, + "rewards/margins": 2.1688284873962402, + "rewards/rejected": -1.530664086341858, + "step": 193 + }, + { + "epoch": 0.25, + "learning_rate": 8.818402750779401e-08, + "logits/chosen": -3.2473459243774414, + "logits/rejected": -3.118410587310791, + "logps/chosen": -267.11065673828125, + "logps/rejected": -358.7467956542969, + "loss": 0.4223, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38788145780563354, + "rewards/margins": 1.34075927734375, + "rewards/rejected": -0.9528778791427612, + "step": 194 + }, + { + "epoch": 0.25, + "learning_rate": 8.805044401446931e-08, + "logits/chosen": -3.215189218521118, + "logits/rejected": -3.0604348182678223, + "logps/chosen": -240.96878051757812, + "logps/rejected": -393.77978515625, + "loss": 0.4207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.525312066078186, + "rewards/margins": 1.5510551929473877, + "rewards/rejected": -1.0257431268692017, + "step": 195 + }, + { + "epoch": 0.25, + "learning_rate": 8.791621205166007e-08, + "logits/chosen": -3.2527105808258057, + "logits/rejected": -3.1157824993133545, + "logps/chosen": -252.12977600097656, + "logps/rejected": -366.12847900390625, + "loss": 0.4075, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5418990850448608, + "rewards/margins": 1.5897064208984375, + "rewards/rejected": -1.0478073358535767, + "step": 196 + }, + { + "epoch": 0.25, + "learning_rate": 8.778133390699613e-08, + "logits/chosen": -3.232922077178955, + "logits/rejected": -3.052764892578125, + "logps/chosen": -256.86260986328125, + "logps/rejected": -545.4364624023438, + "loss": 0.4537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39188385009765625, + "rewards/margins": 1.8708617687225342, + "rewards/rejected": -1.478977918624878, + "step": 197 + }, + { + "epoch": 0.25, + "learning_rate": 8.764581187911979e-08, + "logits/chosen": -3.2099311351776123, + "logits/rejected": -3.151747465133667, + "logps/chosen": -263.8714294433594, + "logps/rejected": -967.8262939453125, + "loss": 0.4116, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6305763721466064, + "rewards/margins": 3.484067440032959, + "rewards/rejected": -2.8534913063049316, + "step": 198 + }, + { + "epoch": 0.25, + "learning_rate": 8.750964827764671e-08, + "logits/chosen": -3.1924822330474854, + "logits/rejected": -3.11008882522583, + "logps/chosen": -274.79681396484375, + "logps/rejected": -750.4725341796875, + "loss": 0.4081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5162101984024048, + "rewards/margins": 2.607116222381592, + "rewards/rejected": -2.0909059047698975, + "step": 199 + }, + { + "epoch": 0.25, + "learning_rate": 8.737284542312639e-08, + "logits/chosen": -3.2610580921173096, + "logits/rejected": -3.1566004753112793, + "logps/chosen": -254.1729736328125, + "logps/rejected": -529.12109375, + "loss": 0.4191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3852600157260895, + "rewards/margins": 1.4818146228790283, + "rewards/rejected": -1.0965545177459717, + "step": 200 + }, + { + "epoch": 0.26, + "learning_rate": 8.723540564700281e-08, + "logits/chosen": -3.2453360557556152, + "logits/rejected": -3.1342670917510986, + "logps/chosen": -248.72607421875, + "logps/rejected": -381.4747619628906, + "loss": 0.4302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4651497006416321, + "rewards/margins": 1.6355125904083252, + "rewards/rejected": -1.1703628301620483, + "step": 201 + }, + { + "epoch": 0.26, + "learning_rate": 8.709733129157448e-08, + "logits/chosen": -3.2451746463775635, + "logits/rejected": -3.1721243858337402, + "logps/chosen": -275.6437072753906, + "logps/rejected": -574.4359741210938, + "loss": 0.3995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4914947748184204, + "rewards/margins": 2.403738498687744, + "rewards/rejected": -1.9122436046600342, + "step": 202 + }, + { + "epoch": 0.26, + "learning_rate": 8.695862470995474e-08, + "logits/chosen": -3.2122802734375, + "logits/rejected": -3.14990234375, + "logps/chosen": -273.3551940917969, + "logps/rejected": -575.4923095703125, + "loss": 0.3921, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.365835577249527, + "rewards/margins": 1.7897217273712158, + "rewards/rejected": -1.4238860607147217, + "step": 203 + }, + { + "epoch": 0.26, + "learning_rate": 8.681928826603152e-08, + "logits/chosen": -3.213062286376953, + "logits/rejected": -3.1039907932281494, + "logps/chosen": -263.5570068359375, + "logps/rejected": -412.31768798828125, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5670318603515625, + "rewards/margins": 1.65496826171875, + "rewards/rejected": -1.0879364013671875, + "step": 204 + }, + { + "epoch": 0.26, + "learning_rate": 8.667932433442711e-08, + "logits/chosen": -3.217229127883911, + "logits/rejected": -3.1006598472595215, + "logps/chosen": -276.9178466796875, + "logps/rejected": -322.039794921875, + "loss": 0.4238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5659881830215454, + "rewards/margins": 1.5218209028244019, + "rewards/rejected": -0.9558327198028564, + "step": 205 + }, + { + "epoch": 0.26, + "learning_rate": 8.653873530045762e-08, + "logits/chosen": -3.1957664489746094, + "logits/rejected": -3.0080065727233887, + "logps/chosen": -278.8944091796875, + "logps/rejected": -514.1397705078125, + "loss": 0.3546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.311026006937027, + "rewards/margins": 1.4131637811660767, + "rewards/rejected": -1.102137804031372, + "step": 206 + }, + { + "epoch": 0.26, + "learning_rate": 8.639752356009246e-08, + "logits/chosen": -3.214120388031006, + "logits/rejected": -3.0537686347961426, + "logps/chosen": -228.57733154296875, + "logps/rejected": -528.9526977539062, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39709627628326416, + "rewards/margins": 1.4372010231018066, + "rewards/rejected": -1.0401047468185425, + "step": 207 + }, + { + "epoch": 0.26, + "learning_rate": 8.625569151991337e-08, + "logits/chosen": -3.335843086242676, + "logits/rejected": -3.153425693511963, + "logps/chosen": -227.745361328125, + "logps/rejected": -635.447509765625, + "loss": 0.4233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4939231872558594, + "rewards/margins": 2.126800060272217, + "rewards/rejected": -1.6328766345977783, + "step": 208 + }, + { + "epoch": 0.27, + "learning_rate": 8.611324159707348e-08, + "logits/chosen": -3.224142551422119, + "logits/rejected": -3.1399412155151367, + "logps/chosen": -274.1496887207031, + "logps/rejected": -523.1587524414062, + "loss": 0.4278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39578020572662354, + "rewards/margins": 1.7359123229980469, + "rewards/rejected": -1.340132236480713, + "step": 209 + }, + { + "epoch": 0.27, + "learning_rate": 8.597017621925612e-08, + "logits/chosen": -3.2909364700317383, + "logits/rejected": -3.1181225776672363, + "logps/chosen": -259.4304504394531, + "logps/rejected": -1269.4248046875, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4661979675292969, + "rewards/margins": 4.00806188583374, + "rewards/rejected": -3.5418639183044434, + "step": 210 + }, + { + "epoch": 0.27, + "learning_rate": 8.58264978246334e-08, + "logits/chosen": -3.1974925994873047, + "logits/rejected": -3.0771689414978027, + "logps/chosen": -254.8982391357422, + "logps/rejected": -1038.029296875, + "loss": 0.3915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40181732177734375, + "rewards/margins": 3.1639938354492188, + "rewards/rejected": -2.762176513671875, + "step": 211 + }, + { + "epoch": 0.27, + "learning_rate": 8.56822088618247e-08, + "logits/chosen": -3.1564550399780273, + "logits/rejected": -3.12095308303833, + "logps/chosen": -263.68817138671875, + "logps/rejected": -636.96484375, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.402426153421402, + "rewards/margins": 1.9496047496795654, + "rewards/rejected": -1.5471787452697754, + "step": 212 + }, + { + "epoch": 0.27, + "learning_rate": 8.553731178985492e-08, + "logits/chosen": -3.2253365516662598, + "logits/rejected": -3.0310449600219727, + "logps/chosen": -274.10064697265625, + "logps/rejected": -415.33642578125, + "loss": 0.4327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47417449951171875, + "rewards/margins": 1.4844939708709717, + "rewards/rejected": -1.0103195905685425, + "step": 213 + }, + { + "epoch": 0.27, + "learning_rate": 8.539180907811258e-08, + "logits/chosen": -3.1972174644470215, + "logits/rejected": -3.0472075939178467, + "logps/chosen": -290.5929260253906, + "logps/rejected": -362.4389343261719, + "loss": 0.4188, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.501727283000946, + "rewards/margins": 1.2940583229064941, + "rewards/rejected": -0.7923309803009033, + "step": 214 + }, + { + "epoch": 0.27, + "learning_rate": 8.524570320630774e-08, + "logits/chosen": -3.2070822715759277, + "logits/rejected": -3.0893044471740723, + "logps/chosen": -285.7363586425781, + "logps/rejected": -1026.6065673828125, + "loss": 0.3739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7164337635040283, + "rewards/margins": 3.2609314918518066, + "rewards/rejected": -2.5444977283477783, + "step": 215 + }, + { + "epoch": 0.28, + "learning_rate": 8.509899666442972e-08, + "logits/chosen": -3.183382987976074, + "logits/rejected": -3.044717788696289, + "logps/chosen": -323.7349548339844, + "logps/rejected": -308.1398010253906, + "loss": 0.4891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4000808596611023, + "rewards/margins": 0.9472000598907471, + "rewards/rejected": -0.547119140625, + "step": 216 + }, + { + "epoch": 0.28, + "learning_rate": 8.495169195270466e-08, + "logits/chosen": -3.2228262424468994, + "logits/rejected": -3.199533700942993, + "logps/chosen": -254.15798950195312, + "logps/rejected": -715.6832275390625, + "loss": 0.3699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5679199695587158, + "rewards/margins": 2.7964720726013184, + "rewards/rejected": -2.2285523414611816, + "step": 217 + }, + { + "epoch": 0.28, + "learning_rate": 8.480379158155297e-08, + "logits/chosen": -3.315688133239746, + "logits/rejected": -3.1584415435791016, + "logps/chosen": -247.25802612304688, + "logps/rejected": -603.0113525390625, + "loss": 0.4167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5365028381347656, + "rewards/margins": 2.550565242767334, + "rewards/rejected": -2.0140626430511475, + "step": 218 + }, + { + "epoch": 0.28, + "learning_rate": 8.465529807154647e-08, + "logits/chosen": -3.2543880939483643, + "logits/rejected": -3.065855026245117, + "logps/chosen": -274.950927734375, + "logps/rejected": -831.3494262695312, + "loss": 0.4277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48056793212890625, + "rewards/margins": 2.7290496826171875, + "rewards/rejected": -2.2484817504882812, + "step": 219 + }, + { + "epoch": 0.28, + "learning_rate": 8.450621395336553e-08, + "logits/chosen": -3.221987724304199, + "logits/rejected": -3.2169876098632812, + "logps/chosen": -221.04611206054688, + "logps/rejected": -3803.634033203125, + "loss": 0.3316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45991289615631104, + "rewards/margins": 3.4265875816345215, + "rewards/rejected": -2.9666748046875, + "step": 220 + }, + { + "epoch": 0.28, + "learning_rate": 8.435654176775577e-08, + "logits/chosen": -3.140847682952881, + "logits/rejected": -3.1243538856506348, + "logps/chosen": -282.6670227050781, + "logps/rejected": -841.6272583007812, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5871673822402954, + "rewards/margins": 3.467071533203125, + "rewards/rejected": -2.879904270172119, + "step": 221 + }, + { + "epoch": 0.28, + "learning_rate": 8.420628406548493e-08, + "logits/chosen": -3.2162060737609863, + "logits/rejected": -3.0779075622558594, + "logps/chosen": -284.3597412109375, + "logps/rejected": -674.0504150390625, + "loss": 0.409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5024269223213196, + "rewards/margins": 1.9014854431152344, + "rewards/rejected": -1.3990585803985596, + "step": 222 + }, + { + "epoch": 0.28, + "learning_rate": 8.405544340729938e-08, + "logits/chosen": -3.2607600688934326, + "logits/rejected": -3.1883156299591064, + "logps/chosen": -245.59454345703125, + "logps/rejected": -799.2098388671875, + "loss": 0.4001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4750511050224304, + "rewards/margins": 3.0571954250335693, + "rewards/rejected": -2.582144260406494, + "step": 223 + }, + { + "epoch": 0.29, + "learning_rate": 8.390402236388037e-08, + "logits/chosen": -3.2147483825683594, + "logits/rejected": -3.106247663497925, + "logps/chosen": -291.4506530761719, + "logps/rejected": -677.9744873046875, + "loss": 0.4058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49452972412109375, + "rewards/margins": 2.6932601928710938, + "rewards/rejected": -2.19873046875, + "step": 224 + }, + { + "epoch": 0.29, + "learning_rate": 8.37520235158003e-08, + "logits/chosen": -3.269489288330078, + "logits/rejected": -3.1293044090270996, + "logps/chosen": -262.1939697265625, + "logps/rejected": -571.1243896484375, + "loss": 0.3954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3324943780899048, + "rewards/margins": 2.041353702545166, + "rewards/rejected": -1.7088592052459717, + "step": 225 + }, + { + "epoch": 0.29, + "learning_rate": 8.359944945347878e-08, + "logits/chosen": -3.2500975131988525, + "logits/rejected": -3.02590274810791, + "logps/chosen": -209.7407684326172, + "logps/rejected": -1121.1392822265625, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49620363116264343, + "rewards/margins": 3.238086223602295, + "rewards/rejected": -2.74188232421875, + "step": 226 + }, + { + "epoch": 0.29, + "learning_rate": 8.344630277713832e-08, + "logits/chosen": -3.1528515815734863, + "logits/rejected": -3.107137680053711, + "logps/chosen": -256.5837097167969, + "logps/rejected": -613.8658447265625, + "loss": 0.3992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5288101434707642, + "rewards/margins": 2.4153685569763184, + "rewards/rejected": -1.8865585327148438, + "step": 227 + }, + { + "epoch": 0.29, + "learning_rate": 8.329258609676024e-08, + "logits/chosen": -3.2433831691741943, + "logits/rejected": -3.107501983642578, + "logps/chosen": -243.30706787109375, + "logps/rejected": -826.2371215820312, + "loss": 0.3676, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6840133666992188, + "rewards/margins": 2.912672519683838, + "rewards/rejected": -2.228659152984619, + "step": 228 + }, + { + "epoch": 0.29, + "learning_rate": 8.313830203204e-08, + "logits/chosen": -3.1806139945983887, + "logits/rejected": -3.1512041091918945, + "logps/chosen": -284.5518798828125, + "logps/rejected": -960.5046997070312, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.49681395292282104, + "rewards/margins": 3.3226869106292725, + "rewards/rejected": -2.8258728981018066, + "step": 229 + }, + { + "epoch": 0.29, + "learning_rate": 8.298345321234267e-08, + "logits/chosen": -3.249293804168701, + "logits/rejected": -3.1443405151367188, + "logps/chosen": -290.69586181640625, + "logps/rejected": -749.9720458984375, + "loss": 0.3899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5329849720001221, + "rewards/margins": 3.0351409912109375, + "rewards/rejected": -2.5021560192108154, + "step": 230 + }, + { + "epoch": 0.29, + "learning_rate": 8.28280422766581e-08, + "logits/chosen": -3.2432613372802734, + "logits/rejected": -3.0893044471740723, + "logps/chosen": -295.21197509765625, + "logps/rejected": -659.3438720703125, + "loss": 0.3972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5416610836982727, + "rewards/margins": 2.4710617065429688, + "rewards/rejected": -1.9294006824493408, + "step": 231 + }, + { + "epoch": 0.3, + "learning_rate": 8.267207187355583e-08, + "logits/chosen": -3.168903350830078, + "logits/rejected": -3.062058925628662, + "logps/chosen": -252.12315368652344, + "logps/rejected": -537.052490234375, + "loss": 0.4114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4640617370605469, + "rewards/margins": 1.8237998485565186, + "rewards/rejected": -1.3597381114959717, + "step": 232 + }, + { + "epoch": 0.3, + "learning_rate": 8.251554466114014e-08, + "logits/chosen": -3.177605152130127, + "logits/rejected": -3.0587921142578125, + "logps/chosen": -258.39056396484375, + "logps/rejected": -441.15185546875, + "loss": 0.4114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42604827880859375, + "rewards/margins": 1.7212417125701904, + "rewards/rejected": -1.2951934337615967, + "step": 233 + }, + { + "epoch": 0.3, + "learning_rate": 8.235846330700462e-08, + "logits/chosen": -3.2301077842712402, + "logits/rejected": -3.0806474685668945, + "logps/chosen": -263.22796630859375, + "logps/rejected": -510.5741271972656, + "loss": 0.4062, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5553863644599915, + "rewards/margins": 2.0903899669647217, + "rewards/rejected": -1.535003662109375, + "step": 234 + }, + { + "epoch": 0.3, + "learning_rate": 8.220083048818675e-08, + "logits/chosen": -3.219942331314087, + "logits/rejected": -3.0994324684143066, + "logps/chosen": -266.5556640625, + "logps/rejected": -499.8226013183594, + "loss": 0.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5083969235420227, + "rewards/margins": 2.07275390625, + "rewards/rejected": -1.564357042312622, + "step": 235 + }, + { + "epoch": 0.3, + "learning_rate": 8.204264889112229e-08, + "logits/chosen": -3.2407917976379395, + "logits/rejected": -3.1257076263427734, + "logps/chosen": -273.9761047363281, + "logps/rejected": -776.641357421875, + "loss": 0.4025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5828536748886108, + "rewards/margins": 2.955946445465088, + "rewards/rejected": -2.3730926513671875, + "step": 236 + }, + { + "epoch": 0.3, + "learning_rate": 8.188392121159944e-08, + "logits/chosen": -3.2542948722839355, + "logits/rejected": -3.081416130065918, + "logps/chosen": -262.3619384765625, + "logps/rejected": -459.6590576171875, + "loss": 0.3677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5266494750976562, + "rewards/margins": 1.582096815109253, + "rewards/rejected": -1.0554473400115967, + "step": 237 + }, + { + "epoch": 0.3, + "learning_rate": 8.172465015471295e-08, + "logits/chosen": -3.244428873062134, + "logits/rejected": -3.037184715270996, + "logps/chosen": -279.05389404296875, + "logps/rejected": -1787.97119140625, + "loss": 0.4218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48907774686813354, + "rewards/margins": 4.7138214111328125, + "rewards/rejected": -4.224743843078613, + "step": 238 + }, + { + "epoch": 0.3, + "learning_rate": 8.156483843481802e-08, + "logits/chosen": -3.253204345703125, + "logits/rejected": -3.089653491973877, + "logps/chosen": -269.3818664550781, + "logps/rejected": -556.8336181640625, + "loss": 0.4063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7498054504394531, + "rewards/margins": 2.088742971420288, + "rewards/rejected": -1.3389374017715454, + "step": 239 + }, + { + "epoch": 0.31, + "learning_rate": 8.140448877548401e-08, + "logits/chosen": -3.2324862480163574, + "logits/rejected": -3.1521012783050537, + "logps/chosen": -214.70062255859375, + "logps/rejected": -695.1243896484375, + "loss": 0.3698, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5366760492324829, + "rewards/margins": 3.1512374877929688, + "rewards/rejected": -2.6145615577697754, + "step": 240 + }, + { + "epoch": 0.31, + "learning_rate": 8.124360390944804e-08, + "logits/chosen": -3.2007057666778564, + "logits/rejected": -3.164072275161743, + "logps/chosen": -257.64959716796875, + "logps/rejected": -557.3220825195312, + "loss": 0.3849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38883286714553833, + "rewards/margins": 2.5503029823303223, + "rewards/rejected": -2.1614699363708496, + "step": 241 + }, + { + "epoch": 0.31, + "learning_rate": 8.108218657856846e-08, + "logits/chosen": -3.224480628967285, + "logits/rejected": -3.1385951042175293, + "logps/chosen": -279.5715026855469, + "logps/rejected": -567.0617065429688, + "loss": 0.3848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5450767278671265, + "rewards/margins": 2.146700382232666, + "rewards/rejected": -1.60162353515625, + "step": 242 + }, + { + "epoch": 0.31, + "learning_rate": 8.092023953377797e-08, + "logits/chosen": -3.251471996307373, + "logits/rejected": -3.0781350135803223, + "logps/chosen": -253.32455444335938, + "logps/rejected": -579.6173095703125, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5254433155059814, + "rewards/margins": 2.2552850246429443, + "rewards/rejected": -1.729841709136963, + "step": 243 + }, + { + "epoch": 0.31, + "learning_rate": 8.075776553503697e-08, + "logits/chosen": -3.2186317443847656, + "logits/rejected": -3.131552219390869, + "logps/chosen": -233.47195434570312, + "logps/rejected": -352.6204833984375, + "loss": 0.4178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5934768915176392, + "rewards/margins": 1.4481369256973267, + "rewards/rejected": -0.8546600341796875, + "step": 244 + }, + { + "epoch": 0.31, + "learning_rate": 8.059476735128632e-08, + "logits/chosen": -3.275721549987793, + "logits/rejected": -3.075591802597046, + "logps/chosen": -296.1845703125, + "logps/rejected": -1024.3177490234375, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6454170346260071, + "rewards/margins": 3.4556527137756348, + "rewards/rejected": -2.8102357387542725, + "step": 245 + }, + { + "epoch": 0.31, + "learning_rate": 8.04312477604003e-08, + "logits/chosen": -3.184464454650879, + "logits/rejected": -3.097918748855591, + "logps/chosen": -259.78521728515625, + "logps/rejected": -762.9404296875, + "loss": 0.3461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48611146211624146, + "rewards/margins": 2.543443202972412, + "rewards/rejected": -2.0573318004608154, + "step": 246 + }, + { + "epoch": 0.31, + "learning_rate": 8.02672095491391e-08, + "logits/chosen": -3.1391475200653076, + "logits/rejected": -3.0703985691070557, + "logps/chosen": -291.8800964355469, + "logps/rejected": -485.6929016113281, + "loss": 0.4133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4956253170967102, + "rewards/margins": 1.5496689081192017, + "rewards/rejected": -1.0540435314178467, + "step": 247 + }, + { + "epoch": 0.32, + "learning_rate": 8.010265551310152e-08, + "logits/chosen": -3.1638846397399902, + "logits/rejected": -3.071324586868286, + "logps/chosen": -276.2183837890625, + "logps/rejected": -686.8514404296875, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44435882568359375, + "rewards/margins": 2.256129264831543, + "rewards/rejected": -1.8117706775665283, + "step": 248 + }, + { + "epoch": 0.32, + "learning_rate": 7.993758845667719e-08, + "logits/chosen": -3.21122670173645, + "logits/rejected": -3.0711984634399414, + "logps/chosen": -218.97161865234375, + "logps/rejected": -403.25994873046875, + "loss": 0.3888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.609783947467804, + "rewards/margins": 1.5549911260604858, + "rewards/rejected": -0.9452072381973267, + "step": 249 + }, + { + "epoch": 0.32, + "learning_rate": 7.977201119299883e-08, + "logits/chosen": -3.221928119659424, + "logits/rejected": -3.080059766769409, + "logps/chosen": -236.09361267089844, + "logps/rejected": -342.03131103515625, + "loss": 0.4224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6145050525665283, + "rewards/margins": 1.539627194404602, + "rewards/rejected": -0.9251221418380737, + "step": 250 + }, + { + "epoch": 0.32, + "learning_rate": 7.960592654389431e-08, + "logits/chosen": -3.262247323989868, + "logits/rejected": -3.025470733642578, + "logps/chosen": -263.190185546875, + "logps/rejected": -422.36834716796875, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.61578369140625, + "rewards/margins": 1.7172653675079346, + "rewards/rejected": -1.1014816761016846, + "step": 251 + }, + { + "epoch": 0.32, + "learning_rate": 7.94393373398385e-08, + "logits/chosen": -3.2770023345947266, + "logits/rejected": -3.0569727420806885, + "logps/chosen": -297.8747253417969, + "logps/rejected": -645.6150512695312, + "loss": 0.3998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5461807250976562, + "rewards/margins": 2.81559157371521, + "rewards/rejected": -2.2694108486175537, + "step": 252 + }, + { + "epoch": 0.32, + "learning_rate": 7.927224641990514e-08, + "logits/chosen": -3.1859238147735596, + "logits/rejected": -3.036137104034424, + "logps/chosen": -256.8155822753906, + "logps/rejected": -1209.1806640625, + "loss": 0.3781, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6561431884765625, + "rewards/margins": 3.939535617828369, + "rewards/rejected": -3.2833924293518066, + "step": 253 + }, + { + "epoch": 0.32, + "learning_rate": 7.910465663171835e-08, + "logits/chosen": -3.215507984161377, + "logits/rejected": -3.0356359481811523, + "logps/chosen": -240.1996307373047, + "logps/rejected": -363.9996337890625, + "loss": 0.4501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5695655941963196, + "rewards/margins": 1.4865150451660156, + "rewards/rejected": -0.916949450969696, + "step": 254 + }, + { + "epoch": 0.32, + "learning_rate": 7.893657083140416e-08, + "logits/chosen": -3.274641990661621, + "logits/rejected": -3.1215429306030273, + "logps/chosen": -277.7816467285156, + "logps/rejected": -425.6816101074219, + "loss": 0.4064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6193298101425171, + "rewards/margins": 1.9161499738693237, + "rewards/rejected": -1.296820044517517, + "step": 255 + }, + { + "epoch": 0.33, + "learning_rate": 7.876799188354182e-08, + "logits/chosen": -3.2096450328826904, + "logits/rejected": -3.0775318145751953, + "logps/chosen": -244.80067443847656, + "logps/rejected": -636.809814453125, + "loss": 0.3995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5183876156806946, + "rewards/margins": 2.891761064529419, + "rewards/rejected": -2.373373508453369, + "step": 256 + }, + { + "epoch": 0.33, + "learning_rate": 7.859892266111498e-08, + "logits/chosen": -3.2053003311157227, + "logits/rejected": -3.001115322113037, + "logps/chosen": -237.04226684570312, + "logps/rejected": -731.2721557617188, + "loss": 0.4004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47852784395217896, + "rewards/margins": 2.520172119140625, + "rewards/rejected": -2.041644334793091, + "step": 257 + }, + { + "epoch": 0.33, + "learning_rate": 7.842936604546273e-08, + "logits/chosen": -3.225518226623535, + "logits/rejected": -3.1259026527404785, + "logps/chosen": -241.6566619873047, + "logps/rejected": -589.5785522460938, + "loss": 0.3889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6291374564170837, + "rewards/margins": 2.3948585987091064, + "rewards/rejected": -1.765721082687378, + "step": 258 + }, + { + "epoch": 0.33, + "learning_rate": 7.825932492623046e-08, + "logits/chosen": -3.2544922828674316, + "logits/rejected": -3.0280544757843018, + "logps/chosen": -278.0334167480469, + "logps/rejected": -458.2476806640625, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4883834719657898, + "rewards/margins": 1.5708435773849487, + "rewards/rejected": -1.0824600458145142, + "step": 259 + }, + { + "epoch": 0.33, + "learning_rate": 7.80888022013207e-08, + "logits/chosen": -3.1577401161193848, + "logits/rejected": -3.060087203979492, + "logps/chosen": -232.7978515625, + "logps/rejected": -436.724365234375, + "loss": 0.3762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.645843505859375, + "rewards/margins": 2.0927858352661133, + "rewards/rejected": -1.4469422101974487, + "step": 260 + }, + { + "epoch": 0.33, + "learning_rate": 7.791780077684364e-08, + "logits/chosen": -3.2412500381469727, + "logits/rejected": -3.1207923889160156, + "logps/chosen": -258.2470397949219, + "logps/rejected": -523.8555908203125, + "loss": 0.3797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5939872860908508, + "rewards/margins": 2.192192792892456, + "rewards/rejected": -1.59820556640625, + "step": 261 + }, + { + "epoch": 0.33, + "learning_rate": 7.774632356706767e-08, + "logits/chosen": -3.163024425506592, + "logits/rejected": -3.0529141426086426, + "logps/chosen": -278.3245849609375, + "logps/rejected": -570.2645874023438, + "loss": 0.3792, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.79632568359375, + "rewards/margins": 2.410810947418213, + "rewards/rejected": -1.614485263824463, + "step": 262 + }, + { + "epoch": 0.34, + "learning_rate": 7.757437349436963e-08, + "logits/chosen": -3.222926139831543, + "logits/rejected": -3.0586557388305664, + "logps/chosen": -274.50048828125, + "logps/rejected": -392.5932922363281, + "loss": 0.3877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4498702883720398, + "rewards/margins": 1.4618682861328125, + "rewards/rejected": -1.0119980573654175, + "step": 263 + }, + { + "epoch": 0.34, + "learning_rate": 7.740195348918516e-08, + "logits/chosen": -3.237031936645508, + "logits/rejected": -3.0554747581481934, + "logps/chosen": -268.8831787109375, + "logps/rejected": -1265.7962646484375, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6322219967842102, + "rewards/margins": 3.9122164249420166, + "rewards/rejected": -3.279994249343872, + "step": 264 + }, + { + "epoch": 0.34, + "learning_rate": 7.722906648995856e-08, + "logits/chosen": -3.1344361305236816, + "logits/rejected": -3.09702730178833, + "logps/chosen": -272.57763671875, + "logps/rejected": -426.663330078125, + "loss": 0.4084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5474197864532471, + "rewards/margins": 1.8268828392028809, + "rewards/rejected": -1.2794631719589233, + "step": 265 + }, + { + "epoch": 0.34, + "learning_rate": 7.705571544309284e-08, + "logits/chosen": -3.195786237716675, + "logits/rejected": -3.1570382118225098, + "logps/chosen": -287.0042419433594, + "logps/rejected": -564.2870483398438, + "loss": 0.3741, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7140312194824219, + "rewards/margins": 2.5981223583221436, + "rewards/rejected": -1.8840911388397217, + "step": 266 + }, + { + "epoch": 0.34, + "learning_rate": 7.688190330289953e-08, + "logits/chosen": -3.2305169105529785, + "logits/rejected": -3.092038631439209, + "logps/chosen": -291.22552490234375, + "logps/rejected": -415.5330810546875, + "loss": 0.3996, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5877593755722046, + "rewards/margins": 1.6569931507110596, + "rewards/rejected": -1.069233775138855, + "step": 267 + }, + { + "epoch": 0.34, + "learning_rate": 7.670763303154821e-08, + "logits/chosen": -3.169088125228882, + "logits/rejected": -3.077226161956787, + "logps/chosen": -260.267822265625, + "logps/rejected": -715.7284545898438, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7758934497833252, + "rewards/margins": 2.9713830947875977, + "rewards/rejected": -2.1954894065856934, + "step": 268 + }, + { + "epoch": 0.34, + "learning_rate": 7.653290759901616e-08, + "logits/chosen": -3.2715036869049072, + "logits/rejected": -3.0227279663085938, + "logps/chosen": -238.01698303222656, + "logps/rejected": -778.4895629882812, + "loss": 0.348, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5489578247070312, + "rewards/margins": 3.090742588043213, + "rewards/rejected": -2.5417847633361816, + "step": 269 + }, + { + "epoch": 0.34, + "learning_rate": 7.635772998303762e-08, + "logits/chosen": -3.238170862197876, + "logits/rejected": -3.1598987579345703, + "logps/chosen": -250.9261474609375, + "logps/rejected": -542.6221923828125, + "loss": 0.3411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5384262204170227, + "rewards/margins": 2.1461594104766846, + "rewards/rejected": -1.6077332496643066, + "step": 270 + }, + { + "epoch": 0.35, + "learning_rate": 7.618210316905315e-08, + "logits/chosen": -3.236649751663208, + "logits/rejected": -3.142137050628662, + "logps/chosen": -252.16390991210938, + "logps/rejected": -409.3905029296875, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4925377070903778, + "rewards/margins": 2.0762596130371094, + "rewards/rejected": -1.5837218761444092, + "step": 271 + }, + { + "epoch": 0.35, + "learning_rate": 7.60060301501587e-08, + "logits/chosen": -3.1898398399353027, + "logits/rejected": -3.083029270172119, + "logps/chosen": -267.3125, + "logps/rejected": -1402.85009765625, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5706466436386108, + "rewards/margins": 4.659287929534912, + "rewards/rejected": -4.088641166687012, + "step": 272 + }, + { + "epoch": 0.35, + "learning_rate": 7.582951392705459e-08, + "logits/chosen": -3.238384485244751, + "logits/rejected": -3.1668167114257812, + "logps/chosen": -247.49732971191406, + "logps/rejected": -760.72607421875, + "loss": 0.3298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5264488458633423, + "rewards/margins": 3.54891300201416, + "rewards/rejected": -3.0224640369415283, + "step": 273 + }, + { + "epoch": 0.35, + "learning_rate": 7.565255750799438e-08, + "logits/chosen": -3.1803929805755615, + "logits/rejected": -3.1081862449645996, + "logps/chosen": -251.3216552734375, + "logps/rejected": -429.3166809082031, + "loss": 0.3377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6661468744277954, + "rewards/margins": 1.7219040393829346, + "rewards/rejected": -1.0557571649551392, + "step": 274 + }, + { + "epoch": 0.35, + "learning_rate": 7.547516390873365e-08, + "logits/chosen": -3.1895341873168945, + "logits/rejected": -3.090141773223877, + "logps/chosen": -282.8044128417969, + "logps/rejected": -1087.6593017578125, + "loss": 0.3381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4757828116416931, + "rewards/margins": 4.111555576324463, + "rewards/rejected": -3.635772705078125, + "step": 275 + }, + { + "epoch": 0.35, + "learning_rate": 7.529733615247851e-08, + "logits/chosen": -3.181023597717285, + "logits/rejected": -3.113766670227051, + "logps/chosen": -240.5284423828125, + "logps/rejected": -1467.79443359375, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6007980108261108, + "rewards/margins": 5.008580207824707, + "rewards/rejected": -4.407782077789307, + "step": 276 + }, + { + "epoch": 0.35, + "learning_rate": 7.511907726983418e-08, + "logits/chosen": -3.224748373031616, + "logits/rejected": -3.1554925441741943, + "logps/chosen": -245.17010498046875, + "logps/rejected": -430.31134033203125, + "loss": 0.3993, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4324173331260681, + "rewards/margins": 1.6892868280410767, + "rewards/rejected": -1.2568695545196533, + "step": 277 + }, + { + "epoch": 0.35, + "learning_rate": 7.494039029875325e-08, + "logits/chosen": -3.191553831100464, + "logits/rejected": -3.048285722732544, + "logps/chosen": -278.761474609375, + "logps/rejected": -581.3726806640625, + "loss": 0.3936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.630352795124054, + "rewards/margins": 2.5671067237854004, + "rewards/rejected": -1.9367538690567017, + "step": 278 + }, + { + "epoch": 0.36, + "learning_rate": 7.4761278284484e-08, + "logits/chosen": -3.240879535675049, + "logits/rejected": -3.1762325763702393, + "logps/chosen": -252.4827880859375, + "logps/rejected": -728.6073608398438, + "loss": 0.3161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5736312866210938, + "rewards/margins": 3.0306198596954346, + "rewards/rejected": -2.456988573074341, + "step": 279 + }, + { + "epoch": 0.36, + "learning_rate": 7.45817442795184e-08, + "logits/chosen": -3.2448368072509766, + "logits/rejected": -3.139061212539673, + "logps/chosen": -246.44471740722656, + "logps/rejected": -547.3530883789062, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6580917835235596, + "rewards/margins": 2.397573947906494, + "rewards/rejected": -1.7394821643829346, + "step": 280 + }, + { + "epoch": 0.36, + "learning_rate": 7.440179134354014e-08, + "logits/chosen": -3.208570957183838, + "logits/rejected": -3.0202980041503906, + "logps/chosen": -258.1172180175781, + "logps/rejected": -336.66412353515625, + "loss": 0.3929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6174072027206421, + "rewards/margins": 1.1706757545471191, + "rewards/rejected": -0.5532684326171875, + "step": 281 + }, + { + "epoch": 0.36, + "learning_rate": 7.422142254337254e-08, + "logits/chosen": -3.2536845207214355, + "logits/rejected": -3.1284191608428955, + "logps/chosen": -277.00750732421875, + "logps/rejected": -367.1120910644531, + "loss": 0.4213, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5982620716094971, + "rewards/margins": 1.6821351051330566, + "rewards/rejected": -1.0838730335235596, + "step": 282 + }, + { + "epoch": 0.36, + "learning_rate": 7.404064095292619e-08, + "logits/chosen": -3.213559627532959, + "logits/rejected": -3.1598563194274902, + "logps/chosen": -293.13409423828125, + "logps/rejected": -1501.52978515625, + "loss": 0.4067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7568161487579346, + "rewards/margins": 5.247291564941406, + "rewards/rejected": -4.490475654602051, + "step": 283 + }, + { + "epoch": 0.36, + "learning_rate": 7.385944965314658e-08, + "logits/chosen": -3.2212753295898438, + "logits/rejected": -3.1120564937591553, + "logps/chosen": -264.46075439453125, + "logps/rejected": -798.0558471679688, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.547052800655365, + "rewards/margins": 3.3613076210021973, + "rewards/rejected": -2.8142547607421875, + "step": 284 + }, + { + "epoch": 0.36, + "learning_rate": 7.367785173196164e-08, + "logits/chosen": -3.218261957168579, + "logits/rejected": -3.104633331298828, + "logps/chosen": -242.0089111328125, + "logps/rejected": -405.90887451171875, + "loss": 0.37, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.684979259967804, + "rewards/margins": 1.8213715553283691, + "rewards/rejected": -1.1363922357559204, + "step": 285 + }, + { + "epoch": 0.36, + "learning_rate": 7.34958502842291e-08, + "logits/chosen": -3.1442952156066895, + "logits/rejected": -3.020730495452881, + "logps/chosen": -287.85260009765625, + "logps/rejected": -573.6448364257812, + "loss": 0.4238, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6836105585098267, + "rewards/margins": 2.330941677093506, + "rewards/rejected": -1.6473312377929688, + "step": 286 + }, + { + "epoch": 0.37, + "learning_rate": 7.331344841168371e-08, + "logits/chosen": -3.2261619567871094, + "logits/rejected": -3.1810126304626465, + "logps/chosen": -273.2899169921875, + "logps/rejected": -658.1124267578125, + "loss": 0.3428, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6618728637695312, + "rewards/margins": 2.6684982776641846, + "rewards/rejected": -2.0066254138946533, + "step": 287 + }, + { + "epoch": 0.37, + "learning_rate": 7.313064922288446e-08, + "logits/chosen": -3.1827545166015625, + "logits/rejected": -3.080845594406128, + "logps/chosen": -286.10601806640625, + "logps/rejected": -277.8323974609375, + "loss": 0.3665, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.631787121295929, + "rewards/margins": 1.2272125482559204, + "rewards/rejected": -0.5954254269599915, + "step": 288 + }, + { + "epoch": 0.37, + "learning_rate": 7.294745583316145e-08, + "logits/chosen": -3.241729736328125, + "logits/rejected": -3.0170392990112305, + "logps/chosen": -249.64878845214844, + "logps/rejected": -1706.4073486328125, + "loss": 0.3673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5323638916015625, + "rewards/margins": 4.995724678039551, + "rewards/rejected": -4.463360786437988, + "step": 289 + }, + { + "epoch": 0.37, + "learning_rate": 7.2763871364563e-08, + "logits/chosen": -3.1980690956115723, + "logits/rejected": -3.127747058868408, + "logps/chosen": -249.64645385742188, + "logps/rejected": -1818.2667236328125, + "loss": 0.3911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7050621509552002, + "rewards/margins": 5.569634437561035, + "rewards/rejected": -4.864572048187256, + "step": 290 + }, + { + "epoch": 0.37, + "learning_rate": 7.257989894580226e-08, + "logits/chosen": -3.2275872230529785, + "logits/rejected": -3.0495147705078125, + "logps/chosen": -244.2228546142578, + "logps/rejected": -1054.7816162109375, + "loss": 0.3888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6150360107421875, + "rewards/margins": 3.5391664505004883, + "rewards/rejected": -2.924130439758301, + "step": 291 + }, + { + "epoch": 0.37, + "learning_rate": 7.239554171220401e-08, + "logits/chosen": -3.167515277862549, + "logits/rejected": -3.0358333587646484, + "logps/chosen": -281.1877136230469, + "logps/rejected": -458.151123046875, + "loss": 0.3725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6963142156600952, + "rewards/margins": 1.739235758781433, + "rewards/rejected": -1.042921543121338, + "step": 292 + }, + { + "epoch": 0.37, + "learning_rate": 7.221080280565119e-08, + "logits/chosen": -3.2191336154937744, + "logits/rejected": -3.054119110107422, + "logps/chosen": -267.6853332519531, + "logps/rejected": -631.2477416992188, + "loss": 0.4191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5067421197891235, + "rewards/margins": 2.128070831298828, + "rewards/rejected": -1.6213287115097046, + "step": 293 + }, + { + "epoch": 0.37, + "learning_rate": 7.202568537453128e-08, + "logits/chosen": -3.2285284996032715, + "logits/rejected": -3.1465985774993896, + "logps/chosen": -231.7071990966797, + "logps/rejected": -668.5352783203125, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5994430780410767, + "rewards/margins": 3.21026611328125, + "rewards/rejected": -2.610823154449463, + "step": 294 + }, + { + "epoch": 0.38, + "learning_rate": 7.184019257368282e-08, + "logits/chosen": -3.194899082183838, + "logits/rejected": -3.1398561000823975, + "logps/chosen": -278.76007080078125, + "logps/rejected": -688.0074462890625, + "loss": 0.3765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6485732793807983, + "rewards/margins": 2.8306686878204346, + "rewards/rejected": -2.1820952892303467, + "step": 295 + }, + { + "epoch": 0.38, + "learning_rate": 7.165432756434145e-08, + "logits/chosen": -3.2298903465270996, + "logits/rejected": -3.1521010398864746, + "logps/chosen": -255.7893524169922, + "logps/rejected": -455.32830810546875, + "loss": 0.3711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6129105091094971, + "rewards/margins": 2.6431381702423096, + "rewards/rejected": -2.0302276611328125, + "step": 296 + }, + { + "epoch": 0.38, + "learning_rate": 7.14680935140862e-08, + "logits/chosen": -3.1669583320617676, + "logits/rejected": -3.128098487854004, + "logps/chosen": -244.5135040283203, + "logps/rejected": -562.5509033203125, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7565544247627258, + "rewards/margins": 2.5186028480529785, + "rewards/rejected": -1.762048363685608, + "step": 297 + }, + { + "epoch": 0.38, + "learning_rate": 7.12814935967853e-08, + "logits/chosen": -3.168271064758301, + "logits/rejected": -3.1408472061157227, + "logps/chosen": -258.435546875, + "logps/rejected": -863.5226440429688, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4979743957519531, + "rewards/margins": 3.2670083045959473, + "rewards/rejected": -2.769033908843994, + "step": 298 + }, + { + "epoch": 0.38, + "learning_rate": 7.109453099254239e-08, + "logits/chosen": -3.2456564903259277, + "logits/rejected": -3.096428871154785, + "logps/chosen": -277.0466003417969, + "logps/rejected": -642.8870849609375, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7049407958984375, + "rewards/margins": 2.697458028793335, + "rewards/rejected": -1.9925172328948975, + "step": 299 + }, + { + "epoch": 0.38, + "learning_rate": 7.0907208887642e-08, + "logits/chosen": -3.1831846237182617, + "logits/rejected": -3.1000850200653076, + "logps/chosen": -246.80194091796875, + "logps/rejected": -897.7669677734375, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6593201160430908, + "rewards/margins": 3.5792055130004883, + "rewards/rejected": -2.9198851585388184, + "step": 300 + }, + { + "epoch": 0.38, + "learning_rate": 7.07195304744955e-08, + "logits/chosen": -3.260908842086792, + "logits/rejected": -3.1739776134490967, + "logps/chosen": -257.0269470214844, + "logps/rejected": -917.8510131835938, + "loss": 0.3258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4712127447128296, + "rewards/margins": 3.2610349655151367, + "rewards/rejected": -2.7898223400115967, + "step": 301 + }, + { + "epoch": 0.38, + "learning_rate": 7.053149895158654e-08, + "logits/chosen": -3.152566432952881, + "logits/rejected": -2.8628149032592773, + "logps/chosen": -250.86782836914062, + "logps/rejected": -826.0477294921875, + "loss": 0.4063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5342880487442017, + "rewards/margins": 2.3621597290039062, + "rewards/rejected": -1.8278716802597046, + "step": 302 + }, + { + "epoch": 0.39, + "learning_rate": 7.034311752341666e-08, + "logits/chosen": -3.291776180267334, + "logits/rejected": -3.1820125579833984, + "logps/chosen": -264.0392150878906, + "logps/rejected": -958.594970703125, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6094719171524048, + "rewards/margins": 4.586922645568848, + "rewards/rejected": -3.9774506092071533, + "step": 303 + }, + { + "epoch": 0.39, + "learning_rate": 7.015438940045051e-08, + "logits/chosen": -3.232985258102417, + "logits/rejected": -3.0752992630004883, + "logps/chosen": -290.117431640625, + "logps/rejected": -643.3663330078125, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6692962646484375, + "rewards/margins": 2.498199462890625, + "rewards/rejected": -1.8289031982421875, + "step": 304 + }, + { + "epoch": 0.39, + "learning_rate": 6.996531779906133e-08, + "logits/chosen": -3.2079646587371826, + "logits/rejected": -3.1346848011016846, + "logps/chosen": -303.12664794921875, + "logps/rejected": -867.5700073242188, + "loss": 0.3462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5715515613555908, + "rewards/margins": 3.8500733375549316, + "rewards/rejected": -3.278521776199341, + "step": 305 + }, + { + "epoch": 0.39, + "learning_rate": 6.977590594147601e-08, + "logits/chosen": -3.1840667724609375, + "logits/rejected": -3.1509251594543457, + "logps/chosen": -258.2975769042969, + "logps/rejected": -556.5767822265625, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6929847598075867, + "rewards/margins": 2.365650177001953, + "rewards/rejected": -1.6726653575897217, + "step": 306 + }, + { + "epoch": 0.39, + "learning_rate": 6.95861570557202e-08, + "logits/chosen": -3.260798931121826, + "logits/rejected": -3.0790116786956787, + "logps/chosen": -257.42742919921875, + "logps/rejected": -593.2567138671875, + "loss": 0.3592, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6713745594024658, + "rewards/margins": 2.5226149559020996, + "rewards/rejected": -1.8512405157089233, + "step": 307 + }, + { + "epoch": 0.39, + "learning_rate": 6.939607437556332e-08, + "logits/chosen": -3.2384748458862305, + "logits/rejected": -3.1354775428771973, + "logps/chosen": -246.22640991210938, + "logps/rejected": -1183.111572265625, + "loss": 0.3335, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6883881092071533, + "rewards/margins": 4.711153984069824, + "rewards/rejected": -4.02276611328125, + "step": 308 + }, + { + "epoch": 0.39, + "learning_rate": 6.920566114046341e-08, + "logits/chosen": -3.1928658485412598, + "logits/rejected": -3.0874578952789307, + "logps/chosen": -280.6830139160156, + "logps/rejected": -520.6817016601562, + "loss": 0.3717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6309082508087158, + "rewards/margins": 2.524472236633301, + "rewards/rejected": -1.8935638666152954, + "step": 309 + }, + { + "epoch": 0.39, + "learning_rate": 6.9014920595512e-08, + "logits/chosen": -3.2521419525146484, + "logits/rejected": -3.0563526153564453, + "logps/chosen": -192.95547485351562, + "logps/rejected": -609.1278686523438, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4848625361919403, + "rewards/margins": 2.3708291053771973, + "rewards/rejected": -1.8859665393829346, + "step": 310 + }, + { + "epoch": 0.4, + "learning_rate": 6.882385599137872e-08, + "logits/chosen": -3.256758689880371, + "logits/rejected": -3.0288145542144775, + "logps/chosen": -255.2431182861328, + "logps/rejected": -1283.4351806640625, + "loss": 0.3103, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6519806385040283, + "rewards/margins": 4.0796709060668945, + "rewards/rejected": -3.427690029144287, + "step": 311 + }, + { + "epoch": 0.4, + "learning_rate": 6.863247058425593e-08, + "logits/chosen": -3.1810083389282227, + "logits/rejected": -3.0196967124938965, + "logps/chosen": -238.87843322753906, + "logps/rejected": -405.5235595703125, + "loss": 0.353, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6055595278739929, + "rewards/margins": 2.0739264488220215, + "rewards/rejected": -1.468367099761963, + "step": 312 + }, + { + "epoch": 0.4, + "learning_rate": 6.844076763580324e-08, + "logits/chosen": -3.169071674346924, + "logits/rejected": -3.068519115447998, + "logps/chosen": -267.01397705078125, + "logps/rejected": -530.2950439453125, + "loss": 0.3504, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5615478754043579, + "rewards/margins": 2.1072616577148438, + "rewards/rejected": -1.5457137823104858, + "step": 313 + }, + { + "epoch": 0.4, + "learning_rate": 6.824875041309193e-08, + "logits/chosen": -3.225882053375244, + "logits/rejected": -3.1110382080078125, + "logps/chosen": -264.82666015625, + "logps/rejected": -648.922607421875, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5374343991279602, + "rewards/margins": 2.9961135387420654, + "rewards/rejected": -2.45867919921875, + "step": 314 + }, + { + "epoch": 0.4, + "learning_rate": 6.80564221885492e-08, + "logits/chosen": -3.206669330596924, + "logits/rejected": -3.1415257453918457, + "logps/chosen": -272.2914733886719, + "logps/rejected": -895.7167358398438, + "loss": 0.3681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6838181018829346, + "rewards/margins": 4.068736553192139, + "rewards/rejected": -3.384918212890625, + "step": 315 + }, + { + "epoch": 0.4, + "learning_rate": 6.78637862399025e-08, + "logits/chosen": -3.242002010345459, + "logits/rejected": -3.1317036151885986, + "logps/chosen": -236.78085327148438, + "logps/rejected": -846.0016479492188, + "loss": 0.3463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6934906244277954, + "rewards/margins": 3.741694688796997, + "rewards/rejected": -3.048204183578491, + "step": 316 + }, + { + "epoch": 0.4, + "learning_rate": 6.767084585012364e-08, + "logits/chosen": -3.207566738128662, + "logits/rejected": -3.094359874725342, + "logps/chosen": -279.3963928222656, + "logps/rejected": -558.6920776367188, + "loss": 0.3823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6077201962471008, + "rewards/margins": 2.4112191200256348, + "rewards/rejected": -1.8034988641738892, + "step": 317 + }, + { + "epoch": 0.41, + "learning_rate": 6.747760430737282e-08, + "logits/chosen": -3.2109241485595703, + "logits/rejected": -3.112882614135742, + "logps/chosen": -227.8834228515625, + "logps/rejected": -617.474853515625, + "loss": 0.3273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5459389090538025, + "rewards/margins": 2.9714972972869873, + "rewards/rejected": -2.425558567047119, + "step": 318 + }, + { + "epoch": 0.41, + "learning_rate": 6.728406490494257e-08, + "logits/chosen": -3.2839040756225586, + "logits/rejected": -3.058953046798706, + "logps/chosen": -275.7601013183594, + "logps/rejected": -702.5219116210938, + "loss": 0.3822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6147781610488892, + "rewards/margins": 2.7925643920898438, + "rewards/rejected": -2.177786350250244, + "step": 319 + }, + { + "epoch": 0.41, + "learning_rate": 6.709023094120162e-08, + "logits/chosen": -3.2485909461975098, + "logits/rejected": -3.1495471000671387, + "logps/chosen": -261.490966796875, + "logps/rejected": -364.25360107421875, + "loss": 0.3886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6387618780136108, + "rewards/margins": 1.6340057849884033, + "rewards/rejected": -0.9952439069747925, + "step": 320 + }, + { + "epoch": 0.41, + "learning_rate": 6.689610571953886e-08, + "logits/chosen": -3.185666084289551, + "logits/rejected": -3.1678826808929443, + "logps/chosen": -267.1490173339844, + "logps/rejected": -644.1242065429688, + "loss": 0.3733, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7451789975166321, + "rewards/margins": 3.257025957107544, + "rewards/rejected": -2.5118470191955566, + "step": 321 + }, + { + "epoch": 0.41, + "learning_rate": 6.670169254830676e-08, + "logits/chosen": -3.2202961444854736, + "logits/rejected": -3.0525383949279785, + "logps/chosen": -233.91439819335938, + "logps/rejected": -1805.923095703125, + "loss": 0.2943, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5234947204589844, + "rewards/margins": 6.190389156341553, + "rewards/rejected": -5.666894435882568, + "step": 322 + }, + { + "epoch": 0.41, + "learning_rate": 6.650699474076519e-08, + "logits/chosen": -3.2201623916625977, + "logits/rejected": -3.129304885864258, + "logps/chosen": -274.170166015625, + "logps/rejected": -484.02655029296875, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6006561517715454, + "rewards/margins": 2.161846160888672, + "rewards/rejected": -1.561190128326416, + "step": 323 + }, + { + "epoch": 0.41, + "learning_rate": 6.631201561502489e-08, + "logits/chosen": -3.231149673461914, + "logits/rejected": -3.145214557647705, + "logps/chosen": -277.907958984375, + "logps/rejected": -621.90966796875, + "loss": 0.3604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7247635126113892, + "rewards/margins": 2.583944797515869, + "rewards/rejected": -1.85918128490448, + "step": 324 + }, + { + "epoch": 0.41, + "learning_rate": 6.611675849399092e-08, + "logits/chosen": -3.2344157695770264, + "logits/rejected": -3.1457324028015137, + "logps/chosen": -242.2562713623047, + "logps/rejected": -468.2638854980469, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6403694152832031, + "rewards/margins": 2.000324249267578, + "rewards/rejected": -1.3599549531936646, + "step": 325 + }, + { + "epoch": 0.42, + "learning_rate": 6.592122670530604e-08, + "logits/chosen": -3.2435190677642822, + "logits/rejected": -3.136463165283203, + "logps/chosen": -261.8953857421875, + "logps/rejected": -506.09539794921875, + "loss": 0.3631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5601837635040283, + "rewards/margins": 2.2976670265197754, + "rewards/rejected": -1.737483263015747, + "step": 326 + }, + { + "epoch": 0.42, + "learning_rate": 6.572542358129402e-08, + "logits/chosen": -3.210536479949951, + "logits/rejected": -3.123551845550537, + "logps/chosen": -274.0307312011719, + "logps/rejected": -680.39990234375, + "loss": 0.357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6556099057197571, + "rewards/margins": 3.085648536682129, + "rewards/rejected": -2.4300384521484375, + "step": 327 + }, + { + "epoch": 0.42, + "learning_rate": 6.552935245890278e-08, + "logits/chosen": -3.181593179702759, + "logits/rejected": -3.06083083152771, + "logps/chosen": -242.00027465820312, + "logps/rejected": -355.4420166015625, + "loss": 0.3614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6109573841094971, + "rewards/margins": 1.812525987625122, + "rewards/rejected": -1.201568603515625, + "step": 328 + }, + { + "epoch": 0.42, + "learning_rate": 6.53330166796476e-08, + "logits/chosen": -3.238454580307007, + "logits/rejected": -3.1699745655059814, + "logps/chosen": -299.7017822265625, + "logps/rejected": -419.0378723144531, + "loss": 0.3873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5815688967704773, + "rewards/margins": 1.9884324073791504, + "rewards/rejected": -1.4068634510040283, + "step": 329 + }, + { + "epoch": 0.42, + "learning_rate": 6.513641958955414e-08, + "logits/chosen": -3.1574459075927734, + "logits/rejected": -3.119083881378174, + "logps/chosen": -259.3731689453125, + "logps/rejected": -1181.5028076171875, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6808853149414062, + "rewards/margins": 4.785417556762695, + "rewards/rejected": -4.104531764984131, + "step": 330 + }, + { + "epoch": 0.42, + "learning_rate": 6.493956453910136e-08, + "logits/chosen": -3.2351598739624023, + "logits/rejected": -3.0781307220458984, + "logps/chosen": -240.66488647460938, + "logps/rejected": -395.773681640625, + "loss": 0.3987, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7788292169570923, + "rewards/margins": 1.9296729564666748, + "rewards/rejected": -1.150843858718872, + "step": 331 + }, + { + "epoch": 0.42, + "learning_rate": 6.474245488316457e-08, + "logits/chosen": -3.202373504638672, + "logits/rejected": -3.165569305419922, + "logps/chosen": -276.07000732421875, + "logps/rejected": -646.40673828125, + "loss": 0.3298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7009620666503906, + "rewards/margins": 3.107480525970459, + "rewards/rejected": -2.4065184593200684, + "step": 332 + }, + { + "epoch": 0.42, + "learning_rate": 6.454509398095807e-08, + "logits/chosen": -3.228877067565918, + "logits/rejected": -3.1381492614746094, + "logps/chosen": -272.10040283203125, + "logps/rejected": -778.0130615234375, + "loss": 0.3441, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8581283688545227, + "rewards/margins": 3.6534743309020996, + "rewards/rejected": -2.795346260070801, + "step": 333 + }, + { + "epoch": 0.43, + "learning_rate": 6.434748519597804e-08, + "logits/chosen": -3.2218236923217773, + "logits/rejected": -3.1177725791931152, + "logps/chosen": -263.6954345703125, + "logps/rejected": -762.1744995117188, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6894683837890625, + "rewards/margins": 3.7182648181915283, + "rewards/rejected": -3.028796434402466, + "step": 334 + }, + { + "epoch": 0.43, + "learning_rate": 6.414963189594512e-08, + "logits/chosen": -3.2245078086853027, + "logits/rejected": -3.0847573280334473, + "logps/chosen": -265.0016784667969, + "logps/rejected": -555.0517578125, + "loss": 0.3581, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7019546627998352, + "rewards/margins": 2.7320494651794434, + "rewards/rejected": -2.030094861984253, + "step": 335 + }, + { + "epoch": 0.43, + "learning_rate": 6.395153745274715e-08, + "logits/chosen": -3.229811668395996, + "logits/rejected": -3.098731517791748, + "logps/chosen": -278.39300537109375, + "logps/rejected": -1010.6920166015625, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.576434314250946, + "rewards/margins": 4.196420192718506, + "rewards/rejected": -3.619986057281494, + "step": 336 + }, + { + "epoch": 0.43, + "learning_rate": 6.375320524238154e-08, + "logits/chosen": -3.2282800674438477, + "logits/rejected": -3.076411724090576, + "logps/chosen": -275.1763916015625, + "logps/rejected": -1102.8814697265625, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6854171752929688, + "rewards/margins": 4.4059953689575195, + "rewards/rejected": -3.720578193664551, + "step": 337 + }, + { + "epoch": 0.43, + "learning_rate": 6.355463864489782e-08, + "logits/chosen": -3.1875040531158447, + "logits/rejected": -3.0878686904907227, + "logps/chosen": -290.3229064941406, + "logps/rejected": -747.6629028320312, + "loss": 0.3709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.842419445514679, + "rewards/margins": 3.696493625640869, + "rewards/rejected": -2.854074239730835, + "step": 338 + }, + { + "epoch": 0.43, + "learning_rate": 6.33558410443401e-08, + "logits/chosen": -3.17329740524292, + "logits/rejected": -3.0454978942871094, + "logps/chosen": -296.6310119628906, + "logps/rejected": -554.7703857421875, + "loss": 0.3748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7479743957519531, + "rewards/margins": 2.7531824111938477, + "rewards/rejected": -2.0052077770233154, + "step": 339 + }, + { + "epoch": 0.43, + "learning_rate": 6.315681582868926e-08, + "logits/chosen": -3.210263252258301, + "logits/rejected": -3.1396303176879883, + "logps/chosen": -275.1025695800781, + "logps/rejected": -336.0916748046875, + "loss": 0.4173, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8976761102676392, + "rewards/margins": 1.8028411865234375, + "rewards/rejected": -0.9051651358604431, + "step": 340 + }, + { + "epoch": 0.43, + "learning_rate": 6.295756638980528e-08, + "logits/chosen": -3.2631731033325195, + "logits/rejected": -3.0504093170166016, + "logps/chosen": -240.60366821289062, + "logps/rejected": -370.5918273925781, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7129570245742798, + "rewards/margins": 1.9466804265975952, + "rewards/rejected": -1.2337234020233154, + "step": 341 + }, + { + "epoch": 0.44, + "learning_rate": 6.275809612336946e-08, + "logits/chosen": -3.2300992012023926, + "logits/rejected": -3.0759565830230713, + "logps/chosen": -278.25457763671875, + "logps/rejected": -726.2928466796875, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7272980213165283, + "rewards/margins": 3.4558258056640625, + "rewards/rejected": -2.7285280227661133, + "step": 342 + }, + { + "epoch": 0.44, + "learning_rate": 6.255840842882653e-08, + "logits/chosen": -3.2714896202087402, + "logits/rejected": -3.1509101390838623, + "logps/chosen": -287.25018310546875, + "logps/rejected": -460.7113952636719, + "loss": 0.3547, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5036705136299133, + "rewards/margins": 1.9429527521133423, + "rewards/rejected": -1.4392821788787842, + "step": 343 + }, + { + "epoch": 0.44, + "learning_rate": 6.23585067093267e-08, + "logits/chosen": -3.221683979034424, + "logits/rejected": -3.197873115539551, + "logps/chosen": -264.4109802246094, + "logps/rejected": -675.1456298828125, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6876205205917358, + "rewards/margins": 2.9045608043670654, + "rewards/rejected": -2.216940402984619, + "step": 344 + }, + { + "epoch": 0.44, + "learning_rate": 6.215839437166766e-08, + "logits/chosen": -3.2561168670654297, + "logits/rejected": -3.129390239715576, + "logps/chosen": -257.55609130859375, + "logps/rejected": -370.2957763671875, + "loss": 0.3778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6532738208770752, + "rewards/margins": 1.7791695594787598, + "rewards/rejected": -1.1258957386016846, + "step": 345 + }, + { + "epoch": 0.44, + "learning_rate": 6.195807482623652e-08, + "logits/chosen": -3.2789087295532227, + "logits/rejected": -2.9468722343444824, + "logps/chosen": -250.0885009765625, + "logps/rejected": -1154.9915771484375, + "loss": 0.3752, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7979896664619446, + "rewards/margins": 4.098119258880615, + "rewards/rejected": -3.3001296520233154, + "step": 346 + }, + { + "epoch": 0.44, + "learning_rate": 6.175755148695173e-08, + "logits/chosen": -3.198063373565674, + "logits/rejected": -3.0647132396698, + "logps/chosen": -279.2771301269531, + "logps/rejected": -4242.96337890625, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5644241571426392, + "rewards/margins": 4.328251838684082, + "rewards/rejected": -3.7638278007507324, + "step": 347 + }, + { + "epoch": 0.44, + "learning_rate": 6.155682777120486e-08, + "logits/chosen": -3.185502529144287, + "logits/rejected": -3.1430888175964355, + "logps/chosen": -250.17803955078125, + "logps/rejected": -519.8248291015625, + "loss": 0.3756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6212852597236633, + "rewards/margins": 2.368911027908325, + "rewards/rejected": -1.747625708580017, + "step": 348 + }, + { + "epoch": 0.44, + "learning_rate": 6.135590709980236e-08, + "logits/chosen": -3.282736301422119, + "logits/rejected": -3.1754841804504395, + "logps/chosen": -247.39523315429688, + "logps/rejected": -931.2933959960938, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7862594723701477, + "rewards/margins": 5.460859775543213, + "rewards/rejected": -4.674600601196289, + "step": 349 + }, + { + "epoch": 0.45, + "learning_rate": 6.115479289690729e-08, + "logits/chosen": -3.1922850608825684, + "logits/rejected": -3.130467414855957, + "logps/chosen": -265.7606506347656, + "logps/rejected": -782.80615234375, + "loss": 0.3281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7093788385391235, + "rewards/margins": 3.9885153770446777, + "rewards/rejected": -3.2791366577148438, + "step": 350 + }, + { + "epoch": 0.45, + "learning_rate": 6.095348858998088e-08, + "logits/chosen": -3.225222587585449, + "logits/rejected": -3.1326165199279785, + "logps/chosen": -271.3457946777344, + "logps/rejected": -383.0349426269531, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7142959833145142, + "rewards/margins": 2.2188661098480225, + "rewards/rejected": -1.5045700073242188, + "step": 351 + }, + { + "epoch": 0.45, + "learning_rate": 6.075199760972428e-08, + "logits/chosen": -3.19018816947937, + "logits/rejected": -3.2133312225341797, + "logps/chosen": -250.18722534179688, + "logps/rejected": -678.7883911132812, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6722091436386108, + "rewards/margins": 3.3354387283325195, + "rewards/rejected": -2.663229465484619, + "step": 352 + }, + { + "epoch": 0.45, + "learning_rate": 6.055032339001994e-08, + "logits/chosen": -3.233020782470703, + "logits/rejected": -2.9999332427978516, + "logps/chosen": -227.3245849609375, + "logps/rejected": -781.0535888671875, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7004325985908508, + "rewards/margins": 3.256398916244507, + "rewards/rejected": -2.555966377258301, + "step": 353 + }, + { + "epoch": 0.45, + "learning_rate": 6.034846936787313e-08, + "logits/chosen": -3.2419495582580566, + "logits/rejected": -3.118199348449707, + "logps/chosen": -270.13385009765625, + "logps/rejected": -580.2787475585938, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6757873296737671, + "rewards/margins": 2.496983289718628, + "rewards/rejected": -1.8211960792541504, + "step": 354 + }, + { + "epoch": 0.45, + "learning_rate": 6.014643898335341e-08, + "logits/chosen": -3.2442381381988525, + "logits/rejected": -3.0558581352233887, + "logps/chosen": -227.2612762451172, + "logps/rejected": -474.29217529296875, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7612503170967102, + "rewards/margins": 2.3337693214416504, + "rewards/rejected": -1.5725189447402954, + "step": 355 + }, + { + "epoch": 0.45, + "learning_rate": 5.994423567953594e-08, + "logits/chosen": -3.1625704765319824, + "logits/rejected": -3.049424409866333, + "logps/chosen": -267.01446533203125, + "logps/rejected": -261.77044677734375, + "loss": 0.4041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6249557733535767, + "rewards/margins": 1.603515625, + "rewards/rejected": -0.9785598516464233, + "step": 356 + }, + { + "epoch": 0.45, + "learning_rate": 5.974186290244286e-08, + "logits/chosen": -3.1348037719726562, + "logits/rejected": -3.1214358806610107, + "logps/chosen": -267.24200439453125, + "logps/rejected": -405.637451171875, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6552917957305908, + "rewards/margins": 1.9493300914764404, + "rewards/rejected": -1.2940384149551392, + "step": 357 + }, + { + "epoch": 0.46, + "learning_rate": 5.9539324100984544e-08, + "logits/chosen": -3.225360870361328, + "logits/rejected": -3.0596866607666016, + "logps/chosen": -286.7353820800781, + "logps/rejected": -1014.926025390625, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8119949102401733, + "rewards/margins": 3.767622470855713, + "rewards/rejected": -2.955627679824829, + "step": 358 + }, + { + "epoch": 0.46, + "learning_rate": 5.933662272690079e-08, + "logits/chosen": -3.201200008392334, + "logits/rejected": -3.164257526397705, + "logps/chosen": -299.71319580078125, + "logps/rejected": -848.9205322265625, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8179977536201477, + "rewards/margins": 3.703932285308838, + "rewards/rejected": -2.885934352874756, + "step": 359 + }, + { + "epoch": 0.46, + "learning_rate": 5.9133762234702e-08, + "logits/chosen": -3.2617154121398926, + "logits/rejected": -3.2026188373565674, + "logps/chosen": -253.29953002929688, + "logps/rejected": -605.815185546875, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6394714117050171, + "rewards/margins": 3.1186447143554688, + "rewards/rejected": -2.479173183441162, + "step": 360 + }, + { + "epoch": 0.46, + "learning_rate": 5.8930746081610386e-08, + "logits/chosen": -3.171210289001465, + "logits/rejected": -3.094325542449951, + "logps/chosen": -299.7009582519531, + "logps/rejected": -1386.303955078125, + "loss": 0.3175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7548630237579346, + "rewards/margins": 5.790196418762207, + "rewards/rejected": -5.035333633422852, + "step": 361 + }, + { + "epoch": 0.46, + "learning_rate": 5.8727577727500924e-08, + "logits/chosen": -3.236953020095825, + "logits/rejected": -3.1310436725616455, + "logps/chosen": -279.1293029785156, + "logps/rejected": -908.18408203125, + "loss": 0.3355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6374099850654602, + "rewards/margins": 3.7881851196289062, + "rewards/rejected": -3.150775194168091, + "step": 362 + }, + { + "epoch": 0.46, + "learning_rate": 5.852426063484249e-08, + "logits/chosen": -3.214144229888916, + "logits/rejected": -3.0711922645568848, + "logps/chosen": -264.03094482421875, + "logps/rejected": -1073.927001953125, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6820083856582642, + "rewards/margins": 4.751176834106445, + "rewards/rejected": -4.0691680908203125, + "step": 363 + }, + { + "epoch": 0.46, + "learning_rate": 5.832079826863883e-08, + "logits/chosen": -3.2379612922668457, + "logits/rejected": -3.1623973846435547, + "logps/chosen": -250.77572631835938, + "logps/rejected": -571.6696166992188, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7851707339286804, + "rewards/margins": 2.8355965614318848, + "rewards/rejected": -2.0504257678985596, + "step": 364 + }, + { + "epoch": 0.46, + "learning_rate": 5.8117194096369426e-08, + "logits/chosen": -3.2338616847991943, + "logits/rejected": -3.1365015506744385, + "logps/chosen": -264.0157470703125, + "logps/rejected": -659.0657348632812, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6174224615097046, + "rewards/margins": 3.2474136352539062, + "rewards/rejected": -2.629991292953491, + "step": 365 + }, + { + "epoch": 0.47, + "learning_rate": 5.791345158793057e-08, + "logits/chosen": -3.225299119949341, + "logits/rejected": -3.1767373085021973, + "logps/chosen": -274.5421142578125, + "logps/rejected": -574.6021118164062, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7804893851280212, + "rewards/margins": 2.5422539710998535, + "rewards/rejected": -1.7617645263671875, + "step": 366 + }, + { + "epoch": 0.47, + "learning_rate": 5.7709574215576054e-08, + "logits/chosen": -3.255784511566162, + "logits/rejected": -3.1005897521972656, + "logps/chosen": -278.8935546875, + "logps/rejected": -1287.404296875, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6550262570381165, + "rewards/margins": 5.1175994873046875, + "rewards/rejected": -4.462573528289795, + "step": 367 + }, + { + "epoch": 0.47, + "learning_rate": 5.750556545385808e-08, + "logits/chosen": -3.126737594604492, + "logits/rejected": -3.116323471069336, + "logps/chosen": -265.2958984375, + "logps/rejected": -407.8165588378906, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7063629627227783, + "rewards/margins": 2.2335662841796875, + "rewards/rejected": -1.5272033214569092, + "step": 368 + }, + { + "epoch": 0.47, + "learning_rate": 5.730142877956809e-08, + "logits/chosen": -3.2569987773895264, + "logits/rejected": -3.086724281311035, + "logps/chosen": -315.4830322265625, + "logps/rejected": -824.9827880859375, + "loss": 0.3622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6915146112442017, + "rewards/margins": 3.558311700820923, + "rewards/rejected": -2.8667969703674316, + "step": 369 + }, + { + "epoch": 0.47, + "learning_rate": 5.70971676716774e-08, + "logits/chosen": -3.202070474624634, + "logits/rejected": -3.1171350479125977, + "logps/chosen": -267.83642578125, + "logps/rejected": -840.9493408203125, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8511680364608765, + "rewards/margins": 3.8680717945098877, + "rewards/rejected": -3.0169036388397217, + "step": 370 + }, + { + "epoch": 0.47, + "learning_rate": 5.689278561127797e-08, + "logits/chosen": -3.2055060863494873, + "logits/rejected": -3.0384624004364014, + "logps/chosen": -262.22552490234375, + "logps/rejected": -584.8299560546875, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44190216064453125, + "rewards/margins": 2.621351718902588, + "rewards/rejected": -2.1794495582580566, + "step": 371 + }, + { + "epoch": 0.47, + "learning_rate": 5.668828608152309e-08, + "logits/chosen": -3.1930131912231445, + "logits/rejected": -3.063206911087036, + "logps/chosen": -281.518310546875, + "logps/rejected": -1076.519775390625, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5837677121162415, + "rewards/margins": 4.266848564147949, + "rewards/rejected": -3.6830811500549316, + "step": 372 + }, + { + "epoch": 0.48, + "learning_rate": 5.6483672567568044e-08, + "logits/chosen": -3.23343563079834, + "logits/rejected": -3.1513991355895996, + "logps/chosen": -243.510498046875, + "logps/rejected": -968.8843994140625, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5792641043663025, + "rewards/margins": 4.402311325073242, + "rewards/rejected": -3.823046922683716, + "step": 373 + }, + { + "epoch": 0.48, + "learning_rate": 5.6278948556510606e-08, + "logits/chosen": -3.2438666820526123, + "logits/rejected": -3.0522642135620117, + "logps/chosen": -286.9366455078125, + "logps/rejected": -681.769287109375, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5430435538291931, + "rewards/margins": 2.9652910232543945, + "rewards/rejected": -2.4222474098205566, + "step": 374 + }, + { + "epoch": 0.48, + "learning_rate": 5.607411753733172e-08, + "logits/chosen": -3.228710651397705, + "logits/rejected": -3.112534999847412, + "logps/chosen": -263.0841369628906, + "logps/rejected": -564.6973876953125, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5904243588447571, + "rewards/margins": 2.270819664001465, + "rewards/rejected": -1.680395483970642, + "step": 375 + }, + { + "epoch": 0.48, + "learning_rate": 5.5869183000836006e-08, + "logits/chosen": -3.1251683235168457, + "logits/rejected": -3.052274227142334, + "logps/chosen": -272.2841491699219, + "logps/rejected": -638.2950439453125, + "loss": 0.322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.803753674030304, + "rewards/margins": 2.698007106781006, + "rewards/rejected": -1.8942536115646362, + "step": 376 + }, + { + "epoch": 0.48, + "learning_rate": 5.5664148439592274e-08, + "logits/chosen": -3.1487231254577637, + "logits/rejected": -3.0841851234436035, + "logps/chosen": -287.10577392578125, + "logps/rejected": -571.0196533203125, + "loss": 0.3998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7128372192382812, + "rewards/margins": 2.8129379749298096, + "rewards/rejected": -2.1001007556915283, + "step": 377 + }, + { + "epoch": 0.48, + "learning_rate": 5.545901734787394e-08, + "logits/chosen": -3.256636619567871, + "logits/rejected": -3.061497211456299, + "logps/chosen": -281.9757080078125, + "logps/rejected": -690.678955078125, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7485641241073608, + "rewards/margins": 3.0777297019958496, + "rewards/rejected": -2.3291656970977783, + "step": 378 + }, + { + "epoch": 0.48, + "learning_rate": 5.525379322159958e-08, + "logits/chosen": -3.147373914718628, + "logits/rejected": -3.1613378524780273, + "logps/chosen": -261.32818603515625, + "logps/rejected": -550.505126953125, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6573807001113892, + "rewards/margins": 2.626879930496216, + "rewards/rejected": -1.9694992303848267, + "step": 379 + }, + { + "epoch": 0.48, + "learning_rate": 5.5048479558273256e-08, + "logits/chosen": -3.22505259513855, + "logits/rejected": -3.0054831504821777, + "logps/chosen": -211.61151123046875, + "logps/rejected": -1152.2745361328125, + "loss": 0.3114, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6949844360351562, + "rewards/margins": 4.237500190734863, + "rewards/rejected": -3.542515754699707, + "step": 380 + }, + { + "epoch": 0.49, + "learning_rate": 5.484307985692499e-08, + "logits/chosen": -3.235170364379883, + "logits/rejected": -3.1188371181488037, + "logps/chosen": -249.55572509765625, + "logps/rejected": -374.6783752441406, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7002594470977783, + "rewards/margins": 2.030561923980713, + "rewards/rejected": -1.3303024768829346, + "step": 381 + }, + { + "epoch": 0.49, + "learning_rate": 5.463759761805109e-08, + "logits/chosen": -3.2978062629699707, + "logits/rejected": -3.1799702644348145, + "logps/chosen": -248.9640655517578, + "logps/rejected": -820.8369750976562, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8375869989395142, + "rewards/margins": 3.766606330871582, + "rewards/rejected": -2.9290192127227783, + "step": 382 + }, + { + "epoch": 0.49, + "learning_rate": 5.443203634355449e-08, + "logits/chosen": -3.238846778869629, + "logits/rejected": -3.192047357559204, + "logps/chosen": -278.8880310058594, + "logps/rejected": -534.0758666992188, + "loss": 0.3344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7454407215118408, + "rewards/margins": 2.6529297828674316, + "rewards/rejected": -1.9074890613555908, + "step": 383 + }, + { + "epoch": 0.49, + "learning_rate": 5.4226399536685075e-08, + "logits/chosen": -3.2318930625915527, + "logits/rejected": -3.0163259506225586, + "logps/chosen": -299.0809326171875, + "logps/rejected": -438.0403747558594, + "loss": 0.3773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5950806140899658, + "rewards/margins": 1.94342041015625, + "rewards/rejected": -1.3483399152755737, + "step": 384 + }, + { + "epoch": 0.49, + "learning_rate": 5.402069070197996e-08, + "logits/chosen": -3.184865951538086, + "logits/rejected": -3.1228790283203125, + "logps/chosen": -306.5110168457031, + "logps/rejected": -499.53582763671875, + "loss": 0.3962, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5685867071151733, + "rewards/margins": 2.1248397827148438, + "rewards/rejected": -1.55625319480896, + "step": 385 + }, + { + "epoch": 0.49, + "learning_rate": 5.381491334520385e-08, + "logits/chosen": -3.2272400856018066, + "logits/rejected": -3.0289082527160645, + "logps/chosen": -246.52447509765625, + "logps/rejected": -917.7881469726562, + "loss": 0.3545, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.589550793170929, + "rewards/margins": 3.5446717739105225, + "rewards/rejected": -2.9551210403442383, + "step": 386 + }, + { + "epoch": 0.49, + "learning_rate": 5.360907097328915e-08, + "logits/chosen": -3.275890350341797, + "logits/rejected": -3.126254081726074, + "logps/chosen": -267.40753173828125, + "logps/rejected": -467.48486328125, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6568435430526733, + "rewards/margins": 2.1186797618865967, + "rewards/rejected": -1.461836338043213, + "step": 387 + }, + { + "epoch": 0.49, + "learning_rate": 5.3403167094276324e-08, + "logits/chosen": -3.2741785049438477, + "logits/rejected": -3.065547227859497, + "logps/chosen": -284.5071716308594, + "logps/rejected": -416.9211120605469, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5544632077217102, + "rewards/margins": 1.7963318824768066, + "rewards/rejected": -1.2418686151504517, + "step": 388 + }, + { + "epoch": 0.5, + "learning_rate": 5.319720521725404e-08, + "logits/chosen": -3.206319808959961, + "logits/rejected": -3.124816417694092, + "logps/chosen": -293.26287841796875, + "logps/rejected": -732.98974609375, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6914657354354858, + "rewards/margins": 3.076626777648926, + "rewards/rejected": -2.3851609230041504, + "step": 389 + }, + { + "epoch": 0.5, + "learning_rate": 5.299118885229943e-08, + "logits/chosen": -3.2319626808166504, + "logits/rejected": -3.144106149673462, + "logps/chosen": -249.2318878173828, + "logps/rejected": -600.5133056640625, + "loss": 0.3414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7612183094024658, + "rewards/margins": 2.6296205520629883, + "rewards/rejected": -1.8684022426605225, + "step": 390 + }, + { + "epoch": 0.5, + "learning_rate": 5.2785121510418164e-08, + "logits/chosen": -3.266145706176758, + "logits/rejected": -3.144723415374756, + "logps/chosen": -229.35525512695312, + "logps/rejected": -717.50146484375, + "loss": 0.3298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4556419253349304, + "rewards/margins": 2.9597373008728027, + "rewards/rejected": -2.5040955543518066, + "step": 391 + }, + { + "epoch": 0.5, + "learning_rate": 5.257900670348472e-08, + "logits/chosen": -3.12154483795166, + "logits/rejected": -3.0091986656188965, + "logps/chosen": -230.98150634765625, + "logps/rejected": -1142.305419921875, + "loss": 0.2844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8334022760391235, + "rewards/margins": 5.153318405151367, + "rewards/rejected": -4.319915771484375, + "step": 392 + }, + { + "epoch": 0.5, + "learning_rate": 5.237284794418251e-08, + "logits/chosen": -3.231339454650879, + "logits/rejected": -3.1364951133728027, + "logps/chosen": -262.1949462890625, + "logps/rejected": -843.2003173828125, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6267051696777344, + "rewards/margins": 3.7320916652679443, + "rewards/rejected": -3.10538649559021, + "step": 393 + }, + { + "epoch": 0.5, + "learning_rate": 5.216664874594394e-08, + "logits/chosen": -3.191300868988037, + "logits/rejected": -3.126401901245117, + "logps/chosen": -276.2685852050781, + "logps/rejected": -412.11322021484375, + "loss": 0.3254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6629608273506165, + "rewards/margins": 2.0126161575317383, + "rewards/rejected": -1.3496551513671875, + "step": 394 + }, + { + "epoch": 0.5, + "learning_rate": 5.196041262289067e-08, + "logits/chosen": -3.2384395599365234, + "logits/rejected": -3.04899263381958, + "logps/chosen": -257.21807861328125, + "logps/rejected": -498.31658935546875, + "loss": 0.321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7899978756904602, + "rewards/margins": 2.4482970237731934, + "rewards/rejected": -1.658299207687378, + "step": 395 + }, + { + "epoch": 0.5, + "learning_rate": 5.175414308977355e-08, + "logits/chosen": -3.2010507583618164, + "logits/rejected": -3.091165542602539, + "logps/chosen": -242.90798950195312, + "logps/rejected": -745.718994140625, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7977310419082642, + "rewards/margins": 3.5055253505706787, + "rewards/rejected": -2.707794189453125, + "step": 396 + }, + { + "epoch": 0.51, + "learning_rate": 5.15478436619129e-08, + "logits/chosen": -3.255770444869995, + "logits/rejected": -3.1483025550842285, + "logps/chosen": -246.0277557373047, + "logps/rejected": -1175.147216796875, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.776252031326294, + "rewards/margins": 5.369507789611816, + "rewards/rejected": -4.593255996704102, + "step": 397 + }, + { + "epoch": 0.51, + "learning_rate": 5.134151785513847e-08, + "logits/chosen": -3.1897506713867188, + "logits/rejected": -3.1428799629211426, + "logps/chosen": -275.273681640625, + "logps/rejected": -539.9623413085938, + "loss": 0.3484, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6734329462051392, + "rewards/margins": 2.6617982387542725, + "rewards/rejected": -1.9883651733398438, + "step": 398 + }, + { + "epoch": 0.51, + "learning_rate": 5.113516918572961e-08, + "logits/chosen": -3.2071328163146973, + "logits/rejected": -3.206360101699829, + "logps/chosen": -258.28369140625, + "logps/rejected": -705.7774658203125, + "loss": 0.3163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7718216180801392, + "rewards/margins": 3.6282362937927246, + "rewards/rejected": -2.856414794921875, + "step": 399 + }, + { + "epoch": 0.51, + "learning_rate": 5.0928801170355265e-08, + "logits/chosen": -3.236013889312744, + "logits/rejected": -3.0804362297058105, + "logps/chosen": -254.41842651367188, + "logps/rejected": -663.1736450195312, + "loss": 0.2899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6906532049179077, + "rewards/margins": 3.3262581825256348, + "rewards/rejected": -2.6356048583984375, + "step": 400 + }, + { + "epoch": 0.51, + "learning_rate": 5.072241732601409e-08, + "logits/chosen": -3.1696581840515137, + "logits/rejected": -3.0998146533966064, + "logps/chosen": -260.0411376953125, + "logps/rejected": -443.4596862792969, + "loss": 0.3912, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8386048078536987, + "rewards/margins": 2.197932481765747, + "rewards/rejected": -1.359327793121338, + "step": 401 + }, + { + "epoch": 0.51, + "learning_rate": 5.051602116997449e-08, + "logits/chosen": -3.2562060356140137, + "logits/rejected": -3.1439685821533203, + "logps/chosen": -222.93826293945312, + "logps/rejected": -369.662353515625, + "loss": 0.3221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7202819585800171, + "rewards/margins": 2.154257297515869, + "rewards/rejected": -1.4339752197265625, + "step": 402 + }, + { + "epoch": 0.51, + "learning_rate": 5.0309616219714724e-08, + "logits/chosen": -3.149498462677002, + "logits/rejected": -3.067265510559082, + "logps/chosen": -302.4503173828125, + "logps/rejected": -357.09832763671875, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8170211911201477, + "rewards/margins": 2.0273513793945312, + "rewards/rejected": -1.2103302478790283, + "step": 403 + }, + { + "epoch": 0.51, + "learning_rate": 5.010320599286291e-08, + "logits/chosen": -3.156874656677246, + "logits/rejected": -3.024984836578369, + "logps/chosen": -260.42681884765625, + "logps/rejected": -1526.234619140625, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8651329278945923, + "rewards/margins": 6.129271507263184, + "rewards/rejected": -5.264138698577881, + "step": 404 + }, + { + "epoch": 0.52, + "learning_rate": 4.9896794007137094e-08, + "logits/chosen": -3.2544901371002197, + "logits/rejected": -3.1190185546875, + "logps/chosen": -271.03656005859375, + "logps/rejected": -566.3451538085938, + "loss": 0.3512, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7142746448516846, + "rewards/margins": 2.8720459938049316, + "rewards/rejected": -2.157771348953247, + "step": 405 + }, + { + "epoch": 0.52, + "learning_rate": 4.9690383780285265e-08, + "logits/chosen": -3.241532802581787, + "logits/rejected": -3.1575822830200195, + "logps/chosen": -236.1625213623047, + "logps/rejected": -725.033935546875, + "loss": 0.3169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7644218802452087, + "rewards/margins": 3.72799015045166, + "rewards/rejected": -2.9635682106018066, + "step": 406 + }, + { + "epoch": 0.52, + "learning_rate": 4.948397883002551e-08, + "logits/chosen": -3.231106758117676, + "logits/rejected": -3.098782539367676, + "logps/chosen": -279.0130310058594, + "logps/rejected": -1108.706298828125, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9125076532363892, + "rewards/margins": 5.1647725105285645, + "rewards/rejected": -4.252264499664307, + "step": 407 + }, + { + "epoch": 0.52, + "learning_rate": 4.927758267398593e-08, + "logits/chosen": -3.26763916015625, + "logits/rejected": -3.1436290740966797, + "logps/chosen": -238.6458740234375, + "logps/rejected": -639.398681640625, + "loss": 0.3219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4984115958213806, + "rewards/margins": 2.886949300765991, + "rewards/rejected": -2.388537645339966, + "step": 408 + }, + { + "epoch": 0.52, + "learning_rate": 4.907119882964474e-08, + "logits/chosen": -3.22021222114563, + "logits/rejected": -3.1465392112731934, + "logps/chosen": -267.3582458496094, + "logps/rejected": -504.51800537109375, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6808547973632812, + "rewards/margins": 2.5639634132385254, + "rewards/rejected": -1.8831086158752441, + "step": 409 + }, + { + "epoch": 0.52, + "learning_rate": 4.8864830814270394e-08, + "logits/chosen": -3.217008113861084, + "logits/rejected": -3.0582337379455566, + "logps/chosen": -268.91680908203125, + "logps/rejected": -492.9132080078125, + "loss": 0.3502, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7027176022529602, + "rewards/margins": 2.380497932434082, + "rewards/rejected": -1.6777801513671875, + "step": 410 + }, + { + "epoch": 0.52, + "learning_rate": 4.865848214486151e-08, + "logits/chosen": -3.233123302459717, + "logits/rejected": -3.130079746246338, + "logps/chosen": -251.6842498779297, + "logps/rejected": -452.0837707519531, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7064621448516846, + "rewards/margins": 2.4076128005981445, + "rewards/rejected": -1.70115065574646, + "step": 411 + }, + { + "epoch": 0.52, + "learning_rate": 4.84521563380871e-08, + "logits/chosen": -3.2502870559692383, + "logits/rejected": -3.008194923400879, + "logps/chosen": -257.29620361328125, + "logps/rejected": -454.1904602050781, + "loss": 0.368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7653763294219971, + "rewards/margins": 2.2921252250671387, + "rewards/rejected": -1.5267486572265625, + "step": 412 + }, + { + "epoch": 0.53, + "learning_rate": 4.8245856910226463e-08, + "logits/chosen": -3.2547073364257812, + "logits/rejected": -3.11653470993042, + "logps/chosen": -252.1581573486328, + "logps/rejected": -832.8375244140625, + "loss": 0.3101, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5514183044433594, + "rewards/margins": 3.8920679092407227, + "rewards/rejected": -3.3406496047973633, + "step": 413 + }, + { + "epoch": 0.53, + "learning_rate": 4.8039587377109334e-08, + "logits/chosen": -3.2677066326141357, + "logits/rejected": -3.1921021938323975, + "logps/chosen": -257.638671875, + "logps/rejected": -691.106689453125, + "loss": 0.3056, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7543106079101562, + "rewards/margins": 3.313746690750122, + "rewards/rejected": -2.559436082839966, + "step": 414 + }, + { + "epoch": 0.53, + "learning_rate": 4.7833351254056056e-08, + "logits/chosen": -3.2185091972351074, + "logits/rejected": -3.0801966190338135, + "logps/chosen": -294.447509765625, + "logps/rejected": -1558.754638671875, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4214416444301605, + "rewards/margins": 5.62103271484375, + "rewards/rejected": -5.199591159820557, + "step": 415 + }, + { + "epoch": 0.53, + "learning_rate": 4.7627152055817495e-08, + "logits/chosen": -3.215862274169922, + "logits/rejected": -3.114138603210449, + "logps/chosen": -261.256103515625, + "logps/rejected": -639.8779296875, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7509682178497314, + "rewards/margins": 3.3709144592285156, + "rewards/rejected": -2.6199464797973633, + "step": 416 + }, + { + "epoch": 0.53, + "learning_rate": 4.7420993296515284e-08, + "logits/chosen": -3.236647605895996, + "logits/rejected": -3.0516481399536133, + "logps/chosen": -275.08673095703125, + "logps/rejected": -869.8739013671875, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8491623401641846, + "rewards/margins": 4.049003601074219, + "rewards/rejected": -3.1998414993286133, + "step": 417 + }, + { + "epoch": 0.53, + "learning_rate": 4.721487848958186e-08, + "logits/chosen": -3.312636375427246, + "logits/rejected": -3.1503701210021973, + "logps/chosen": -233.27833557128906, + "logps/rejected": -958.4671020507812, + "loss": 0.2857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6504700183868408, + "rewards/margins": 4.314703464508057, + "rewards/rejected": -3.664233446121216, + "step": 418 + }, + { + "epoch": 0.53, + "learning_rate": 4.700881114770057e-08, + "logits/chosen": -3.293053150177002, + "logits/rejected": -3.1455564498901367, + "logps/chosen": -265.2518005371094, + "logps/rejected": -645.950927734375, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9064087271690369, + "rewards/margins": 3.705761671066284, + "rewards/rejected": -2.7993531227111816, + "step": 419 + }, + { + "epoch": 0.54, + "learning_rate": 4.680279478274596e-08, + "logits/chosen": -3.1567182540893555, + "logits/rejected": -2.995060443878174, + "logps/chosen": -256.4830627441406, + "logps/rejected": -445.5044250488281, + "loss": 0.3322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6799560785293579, + "rewards/margins": 2.123734951019287, + "rewards/rejected": -1.4437789916992188, + "step": 420 + }, + { + "epoch": 0.54, + "learning_rate": 4.659683290572367e-08, + "logits/chosen": -3.1392369270324707, + "logits/rejected": -2.97550106048584, + "logps/chosen": -297.78985595703125, + "logps/rejected": -1060.4952392578125, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8191101551055908, + "rewards/margins": 4.026003837585449, + "rewards/rejected": -3.2068939208984375, + "step": 421 + }, + { + "epoch": 0.54, + "learning_rate": 4.639092902671085e-08, + "logits/chosen": -3.274202823638916, + "logits/rejected": -3.152200222015381, + "logps/chosen": -255.54324340820312, + "logps/rejected": -904.5203857421875, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7002342343330383, + "rewards/margins": 3.5721182823181152, + "rewards/rejected": -2.871884346008301, + "step": 422 + }, + { + "epoch": 0.54, + "learning_rate": 4.618508665479616e-08, + "logits/chosen": -3.2096214294433594, + "logits/rejected": -3.1521224975585938, + "logps/chosen": -284.352783203125, + "logps/rejected": -692.918212890625, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7269836664199829, + "rewards/margins": 2.95989990234375, + "rewards/rejected": -2.2329163551330566, + "step": 423 + }, + { + "epoch": 0.54, + "learning_rate": 4.597930929802003e-08, + "logits/chosen": -3.280634880065918, + "logits/rejected": -3.0475144386291504, + "logps/chosen": -263.27630615234375, + "logps/rejected": -2005.958740234375, + "loss": 0.2912, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6994659304618835, + "rewards/margins": 7.7316436767578125, + "rewards/rejected": -7.032177925109863, + "step": 424 + }, + { + "epoch": 0.54, + "learning_rate": 4.577360046331493e-08, + "logits/chosen": -3.1683006286621094, + "logits/rejected": -3.0536367893218994, + "logps/chosen": -272.25341796875, + "logps/rejected": -661.1557006835938, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5634201169013977, + "rewards/margins": 2.9333512783050537, + "rewards/rejected": -2.369931221008301, + "step": 425 + }, + { + "epoch": 0.54, + "learning_rate": 4.5567963656445504e-08, + "logits/chosen": -3.210272789001465, + "logits/rejected": -3.1100687980651855, + "logps/chosen": -264.982421875, + "logps/rejected": -633.688720703125, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7728210687637329, + "rewards/margins": 3.219569444656372, + "rewards/rejected": -2.4467482566833496, + "step": 426 + }, + { + "epoch": 0.54, + "learning_rate": 4.53624023819489e-08, + "logits/chosen": -3.1477718353271484, + "logits/rejected": -3.125134229660034, + "logps/chosen": -275.56689453125, + "logps/rejected": -574.2071533203125, + "loss": 0.3507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7692611813545227, + "rewards/margins": 2.9193544387817383, + "rewards/rejected": -2.1500930786132812, + "step": 427 + }, + { + "epoch": 0.55, + "learning_rate": 4.5156920143075015e-08, + "logits/chosen": -3.226088523864746, + "logits/rejected": -3.095350503921509, + "logps/chosen": -245.20668029785156, + "logps/rejected": -741.0020751953125, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8758209347724915, + "rewards/margins": 3.979051113128662, + "rewards/rejected": -3.1032302379608154, + "step": 428 + }, + { + "epoch": 0.55, + "learning_rate": 4.4951520441726746e-08, + "logits/chosen": -3.2358336448669434, + "logits/rejected": -3.097156047821045, + "logps/chosen": -256.82720947265625, + "logps/rejected": -865.0157470703125, + "loss": 0.3092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6265015006065369, + "rewards/margins": 4.475283622741699, + "rewards/rejected": -3.848782539367676, + "step": 429 + }, + { + "epoch": 0.55, + "learning_rate": 4.4746206778400444e-08, + "logits/chosen": -3.220309019088745, + "logits/rejected": -3.205789566040039, + "logps/chosen": -254.6621856689453, + "logps/rejected": -1141.3603515625, + "loss": 0.3401, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7488784790039062, + "rewards/margins": 5.341789245605469, + "rewards/rejected": -4.5929107666015625, + "step": 430 + }, + { + "epoch": 0.55, + "learning_rate": 4.4540982652126056e-08, + "logits/chosen": -3.2492518424987793, + "logits/rejected": -3.0766592025756836, + "logps/chosen": -242.11892700195312, + "logps/rejected": -437.314453125, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7296135425567627, + "rewards/margins": 2.155165910720825, + "rewards/rejected": -1.4255523681640625, + "step": 431 + }, + { + "epoch": 0.55, + "learning_rate": 4.433585156040773e-08, + "logits/chosen": -3.257629871368408, + "logits/rejected": -3.1028409004211426, + "logps/chosen": -282.05389404296875, + "logps/rejected": -471.02362060546875, + "loss": 0.3854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.959797739982605, + "rewards/margins": 2.5506210327148438, + "rewards/rejected": -1.5908234119415283, + "step": 432 + }, + { + "epoch": 0.55, + "learning_rate": 4.413081699916399e-08, + "logits/chosen": -3.274913787841797, + "logits/rejected": -3.125204563140869, + "logps/chosen": -254.23617553710938, + "logps/rejected": -623.4874267578125, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7375198602676392, + "rewards/margins": 3.161829948425293, + "rewards/rejected": -2.4243102073669434, + "step": 433 + }, + { + "epoch": 0.55, + "learning_rate": 4.3925882462668277e-08, + "logits/chosen": -3.2254083156585693, + "logits/rejected": -3.1390323638916016, + "logps/chosen": -262.3830261230469, + "logps/rejected": -663.16162109375, + "loss": 0.3403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7673790454864502, + "rewards/margins": 3.4072227478027344, + "rewards/rejected": -2.639843702316284, + "step": 434 + }, + { + "epoch": 0.55, + "learning_rate": 4.37210514434894e-08, + "logits/chosen": -3.139754295349121, + "logits/rejected": -3.1330461502075195, + "logps/chosen": -286.7021179199219, + "logps/rejected": -945.4827880859375, + "loss": 0.2963, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8562820553779602, + "rewards/margins": 4.424989700317383, + "rewards/rejected": -3.5687074661254883, + "step": 435 + }, + { + "epoch": 0.56, + "learning_rate": 4.351632743243195e-08, + "logits/chosen": -3.22074556350708, + "logits/rejected": -3.062889575958252, + "logps/chosen": -286.273681640625, + "logps/rejected": -389.766357421875, + "loss": 0.323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9421920776367188, + "rewards/margins": 2.6304612159729004, + "rewards/rejected": -1.688269019126892, + "step": 436 + }, + { + "epoch": 0.56, + "learning_rate": 4.33117139184769e-08, + "logits/chosen": -3.2342753410339355, + "logits/rejected": -3.1934847831726074, + "logps/chosen": -248.9696807861328, + "logps/rejected": -576.1734619140625, + "loss": 0.3637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6628974676132202, + "rewards/margins": 2.81453800201416, + "rewards/rejected": -2.1516404151916504, + "step": 437 + }, + { + "epoch": 0.56, + "learning_rate": 4.310721438872204e-08, + "logits/chosen": -3.2710256576538086, + "logits/rejected": -3.0273520946502686, + "logps/chosen": -277.072509765625, + "logps/rejected": -477.3283386230469, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8257690668106079, + "rewards/margins": 2.4145524501800537, + "rewards/rejected": -1.5887833833694458, + "step": 438 + }, + { + "epoch": 0.56, + "learning_rate": 4.29028323283226e-08, + "logits/chosen": -3.1720027923583984, + "logits/rejected": -3.1157515048980713, + "logps/chosen": -289.69610595703125, + "logps/rejected": -892.348388671875, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7825378179550171, + "rewards/margins": 4.480771064758301, + "rewards/rejected": -3.698233127593994, + "step": 439 + }, + { + "epoch": 0.56, + "learning_rate": 4.2698571220431903e-08, + "logits/chosen": -3.2750625610351562, + "logits/rejected": -3.196908473968506, + "logps/chosen": -238.26687622070312, + "logps/rejected": -758.5115356445312, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7350075244903564, + "rewards/margins": 3.9528968334198, + "rewards/rejected": -3.2178893089294434, + "step": 440 + }, + { + "epoch": 0.56, + "learning_rate": 4.24944345461419e-08, + "logits/chosen": -3.2144346237182617, + "logits/rejected": -2.9733517169952393, + "logps/chosen": -280.8861389160156, + "logps/rejected": -646.8771362304688, + "loss": 0.3351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7357116937637329, + "rewards/margins": 2.650421142578125, + "rewards/rejected": -1.914709448814392, + "step": 441 + }, + { + "epoch": 0.56, + "learning_rate": 4.2290425784423954e-08, + "logits/chosen": -3.2120141983032227, + "logits/rejected": -3.1263837814331055, + "logps/chosen": -246.92703247070312, + "logps/rejected": -468.8056640625, + "loss": 0.3381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8802955746650696, + "rewards/margins": 2.617464542388916, + "rewards/rejected": -1.7371689081192017, + "step": 442 + }, + { + "epoch": 0.56, + "learning_rate": 4.2086548412069445e-08, + "logits/chosen": -3.211400270462036, + "logits/rejected": -3.065523624420166, + "logps/chosen": -248.4954833984375, + "logps/rejected": -344.08056640625, + "loss": 0.3445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7457466125488281, + "rewards/margins": 2.078986406326294, + "rewards/rejected": -1.3332397937774658, + "step": 443 + }, + { + "epoch": 0.57, + "learning_rate": 4.188280590363057e-08, + "logits/chosen": -3.1801741123199463, + "logits/rejected": -3.0739104747772217, + "logps/chosen": -241.9627227783203, + "logps/rejected": -294.8995056152344, + "loss": 0.3701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9280350208282471, + "rewards/margins": 1.9230018854141235, + "rewards/rejected": -0.9949669241905212, + "step": 444 + }, + { + "epoch": 0.57, + "learning_rate": 4.167920173136119e-08, + "logits/chosen": -3.1702141761779785, + "logits/rejected": -3.115291118621826, + "logps/chosen": -272.5542907714844, + "logps/rejected": -720.2747802734375, + "loss": 0.3487, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5492828488349915, + "rewards/margins": 3.314236640930176, + "rewards/rejected": -2.76495361328125, + "step": 445 + }, + { + "epoch": 0.57, + "learning_rate": 4.1475739365157505e-08, + "logits/chosen": -3.2151782512664795, + "logits/rejected": -3.135854482650757, + "logps/chosen": -254.84812927246094, + "logps/rejected": -360.6507568359375, + "loss": 0.3298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.657177746295929, + "rewards/margins": 1.8936127424240112, + "rewards/rejected": -1.2364349365234375, + "step": 446 + }, + { + "epoch": 0.57, + "learning_rate": 4.127242227249908e-08, + "logits/chosen": -3.1688923835754395, + "logits/rejected": -3.113621711730957, + "logps/chosen": -253.81573486328125, + "logps/rejected": -525.0240478515625, + "loss": 0.29, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8051506280899048, + "rewards/margins": 2.765270233154297, + "rewards/rejected": -1.9601197242736816, + "step": 447 + }, + { + "epoch": 0.57, + "learning_rate": 4.106925391838962e-08, + "logits/chosen": -3.2263665199279785, + "logits/rejected": -3.203400135040283, + "logps/chosen": -286.47283935546875, + "logps/rejected": -471.3277587890625, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8043320178985596, + "rewards/margins": 2.466796875, + "rewards/rejected": -1.6624648571014404, + "step": 448 + }, + { + "epoch": 0.57, + "learning_rate": 4.0866237765298e-08, + "logits/chosen": -3.2892932891845703, + "logits/rejected": -3.0294077396392822, + "logps/chosen": -255.1900634765625, + "logps/rejected": -332.8899841308594, + "loss": 0.3384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7728058099746704, + "rewards/margins": 1.8623932600021362, + "rewards/rejected": -1.0895874500274658, + "step": 449 + }, + { + "epoch": 0.57, + "learning_rate": 4.066337727309923e-08, + "logits/chosen": -3.2458739280700684, + "logits/rejected": -3.115586757659912, + "logps/chosen": -264.8463134765625, + "logps/rejected": -396.87799072265625, + "loss": 0.3394, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0382087230682373, + "rewards/margins": 2.667628288269043, + "rewards/rejected": -1.6294196844100952, + "step": 450 + }, + { + "epoch": 0.57, + "learning_rate": 4.046067589901545e-08, + "logits/chosen": -3.2097527980804443, + "logits/rejected": -3.117243766784668, + "logps/chosen": -254.91595458984375, + "logps/rejected": -880.145263671875, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.855139970779419, + "rewards/margins": 4.889057159423828, + "rewards/rejected": -4.033917427062988, + "step": 451 + }, + { + "epoch": 0.58, + "learning_rate": 4.0258137097557135e-08, + "logits/chosen": -3.2721166610717773, + "logits/rejected": -3.140284538269043, + "logps/chosen": -248.3983154296875, + "logps/rejected": -633.2344970703125, + "loss": 0.3127, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7294731140136719, + "rewards/margins": 3.5664114952087402, + "rewards/rejected": -2.8369383811950684, + "step": 452 + }, + { + "epoch": 0.58, + "learning_rate": 4.005576432046405e-08, + "logits/chosen": -3.204892635345459, + "logits/rejected": -3.1104896068573, + "logps/chosen": -241.9120635986328, + "logps/rejected": -924.699951171875, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7159828543663025, + "rewards/margins": 4.210933685302734, + "rewards/rejected": -3.494951009750366, + "step": 453 + }, + { + "epoch": 0.58, + "learning_rate": 3.9853561016646596e-08, + "logits/chosen": -3.213212490081787, + "logits/rejected": -3.189880609512329, + "logps/chosen": -249.93988037109375, + "logps/rejected": -1208.1600341796875, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8222610950469971, + "rewards/margins": 5.935652732849121, + "rewards/rejected": -5.113391399383545, + "step": 454 + }, + { + "epoch": 0.58, + "learning_rate": 3.965153063212688e-08, + "logits/chosen": -3.217996597290039, + "logits/rejected": -3.110361099243164, + "logps/chosen": -274.15093994140625, + "logps/rejected": -677.416748046875, + "loss": 0.3398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7303085327148438, + "rewards/margins": 3.420546054840088, + "rewards/rejected": -2.690237522125244, + "step": 455 + }, + { + "epoch": 0.58, + "learning_rate": 3.9449676609980065e-08, + "logits/chosen": -3.152060031890869, + "logits/rejected": -3.0830495357513428, + "logps/chosen": -255.25396728515625, + "logps/rejected": -1148.9443359375, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7629066705703735, + "rewards/margins": 4.238038063049316, + "rewards/rejected": -3.475131034851074, + "step": 456 + }, + { + "epoch": 0.58, + "learning_rate": 3.924800239027572e-08, + "logits/chosen": -3.239511251449585, + "logits/rejected": -3.07348370552063, + "logps/chosen": -251.2755584716797, + "logps/rejected": -1103.8614501953125, + "loss": 0.3345, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8068130612373352, + "rewards/margins": 4.420423984527588, + "rewards/rejected": -3.6136109828948975, + "step": 457 + }, + { + "epoch": 0.58, + "learning_rate": 3.9046511410019115e-08, + "logits/chosen": -3.229635715484619, + "logits/rejected": -3.105039119720459, + "logps/chosen": -283.7078857421875, + "logps/rejected": -449.3948974609375, + "loss": 0.3702, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8384720087051392, + "rewards/margins": 2.4279892444610596, + "rewards/rejected": -1.5895172357559204, + "step": 458 + }, + { + "epoch": 0.58, + "learning_rate": 3.884520710309272e-08, + "logits/chosen": -3.2254223823547363, + "logits/rejected": -3.140604019165039, + "logps/chosen": -247.1359100341797, + "logps/rejected": -612.2134399414062, + "loss": 0.3319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6746323108673096, + "rewards/margins": 3.0799484252929688, + "rewards/rejected": -2.405316114425659, + "step": 459 + }, + { + "epoch": 0.59, + "learning_rate": 3.8644092900197647e-08, + "logits/chosen": -3.2639858722686768, + "logits/rejected": -3.146620273590088, + "logps/chosen": -247.98728942871094, + "logps/rejected": -406.8006591796875, + "loss": 0.3338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.64827960729599, + "rewards/margins": 2.274416446685791, + "rewards/rejected": -1.6261367797851562, + "step": 460 + }, + { + "epoch": 0.59, + "learning_rate": 3.844317222879513e-08, + "logits/chosen": -3.214365005493164, + "logits/rejected": -3.0079970359802246, + "logps/chosen": -256.6406555175781, + "logps/rejected": -925.798095703125, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7989891171455383, + "rewards/margins": 3.7220511436462402, + "rewards/rejected": -2.923062324523926, + "step": 461 + }, + { + "epoch": 0.59, + "learning_rate": 3.824244851304826e-08, + "logits/chosen": -3.2224223613739014, + "logits/rejected": -3.0680484771728516, + "logps/chosen": -284.6484375, + "logps/rejected": -1235.8343505859375, + "loss": 0.3139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7946319580078125, + "rewards/margins": 6.023016929626465, + "rewards/rejected": -5.228384494781494, + "step": 462 + }, + { + "epoch": 0.59, + "learning_rate": 3.804192517376347e-08, + "logits/chosen": -3.252808094024658, + "logits/rejected": -3.0686187744140625, + "logps/chosen": -258.9877014160156, + "logps/rejected": -1108.4444580078125, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5482193231582642, + "rewards/margins": 4.5050859451293945, + "rewards/rejected": -3.956866502761841, + "step": 463 + }, + { + "epoch": 0.59, + "learning_rate": 3.784160562833235e-08, + "logits/chosen": -3.2150142192840576, + "logits/rejected": -3.0764951705932617, + "logps/chosen": -272.24224853515625, + "logps/rejected": -1257.102294921875, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6652023792266846, + "rewards/margins": 5.5199875831604, + "rewards/rejected": -4.854785442352295, + "step": 464 + }, + { + "epoch": 0.59, + "learning_rate": 3.7641493290673287e-08, + "logits/chosen": -3.2231364250183105, + "logits/rejected": -3.00732159614563, + "logps/chosen": -275.6988525390625, + "logps/rejected": -807.453125, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6549347043037415, + "rewards/margins": 3.5144882202148438, + "rewards/rejected": -2.859553575515747, + "step": 465 + }, + { + "epoch": 0.59, + "learning_rate": 3.7441591571173445e-08, + "logits/chosen": -3.2368674278259277, + "logits/rejected": -2.9862375259399414, + "logps/chosen": -247.21136474609375, + "logps/rejected": -4745.12353515625, + "loss": 0.2887, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7504600286483765, + "rewards/margins": 7.689327239990234, + "rewards/rejected": -6.938867568969727, + "step": 466 + }, + { + "epoch": 0.59, + "learning_rate": 3.724190387663053e-08, + "logits/chosen": -3.252859115600586, + "logits/rejected": -3.164618968963623, + "logps/chosen": -270.9765625, + "logps/rejected": -761.14501953125, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9111358523368835, + "rewards/margins": 4.2449951171875, + "rewards/rejected": -3.333859443664551, + "step": 467 + }, + { + "epoch": 0.6, + "learning_rate": 3.704243361019471e-08, + "logits/chosen": -3.180811643600464, + "logits/rejected": -3.154475212097168, + "logps/chosen": -275.91705322265625, + "logps/rejected": -500.38970947265625, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7642303705215454, + "rewards/margins": 2.667468309402466, + "rewards/rejected": -1.9032379388809204, + "step": 468 + }, + { + "epoch": 0.6, + "learning_rate": 3.684318417131074e-08, + "logits/chosen": -3.256410598754883, + "logits/rejected": -3.1338541507720947, + "logps/chosen": -239.05010986328125, + "logps/rejected": -840.0252685546875, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.728339433670044, + "rewards/margins": 4.440552711486816, + "rewards/rejected": -3.7122130393981934, + "step": 469 + }, + { + "epoch": 0.6, + "learning_rate": 3.66441589556599e-08, + "logits/chosen": -3.184053897857666, + "logits/rejected": -3.135021924972534, + "logps/chosen": -247.36012268066406, + "logps/rejected": -464.69342041015625, + "loss": 0.3094, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7536255121231079, + "rewards/margins": 2.390097141265869, + "rewards/rejected": -1.6364716291427612, + "step": 470 + }, + { + "epoch": 0.6, + "learning_rate": 3.6445361355102164e-08, + "logits/chosen": -3.2196521759033203, + "logits/rejected": -3.1006360054016113, + "logps/chosen": -282.3588562011719, + "logps/rejected": -662.939208984375, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6962143182754517, + "rewards/margins": 4.010534763336182, + "rewards/rejected": -3.3143203258514404, + "step": 471 + }, + { + "epoch": 0.6, + "learning_rate": 3.624679475761846e-08, + "logits/chosen": -3.161710500717163, + "logits/rejected": -3.09252667427063, + "logps/chosen": -254.25823974609375, + "logps/rejected": -775.3280029296875, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7693687677383423, + "rewards/margins": 3.830071449279785, + "rewards/rejected": -3.0607025623321533, + "step": 472 + }, + { + "epoch": 0.6, + "learning_rate": 3.604846254725285e-08, + "logits/chosen": -3.1715774536132812, + "logits/rejected": -3.035703182220459, + "logps/chosen": -297.9869384765625, + "logps/rejected": -354.0225830078125, + "loss": 0.3531, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.653094470500946, + "rewards/margins": 1.9293251037597656, + "rewards/rejected": -1.2762306928634644, + "step": 473 + }, + { + "epoch": 0.6, + "learning_rate": 3.585036810405487e-08, + "logits/chosen": -3.2303335666656494, + "logits/rejected": -3.064081907272339, + "logps/chosen": -274.63116455078125, + "logps/rejected": -439.9415283203125, + "loss": 0.3694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5611664056777954, + "rewards/margins": 1.8110932111740112, + "rewards/rejected": -1.2499268054962158, + "step": 474 + }, + { + "epoch": 0.61, + "learning_rate": 3.565251480402198e-08, + "logits/chosen": -3.2782511711120605, + "logits/rejected": -3.143883228302002, + "logps/chosen": -252.266357421875, + "logps/rejected": -550.91064453125, + "loss": 0.3278, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8859275579452515, + "rewards/margins": 3.2019569873809814, + "rewards/rejected": -2.3160293102264404, + "step": 475 + }, + { + "epoch": 0.61, + "learning_rate": 3.545490601904193e-08, + "logits/chosen": -3.206587314605713, + "logits/rejected": -3.1075429916381836, + "logps/chosen": -270.71734619140625, + "logps/rejected": -727.2401733398438, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7366348505020142, + "rewards/margins": 3.460536241531372, + "rewards/rejected": -2.7239012718200684, + "step": 476 + }, + { + "epoch": 0.61, + "learning_rate": 3.525754511683543e-08, + "logits/chosen": -3.2633447647094727, + "logits/rejected": -3.1128735542297363, + "logps/chosen": -261.84893798828125, + "logps/rejected": -532.2954711914062, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.802873969078064, + "rewards/margins": 3.1262459754943848, + "rewards/rejected": -2.3233718872070312, + "step": 477 + }, + { + "epoch": 0.61, + "learning_rate": 3.506043546089862e-08, + "logits/chosen": -3.207989454269409, + "logits/rejected": -3.0457823276519775, + "logps/chosen": -261.05755615234375, + "logps/rejected": -889.8092041015625, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9190170764923096, + "rewards/margins": 4.819371223449707, + "rewards/rejected": -3.9003539085388184, + "step": 478 + }, + { + "epoch": 0.61, + "learning_rate": 3.486358041044586e-08, + "logits/chosen": -3.255971908569336, + "logits/rejected": -3.10848331451416, + "logps/chosen": -238.39352416992188, + "logps/rejected": -631.5830078125, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7125694751739502, + "rewards/margins": 3.0772299766540527, + "rewards/rejected": -2.3646607398986816, + "step": 479 + }, + { + "epoch": 0.61, + "learning_rate": 3.4666983320352393e-08, + "logits/chosen": -3.200803518295288, + "logits/rejected": -3.0654897689819336, + "logps/chosen": -280.8067626953125, + "logps/rejected": -369.34698486328125, + "loss": 0.3491, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7732384204864502, + "rewards/margins": 1.8686425685882568, + "rewards/rejected": -1.0954041481018066, + "step": 480 + }, + { + "epoch": 0.61, + "learning_rate": 3.4470647541097215e-08, + "logits/chosen": -3.2215185165405273, + "logits/rejected": -3.1384191513061523, + "logps/chosen": -285.89642333984375, + "logps/rejected": -612.609375, + "loss": 0.3224, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7379897832870483, + "rewards/margins": 3.530390977859497, + "rewards/rejected": -2.7924013137817383, + "step": 481 + }, + { + "epoch": 0.61, + "learning_rate": 3.4274576418705986e-08, + "logits/chosen": -3.270493984222412, + "logits/rejected": -3.1510744094848633, + "logps/chosen": -283.35992431640625, + "logps/rejected": -822.7593994140625, + "loss": 0.3022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8759247064590454, + "rewards/margins": 4.238623142242432, + "rewards/rejected": -3.362698554992676, + "step": 482 + }, + { + "epoch": 0.62, + "learning_rate": 3.4078773294693947e-08, + "logits/chosen": -3.274292469024658, + "logits/rejected": -2.8801960945129395, + "logps/chosen": -269.8462829589844, + "logps/rejected": -2298.8369140625, + "loss": 0.309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7582015991210938, + "rewards/margins": 7.7548017501831055, + "rewards/rejected": -6.996600151062012, + "step": 483 + }, + { + "epoch": 0.62, + "learning_rate": 3.388324150600908e-08, + "logits/chosen": -3.1952590942382812, + "logits/rejected": -3.036379814147949, + "logps/chosen": -279.311767578125, + "logps/rejected": -721.1558837890625, + "loss": 0.3582, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0257004499435425, + "rewards/margins": 3.693333625793457, + "rewards/rejected": -2.667633056640625, + "step": 484 + }, + { + "epoch": 0.62, + "learning_rate": 3.368798438497512e-08, + "logits/chosen": -3.1402931213378906, + "logits/rejected": -3.116745948791504, + "logps/chosen": -245.31687927246094, + "logps/rejected": -601.505615234375, + "loss": 0.3107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8037117123603821, + "rewards/margins": 3.337449073791504, + "rewards/rejected": -2.5337371826171875, + "step": 485 + }, + { + "epoch": 0.62, + "learning_rate": 3.34930052592348e-08, + "logits/chosen": -3.2084667682647705, + "logits/rejected": -3.0961546897888184, + "logps/chosen": -262.16241455078125, + "logps/rejected": -666.6253051757812, + "loss": 0.3449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.868298351764679, + "rewards/margins": 3.0946242809295654, + "rewards/rejected": -2.2263259887695312, + "step": 486 + }, + { + "epoch": 0.62, + "learning_rate": 3.3298307451693236e-08, + "logits/chosen": -3.2648026943206787, + "logits/rejected": -3.114597797393799, + "logps/chosen": -286.84771728515625, + "logps/rejected": -326.1781005859375, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8594772815704346, + "rewards/margins": 1.8321945667266846, + "rewards/rejected": -0.97271728515625, + "step": 487 + }, + { + "epoch": 0.62, + "learning_rate": 3.310389428046113e-08, + "logits/chosen": -3.194362163543701, + "logits/rejected": -3.0448241233825684, + "logps/chosen": -230.78480529785156, + "logps/rejected": -514.4073486328125, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8172683715820312, + "rewards/margins": 2.6833364963531494, + "rewards/rejected": -1.8660683631896973, + "step": 488 + }, + { + "epoch": 0.62, + "learning_rate": 3.2909769058798364e-08, + "logits/chosen": -3.142951011657715, + "logits/rejected": -3.079570770263672, + "logps/chosen": -274.6739196777344, + "logps/rejected": -404.46942138671875, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9009125232696533, + "rewards/margins": 2.403080701828003, + "rewards/rejected": -1.5021682977676392, + "step": 489 + }, + { + "epoch": 0.62, + "learning_rate": 3.2715935095057454e-08, + "logits/chosen": -3.210923194885254, + "logits/rejected": -3.213418960571289, + "logps/chosen": -298.34600830078125, + "logps/rejected": -661.408447265625, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8660309314727783, + "rewards/margins": 3.494577169418335, + "rewards/rejected": -2.6285462379455566, + "step": 490 + }, + { + "epoch": 0.63, + "learning_rate": 3.252239569262718e-08, + "logits/chosen": -3.2242884635925293, + "logits/rejected": -3.0457801818847656, + "logps/chosen": -267.992919921875, + "logps/rejected": -323.0784606933594, + "loss": 0.3285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8766357898712158, + "rewards/margins": 2.1871421337127686, + "rewards/rejected": -1.3105064630508423, + "step": 491 + }, + { + "epoch": 0.63, + "learning_rate": 3.232915414987636e-08, + "logits/chosen": -3.188497543334961, + "logits/rejected": -3.121151924133301, + "logps/chosen": -283.8603515625, + "logps/rejected": -682.4782104492188, + "loss": 0.3187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8006019592285156, + "rewards/margins": 3.051851749420166, + "rewards/rejected": -2.2512497901916504, + "step": 492 + }, + { + "epoch": 0.63, + "learning_rate": 3.213621376009749e-08, + "logits/chosen": -3.233733654022217, + "logits/rejected": -3.0764474868774414, + "logps/chosen": -249.69264221191406, + "logps/rejected": -1054.0224609375, + "loss": 0.3134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8622070550918579, + "rewards/margins": 5.351006984710693, + "rewards/rejected": -4.488800048828125, + "step": 493 + }, + { + "epoch": 0.63, + "learning_rate": 3.194357781145081e-08, + "logits/chosen": -3.239600896835327, + "logits/rejected": -3.1548047065734863, + "logps/chosen": -255.37449645996094, + "logps/rejected": -739.8114013671875, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7349075078964233, + "rewards/margins": 4.050550937652588, + "rewards/rejected": -3.315643310546875, + "step": 494 + }, + { + "epoch": 0.63, + "learning_rate": 3.175124958690809e-08, + "logits/chosen": -3.239799737930298, + "logits/rejected": -3.105844497680664, + "logps/chosen": -208.19854736328125, + "logps/rejected": -1314.3463134765625, + "loss": 0.317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5776329040527344, + "rewards/margins": 5.467676639556885, + "rewards/rejected": -4.89004373550415, + "step": 495 + }, + { + "epoch": 0.63, + "learning_rate": 3.155923236419675e-08, + "logits/chosen": -3.213491916656494, + "logits/rejected": -3.103288173675537, + "logps/chosen": -289.65875244140625, + "logps/rejected": -465.9349670410156, + "loss": 0.3388, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7372086048126221, + "rewards/margins": 2.335958957672119, + "rewards/rejected": -1.598750352859497, + "step": 496 + }, + { + "epoch": 0.63, + "learning_rate": 3.136752941574407e-08, + "logits/chosen": -3.2355687618255615, + "logits/rejected": -3.047696113586426, + "logps/chosen": -224.34463500976562, + "logps/rejected": -622.6340942382812, + "loss": 0.2885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7778373956680298, + "rewards/margins": 3.1973838806152344, + "rewards/rejected": -2.419546604156494, + "step": 497 + }, + { + "epoch": 0.63, + "learning_rate": 3.117614400862126e-08, + "logits/chosen": -3.2889273166656494, + "logits/rejected": -3.2019498348236084, + "logps/chosen": -224.47628784179688, + "logps/rejected": -478.3635559082031, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8326209783554077, + "rewards/margins": 2.7144217491149902, + "rewards/rejected": -1.881800889968872, + "step": 498 + }, + { + "epoch": 0.64, + "learning_rate": 3.098507940448799e-08, + "logits/chosen": -3.1792092323303223, + "logits/rejected": -3.0351569652557373, + "logps/chosen": -256.368896484375, + "logps/rejected": -545.9934692382812, + "loss": 0.3744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8767196536064148, + "rewards/margins": 2.8091773986816406, + "rewards/rejected": -1.932457685470581, + "step": 499 + }, + { + "epoch": 0.64, + "learning_rate": 3.0794338859536596e-08, + "logits/chosen": -3.2189831733703613, + "logits/rejected": -3.0707287788391113, + "logps/chosen": -245.17279052734375, + "logps/rejected": -567.603271484375, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7137482166290283, + "rewards/margins": 2.654466152191162, + "rewards/rejected": -1.940718173980713, + "step": 500 + }, + { + "epoch": 0.64, + "learning_rate": 3.0603925624436686e-08, + "logits/chosen": -3.209994316101074, + "logits/rejected": -3.0842199325561523, + "logps/chosen": -257.11932373046875, + "logps/rejected": -467.5304260253906, + "loss": 0.3228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8637558221817017, + "rewards/margins": 2.3768157958984375, + "rewards/rejected": -1.5130600929260254, + "step": 501 + }, + { + "epoch": 0.64, + "learning_rate": 3.0413842944279813e-08, + "logits/chosen": -3.2040622234344482, + "logits/rejected": -3.211160182952881, + "logps/chosen": -267.8187255859375, + "logps/rejected": -750.5059814453125, + "loss": 0.2954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6886497735977173, + "rewards/margins": 3.94665789604187, + "rewards/rejected": -3.2580080032348633, + "step": 502 + }, + { + "epoch": 0.64, + "learning_rate": 3.0224094058524e-08, + "logits/chosen": -3.269195556640625, + "logits/rejected": -3.138434410095215, + "logps/chosen": -234.19146728515625, + "logps/rejected": -491.7572021484375, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8732719421386719, + "rewards/margins": 2.820305824279785, + "rewards/rejected": -1.9470336437225342, + "step": 503 + }, + { + "epoch": 0.64, + "learning_rate": 3.003468220093867e-08, + "logits/chosen": -3.238582134246826, + "logits/rejected": -3.101954460144043, + "logps/chosen": -265.48834228515625, + "logps/rejected": -505.20318603515625, + "loss": 0.3367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9624847173690796, + "rewards/margins": 3.1607728004455566, + "rewards/rejected": -2.1982879638671875, + "step": 504 + }, + { + "epoch": 0.64, + "learning_rate": 2.98456105995495e-08, + "logits/chosen": -3.3112759590148926, + "logits/rejected": -3.1977643966674805, + "logps/chosen": -250.44577026367188, + "logps/rejected": -600.5686645507812, + "loss": 0.3113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8144104480743408, + "rewards/margins": 3.3005661964416504, + "rewards/rejected": -2.4861557483673096, + "step": 505 + }, + { + "epoch": 0.64, + "learning_rate": 2.9656882476583343e-08, + "logits/chosen": -3.2024409770965576, + "logits/rejected": -3.175168037414551, + "logps/chosen": -235.15521240234375, + "logps/rejected": -481.2837219238281, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9266387820243835, + "rewards/margins": 2.9233412742614746, + "rewards/rejected": -1.9967025518417358, + "step": 506 + }, + { + "epoch": 0.65, + "learning_rate": 2.9468501048413452e-08, + "logits/chosen": -3.263148784637451, + "logits/rejected": -3.1591663360595703, + "logps/chosen": -284.1138000488281, + "logps/rejected": -686.1558837890625, + "loss": 0.3331, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8997489809989929, + "rewards/margins": 3.4548349380493164, + "rewards/rejected": -2.5550858974456787, + "step": 507 + }, + { + "epoch": 0.65, + "learning_rate": 2.9280469525504493e-08, + "logits/chosen": -3.199110507965088, + "logits/rejected": -3.159590721130371, + "logps/chosen": -257.28643798828125, + "logps/rejected": -395.20306396484375, + "loss": 0.3389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8233604431152344, + "rewards/margins": 2.612481117248535, + "rewards/rejected": -1.7891204357147217, + "step": 508 + }, + { + "epoch": 0.65, + "learning_rate": 2.909279111235801e-08, + "logits/chosen": -3.3070168495178223, + "logits/rejected": -3.132929801940918, + "logps/chosen": -229.95582580566406, + "logps/rejected": -4013.998046875, + "loss": 0.2849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9786560535430908, + "rewards/margins": 9.632665634155273, + "rewards/rejected": -8.654009819030762, + "step": 509 + }, + { + "epoch": 0.65, + "learning_rate": 2.890546900745764e-08, + "logits/chosen": -3.245668649673462, + "logits/rejected": -3.0894675254821777, + "logps/chosen": -254.32713317871094, + "logps/rejected": -1373.546142578125, + "loss": 0.3143, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7967789173126221, + "rewards/margins": 5.378084182739258, + "rewards/rejected": -4.581305027008057, + "step": 510 + }, + { + "epoch": 0.65, + "learning_rate": 2.8718506403214694e-08, + "logits/chosen": -3.2499241828918457, + "logits/rejected": -3.212627649307251, + "logps/chosen": -267.6031494140625, + "logps/rejected": -601.2958984375, + "loss": 0.3303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8885246515274048, + "rewards/margins": 3.2211174964904785, + "rewards/rejected": -2.332592725753784, + "step": 511 + }, + { + "epoch": 0.65, + "learning_rate": 2.8531906485913827e-08, + "logits/chosen": -3.205714464187622, + "logits/rejected": -3.108041286468506, + "logps/chosen": -226.66015625, + "logps/rejected": -318.0849914550781, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8098083734512329, + "rewards/margins": 1.9501968622207642, + "rewards/rejected": -1.1403884887695312, + "step": 512 + }, + { + "epoch": 0.65, + "learning_rate": 2.834567243565853e-08, + "logits/chosen": -3.2583565711975098, + "logits/rejected": -3.128932476043701, + "logps/chosen": -209.16220092773438, + "logps/rejected": -1159.3009033203125, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.713269054889679, + "rewards/margins": 5.988598823547363, + "rewards/rejected": -5.27532958984375, + "step": 513 + }, + { + "epoch": 0.65, + "learning_rate": 2.8159807426317178e-08, + "logits/chosen": -3.2107105255126953, + "logits/rejected": -3.1510510444641113, + "logps/chosen": -256.2755126953125, + "logps/rejected": -1157.71484375, + "loss": 0.2949, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9000869989395142, + "rewards/margins": 5.938737869262695, + "rewards/rejected": -5.0386505126953125, + "step": 514 + }, + { + "epoch": 0.66, + "learning_rate": 2.797431462546872e-08, + "logits/chosen": -3.147399425506592, + "logits/rejected": -3.080294609069824, + "logps/chosen": -275.9324645996094, + "logps/rejected": -965.276123046875, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9332382678985596, + "rewards/margins": 4.583491325378418, + "rewards/rejected": -3.6502532958984375, + "step": 515 + }, + { + "epoch": 0.66, + "learning_rate": 2.7789197194348813e-08, + "logits/chosen": -3.204932689666748, + "logits/rejected": -3.123642683029175, + "logps/chosen": -256.8611145019531, + "logps/rejected": -809.043701171875, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7714736461639404, + "rewards/margins": 4.48806619644165, + "rewards/rejected": -3.716592311859131, + "step": 516 + }, + { + "epoch": 0.66, + "learning_rate": 2.7604458287795985e-08, + "logits/chosen": -3.256981372833252, + "logits/rejected": -3.2165727615356445, + "logps/chosen": -255.96566772460938, + "logps/rejected": -359.7950439453125, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5919106006622314, + "rewards/margins": 2.2595529556274414, + "rewards/rejected": -1.66764235496521, + "step": 517 + }, + { + "epoch": 0.66, + "learning_rate": 2.742010105419773e-08, + "logits/chosen": -3.2431986331939697, + "logits/rejected": -3.1204299926757812, + "logps/chosen": -282.1800842285156, + "logps/rejected": -621.0947265625, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8802276849746704, + "rewards/margins": 3.5424561500549316, + "rewards/rejected": -2.662228584289551, + "step": 518 + }, + { + "epoch": 0.66, + "learning_rate": 2.7236128635436994e-08, + "logits/chosen": -3.2208991050720215, + "logits/rejected": -3.1036648750305176, + "logps/chosen": -256.03875732421875, + "logps/rejected": -678.7178955078125, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8704833984375, + "rewards/margins": 3.6024506092071533, + "rewards/rejected": -2.7319672107696533, + "step": 519 + }, + { + "epoch": 0.66, + "learning_rate": 2.7052544166838542e-08, + "logits/chosen": -3.1957345008850098, + "logits/rejected": -3.140720844268799, + "logps/chosen": -346.9550476074219, + "logps/rejected": -729.2001342773438, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8441177606582642, + "rewards/margins": 3.3735427856445312, + "rewards/rejected": -2.5294251441955566, + "step": 520 + }, + { + "epoch": 0.66, + "learning_rate": 2.686935077711553e-08, + "logits/chosen": -3.250540256500244, + "logits/rejected": -3.144537925720215, + "logps/chosen": -240.52923583984375, + "logps/rejected": -315.99951171875, + "loss": 0.3369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.895093560218811, + "rewards/margins": 1.9841485023498535, + "rewards/rejected": -1.089054822921753, + "step": 521 + }, + { + "epoch": 0.66, + "learning_rate": 2.6686551588316274e-08, + "logits/chosen": -3.274111032485962, + "logits/rejected": -3.089785575866699, + "logps/chosen": -241.9641571044922, + "logps/rejected": -960.3966674804688, + "loss": 0.3322, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8632713556289673, + "rewards/margins": 4.74445104598999, + "rewards/rejected": -3.8811798095703125, + "step": 522 + }, + { + "epoch": 0.67, + "learning_rate": 2.6504149715770903e-08, + "logits/chosen": -3.256728410720825, + "logits/rejected": -3.062682867050171, + "logps/chosen": -267.0240173339844, + "logps/rejected": -1083.97705078125, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7022873163223267, + "rewards/margins": 4.289512634277344, + "rewards/rejected": -3.5872254371643066, + "step": 523 + }, + { + "epoch": 0.67, + "learning_rate": 2.6322148268038368e-08, + "logits/chosen": -3.175222873687744, + "logits/rejected": -3.1242101192474365, + "logps/chosen": -285.4930114746094, + "logps/rejected": -482.8304138183594, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9177337884902954, + "rewards/margins": 3.117623805999756, + "rewards/rejected": -2.19989013671875, + "step": 524 + }, + { + "epoch": 0.67, + "learning_rate": 2.6140550346853442e-08, + "logits/chosen": -3.180166721343994, + "logits/rejected": -3.058640480041504, + "logps/chosen": -262.20001220703125, + "logps/rejected": -510.6512451171875, + "loss": 0.3383, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7296951413154602, + "rewards/margins": 2.39324951171875, + "rewards/rejected": -1.6635544300079346, + "step": 525 + }, + { + "epoch": 0.67, + "learning_rate": 2.5959359047073816e-08, + "logits/chosen": -3.24426007270813, + "logits/rejected": -3.1778926849365234, + "logps/chosen": -209.4611053466797, + "logps/rejected": -698.5535888671875, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7644966244697571, + "rewards/margins": 3.5840201377868652, + "rewards/rejected": -2.819523572921753, + "step": 526 + }, + { + "epoch": 0.67, + "learning_rate": 2.577857745662746e-08, + "logits/chosen": -3.215200901031494, + "logits/rejected": -3.0464909076690674, + "logps/chosen": -263.4476013183594, + "logps/rejected": -424.2737121582031, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8197205066680908, + "rewards/margins": 2.387991428375244, + "rewards/rejected": -1.5682709217071533, + "step": 527 + }, + { + "epoch": 0.67, + "learning_rate": 2.5598208656459854e-08, + "logits/chosen": -3.2123773097991943, + "logits/rejected": -3.1942014694213867, + "logps/chosen": -252.84255981445312, + "logps/rejected": -736.1846923828125, + "loss": 0.3072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8487091064453125, + "rewards/margins": 3.3765411376953125, + "rewards/rejected": -2.52783203125, + "step": 528 + }, + { + "epoch": 0.67, + "learning_rate": 2.5418255720481614e-08, + "logits/chosen": -3.1731529235839844, + "logits/rejected": -3.094529151916504, + "logps/chosen": -255.8529510498047, + "logps/rejected": -502.95751953125, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9556075930595398, + "rewards/margins": 2.9110536575317383, + "rewards/rejected": -1.9554458856582642, + "step": 529 + }, + { + "epoch": 0.68, + "learning_rate": 2.523872171551601e-08, + "logits/chosen": -3.219511032104492, + "logits/rejected": -3.050102710723877, + "logps/chosen": -234.84890747070312, + "logps/rejected": -1164.141845703125, + "loss": 0.3334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.910718560218811, + "rewards/margins": 4.9885382652282715, + "rewards/rejected": -4.07781982421875, + "step": 530 + }, + { + "epoch": 0.68, + "learning_rate": 2.5059609701246744e-08, + "logits/chosen": -3.2321436405181885, + "logits/rejected": -3.120922803878784, + "logps/chosen": -242.7937774658203, + "logps/rejected": -426.6334228515625, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7061294317245483, + "rewards/margins": 2.1093978881835938, + "rewards/rejected": -1.4032684564590454, + "step": 531 + }, + { + "epoch": 0.68, + "learning_rate": 2.488092273016583e-08, + "logits/chosen": -3.276362895965576, + "logits/rejected": -2.9180452823638916, + "logps/chosen": -247.6316680908203, + "logps/rejected": -1671.1436767578125, + "loss": 0.299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6979683041572571, + "rewards/margins": 6.3451361656188965, + "rewards/rejected": -5.647168159484863, + "step": 532 + }, + { + "epoch": 0.68, + "learning_rate": 2.470266384752148e-08, + "logits/chosen": -3.292407751083374, + "logits/rejected": -3.1385955810546875, + "logps/chosen": -255.81735229492188, + "logps/rejected": -455.60382080078125, + "loss": 0.3652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7722427248954773, + "rewards/margins": 2.357147216796875, + "rewards/rejected": -1.5849045515060425, + "step": 533 + }, + { + "epoch": 0.68, + "learning_rate": 2.4524836091266354e-08, + "logits/chosen": -3.246295928955078, + "logits/rejected": -3.090853214263916, + "logps/chosen": -239.68690490722656, + "logps/rejected": -898.5733642578125, + "loss": 0.3603, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9035079479217529, + "rewards/margins": 5.046699523925781, + "rewards/rejected": -4.143191337585449, + "step": 534 + }, + { + "epoch": 0.68, + "learning_rate": 2.4347442492005627e-08, + "logits/chosen": -3.1957545280456543, + "logits/rejected": -3.0921831130981445, + "logps/chosen": -277.5775146484375, + "logps/rejected": -636.3828125, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8426803350448608, + "rewards/margins": 3.4899630546569824, + "rewards/rejected": -2.647282361984253, + "step": 535 + }, + { + "epoch": 0.68, + "learning_rate": 2.4170486072945406e-08, + "logits/chosen": -3.2890186309814453, + "logits/rejected": -3.133449077606201, + "logps/chosen": -227.36216735839844, + "logps/rejected": -925.2939453125, + "loss": 0.2988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7242965698242188, + "rewards/margins": 4.9295854568481445, + "rewards/rejected": -4.205288887023926, + "step": 536 + }, + { + "epoch": 0.68, + "learning_rate": 2.39939698498413e-08, + "logits/chosen": -3.2193009853363037, + "logits/rejected": -3.04600191116333, + "logps/chosen": -263.25909423828125, + "logps/rejected": -277.38531494140625, + "loss": 0.3706, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7438110113143921, + "rewards/margins": 1.8269226551055908, + "rewards/rejected": -1.0831116437911987, + "step": 537 + }, + { + "epoch": 0.69, + "learning_rate": 2.381789683094683e-08, + "logits/chosen": -3.1938977241516113, + "logits/rejected": -3.1106667518615723, + "logps/chosen": -265.57952880859375, + "logps/rejected": -421.7763977050781, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7796264886856079, + "rewards/margins": 2.280914306640625, + "rewards/rejected": -1.501287817955017, + "step": 538 + }, + { + "epoch": 0.69, + "learning_rate": 2.3642270016962375e-08, + "logits/chosen": -3.2121386528015137, + "logits/rejected": -3.0339818000793457, + "logps/chosen": -279.7341613769531, + "logps/rejected": -974.0641479492188, + "loss": 0.3527, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7164528369903564, + "rewards/margins": 4.15935754776001, + "rewards/rejected": -3.4429047107696533, + "step": 539 + }, + { + "epoch": 0.69, + "learning_rate": 2.3467092400983846e-08, + "logits/chosen": -3.1596169471740723, + "logits/rejected": -3.128758668899536, + "logps/chosen": -264.1007080078125, + "logps/rejected": -860.3846435546875, + "loss": 0.2997, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8566330075263977, + "rewards/margins": 4.4624128341674805, + "rewards/rejected": -3.6057801246643066, + "step": 540 + }, + { + "epoch": 0.69, + "learning_rate": 2.3292366968451794e-08, + "logits/chosen": -3.2422473430633545, + "logits/rejected": -3.137838363647461, + "logps/chosen": -285.25775146484375, + "logps/rejected": -409.9993591308594, + "loss": 0.3275, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5764511227607727, + "rewards/margins": 2.1505095958709717, + "rewards/rejected": -1.5740585327148438, + "step": 541 + }, + { + "epoch": 0.69, + "learning_rate": 2.3118096697100485e-08, + "logits/chosen": -3.195892810821533, + "logits/rejected": -3.0510663986206055, + "logps/chosen": -276.3971862792969, + "logps/rejected": -1258.740966796875, + "loss": 0.3061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.830963134765625, + "rewards/margins": 5.580731391906738, + "rewards/rejected": -4.749768257141113, + "step": 542 + }, + { + "epoch": 0.69, + "learning_rate": 2.294428455690716e-08, + "logits/chosen": -3.199321985244751, + "logits/rejected": -3.0738985538482666, + "logps/chosen": -245.28256225585938, + "logps/rejected": -598.4713134765625, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8549110889434814, + "rewards/margins": 3.1360414028167725, + "rewards/rejected": -2.281130313873291, + "step": 543 + }, + { + "epoch": 0.69, + "learning_rate": 2.2770933510041456e-08, + "logits/chosen": -3.198982000350952, + "logits/rejected": -3.1494367122650146, + "logps/chosen": -297.9873046875, + "logps/rejected": -775.822021484375, + "loss": 0.3147, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8414169549942017, + "rewards/margins": 3.5168352127075195, + "rewards/rejected": -2.6754181385040283, + "step": 544 + }, + { + "epoch": 0.69, + "learning_rate": 2.2598046510814855e-08, + "logits/chosen": -3.2253594398498535, + "logits/rejected": -3.0477101802825928, + "logps/chosen": -252.0957794189453, + "logps/rejected": -1062.023193359375, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8692947626113892, + "rewards/margins": 5.151573181152344, + "rewards/rejected": -4.282278537750244, + "step": 545 + }, + { + "epoch": 0.7, + "learning_rate": 2.2425626505630357e-08, + "logits/chosen": -3.259281635284424, + "logits/rejected": -3.15863037109375, + "logps/chosen": -219.43692016601562, + "logps/rejected": -580.316162109375, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8503814935684204, + "rewards/margins": 3.281707763671875, + "rewards/rejected": -2.431326389312744, + "step": 546 + }, + { + "epoch": 0.7, + "learning_rate": 2.2253676432932338e-08, + "logits/chosen": -3.217190742492676, + "logits/rejected": -3.0871357917785645, + "logps/chosen": -274.04083251953125, + "logps/rejected": -593.8109130859375, + "loss": 0.3624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9204559326171875, + "rewards/margins": 2.9239916801452637, + "rewards/rejected": -2.003535509109497, + "step": 547 + }, + { + "epoch": 0.7, + "learning_rate": 2.208219922315635e-08, + "logits/chosen": -3.3029415607452393, + "logits/rejected": -3.20426869392395, + "logps/chosen": -274.9180908203125, + "logps/rejected": -777.9258422851562, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.757886528968811, + "rewards/margins": 4.273484230041504, + "rewards/rejected": -3.515597343444824, + "step": 548 + }, + { + "epoch": 0.7, + "learning_rate": 2.1911197798679297e-08, + "logits/chosen": -3.2358338832855225, + "logits/rejected": -3.144381046295166, + "logps/chosen": -251.73658752441406, + "logps/rejected": -348.9847412109375, + "loss": 0.3656, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0077263116836548, + "rewards/margins": 2.457477569580078, + "rewards/rejected": -1.4497512578964233, + "step": 549 + }, + { + "epoch": 0.7, + "learning_rate": 2.1740675073769527e-08, + "logits/chosen": -3.227144718170166, + "logits/rejected": -3.1379222869873047, + "logps/chosen": -253.60414123535156, + "logps/rejected": -602.5716552734375, + "loss": 0.2939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8063278198242188, + "rewards/margins": 3.159593343734741, + "rewards/rejected": -2.3532652854919434, + "step": 550 + }, + { + "epoch": 0.7, + "learning_rate": 2.157063395453727e-08, + "logits/chosen": -3.2425146102905273, + "logits/rejected": -3.133240222930908, + "logps/chosen": -284.0385437011719, + "logps/rejected": -632.8699340820312, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0903793573379517, + "rewards/margins": 3.9998459815979004, + "rewards/rejected": -2.909466505050659, + "step": 551 + }, + { + "epoch": 0.7, + "learning_rate": 2.140107733888502e-08, + "logits/chosen": -3.2543511390686035, + "logits/rejected": -3.1707513332366943, + "logps/chosen": -257.87969970703125, + "logps/rejected": -493.5851135253906, + "loss": 0.3333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8184722661972046, + "rewards/margins": 2.752200126647949, + "rewards/rejected": -1.9337279796600342, + "step": 552 + }, + { + "epoch": 0.7, + "learning_rate": 2.1232008116458167e-08, + "logits/chosen": -3.244070053100586, + "logits/rejected": -3.1435327529907227, + "logps/chosen": -244.6429443359375, + "logps/rejected": -901.3431396484375, + "loss": 0.2837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9883880615234375, + "rewards/margins": 5.123392105102539, + "rewards/rejected": -4.135004043579102, + "step": 553 + }, + { + "epoch": 0.71, + "learning_rate": 2.1063429168595836e-08, + "logits/chosen": -3.230482578277588, + "logits/rejected": -3.108487606048584, + "logps/chosen": -252.56214904785156, + "logps/rejected": -906.2711181640625, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9037635922431946, + "rewards/margins": 4.962076663970947, + "rewards/rejected": -4.058312892913818, + "step": 554 + }, + { + "epoch": 0.71, + "learning_rate": 2.0895343368281653e-08, + "logits/chosen": -3.304880142211914, + "logits/rejected": -3.1322579383850098, + "logps/chosen": -249.81619262695312, + "logps/rejected": -527.2001953125, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.823834240436554, + "rewards/margins": 2.913055419921875, + "rewards/rejected": -2.089221239089966, + "step": 555 + }, + { + "epoch": 0.71, + "learning_rate": 2.0727753580094864e-08, + "logits/chosen": -3.207033634185791, + "logits/rejected": -3.000938892364502, + "logps/chosen": -262.5947265625, + "logps/rejected": -1150.9256591796875, + "loss": 0.2908, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.827961802482605, + "rewards/margins": 5.70724630355835, + "rewards/rejected": -4.879284858703613, + "step": 556 + }, + { + "epoch": 0.71, + "learning_rate": 2.056066266016151e-08, + "logits/chosen": -3.2187352180480957, + "logits/rejected": -2.990309476852417, + "logps/chosen": -270.96063232421875, + "logps/rejected": -1054.486572265625, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.970806896686554, + "rewards/margins": 4.867428779602051, + "rewards/rejected": -3.8966217041015625, + "step": 557 + }, + { + "epoch": 0.71, + "learning_rate": 2.0394073456105693e-08, + "logits/chosen": -3.260019540786743, + "logits/rejected": -3.157031774520874, + "logps/chosen": -276.2862548828125, + "logps/rejected": -509.2890930175781, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5610946416854858, + "rewards/margins": 2.7914609909057617, + "rewards/rejected": -2.2303664684295654, + "step": 558 + }, + { + "epoch": 0.71, + "learning_rate": 2.0227988807001168e-08, + "logits/chosen": -3.2201991081237793, + "logits/rejected": -3.111665964126587, + "logps/chosen": -264.6993713378906, + "logps/rejected": -682.5281982421875, + "loss": 0.3356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6471916437149048, + "rewards/margins": 3.1575217247009277, + "rewards/rejected": -2.5103302001953125, + "step": 559 + }, + { + "epoch": 0.71, + "learning_rate": 2.00624115433228e-08, + "logits/chosen": -3.2541182041168213, + "logits/rejected": -3.1907665729522705, + "logps/chosen": -263.62530517578125, + "logps/rejected": -800.045166015625, + "loss": 0.3764, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.838623046875, + "rewards/margins": 4.357080459594727, + "rewards/rejected": -3.5184569358825684, + "step": 560 + }, + { + "epoch": 0.71, + "learning_rate": 1.9897344486898483e-08, + "logits/chosen": -3.2513020038604736, + "logits/rejected": -3.1622314453125, + "logps/chosen": -232.2831268310547, + "logps/rejected": -730.3406372070312, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7903175354003906, + "rewards/margins": 4.164444923400879, + "rewards/rejected": -3.3741273880004883, + "step": 561 + }, + { + "epoch": 0.72, + "learning_rate": 1.973279045086091e-08, + "logits/chosen": -3.2168753147125244, + "logits/rejected": -3.1586859226226807, + "logps/chosen": -265.537841796875, + "logps/rejected": -933.283935546875, + "loss": 0.3077, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.697008490562439, + "rewards/margins": 4.542446136474609, + "rewards/rejected": -3.84543776512146, + "step": 562 + }, + { + "epoch": 0.72, + "learning_rate": 1.9568752239599702e-08, + "logits/chosen": -3.2001113891601562, + "logits/rejected": -3.1147003173828125, + "logps/chosen": -254.04144287109375, + "logps/rejected": -468.79705810546875, + "loss": 0.3044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9057861566543579, + "rewards/margins": 2.8032166957855225, + "rewards/rejected": -1.897430419921875, + "step": 563 + }, + { + "epoch": 0.72, + "learning_rate": 1.9405232648713664e-08, + "logits/chosen": -3.166895866394043, + "logits/rejected": -3.0179457664489746, + "logps/chosen": -315.2380065917969, + "logps/rejected": -452.05462646484375, + "loss": 0.3602, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8041718006134033, + "rewards/margins": 2.376913547515869, + "rewards/rejected": -1.5727417469024658, + "step": 564 + }, + { + "epoch": 0.72, + "learning_rate": 1.9242234464963013e-08, + "logits/chosen": -3.2348766326904297, + "logits/rejected": -3.1505613327026367, + "logps/chosen": -259.8319091796875, + "logps/rejected": -895.4387817382812, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8964401483535767, + "rewards/margins": 4.6028733253479, + "rewards/rejected": -3.7064332962036133, + "step": 565 + }, + { + "epoch": 0.72, + "learning_rate": 1.907976046622202e-08, + "logits/chosen": -3.2304458618164062, + "logits/rejected": -3.008967399597168, + "logps/chosen": -267.79815673828125, + "logps/rejected": -1123.94970703125, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9112541675567627, + "rewards/margins": 4.5667290687561035, + "rewards/rejected": -3.655474901199341, + "step": 566 + }, + { + "epoch": 0.72, + "learning_rate": 1.891781342143155e-08, + "logits/chosen": -3.318683624267578, + "logits/rejected": -3.2311840057373047, + "logps/chosen": -231.12765502929688, + "logps/rejected": -661.5933227539062, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7969444394111633, + "rewards/margins": 4.033353328704834, + "rewards/rejected": -3.2364091873168945, + "step": 567 + }, + { + "epoch": 0.72, + "learning_rate": 1.8756396090551935e-08, + "logits/chosen": -3.2284131050109863, + "logits/rejected": -3.151393413543701, + "logps/chosen": -279.058349609375, + "logps/rejected": -593.6044921875, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8531280755996704, + "rewards/margins": 3.7240493297576904, + "rewards/rejected": -2.8709213733673096, + "step": 568 + }, + { + "epoch": 0.72, + "learning_rate": 1.8595511224515982e-08, + "logits/chosen": -3.2443814277648926, + "logits/rejected": -3.0426430702209473, + "logps/chosen": -250.96725463867188, + "logps/rejected": -1744.5302734375, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8509124517440796, + "rewards/margins": 8.075119018554688, + "rewards/rejected": -7.224206924438477, + "step": 569 + }, + { + "epoch": 0.73, + "learning_rate": 1.8435161565181984e-08, + "logits/chosen": -3.1512198448181152, + "logits/rejected": -3.051345109939575, + "logps/chosen": -289.62506103515625, + "logps/rejected": -737.037353515625, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.816699206829071, + "rewards/margins": 3.24615478515625, + "rewards/rejected": -2.4294557571411133, + "step": 570 + }, + { + "epoch": 0.73, + "learning_rate": 1.8275349845287062e-08, + "logits/chosen": -3.2471365928649902, + "logits/rejected": -3.033395290374756, + "logps/chosen": -260.66632080078125, + "logps/rejected": -316.6391296386719, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8151276111602783, + "rewards/margins": 1.7977005243301392, + "rewards/rejected": -0.9825729131698608, + "step": 571 + }, + { + "epoch": 0.73, + "learning_rate": 1.8116078788400562e-08, + "logits/chosen": -3.2087607383728027, + "logits/rejected": -3.1414854526519775, + "logps/chosen": -244.51319885253906, + "logps/rejected": -865.9524536132812, + "loss": 0.294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9029960632324219, + "rewards/margins": 4.856884002685547, + "rewards/rejected": -3.953887939453125, + "step": 572 + }, + { + "epoch": 0.73, + "learning_rate": 1.7957351108877717e-08, + "logits/chosen": -3.2522408962249756, + "logits/rejected": -3.1103296279907227, + "logps/chosen": -224.4817657470703, + "logps/rejected": -350.8609619140625, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7577140927314758, + "rewards/margins": 2.276029348373413, + "rewards/rejected": -1.5183151960372925, + "step": 573 + }, + { + "epoch": 0.73, + "learning_rate": 1.7799169511813256e-08, + "logits/chosen": -3.2648231983184814, + "logits/rejected": -3.105071783065796, + "logps/chosen": -264.11492919921875, + "logps/rejected": -371.1875, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7981605529785156, + "rewards/margins": 2.0386757850646973, + "rewards/rejected": -1.240515112876892, + "step": 574 + }, + { + "epoch": 0.73, + "learning_rate": 1.7641536692995378e-08, + "logits/chosen": -3.264141798019409, + "logits/rejected": -3.122126579284668, + "logps/chosen": -261.9309387207031, + "logps/rejected": -309.270263671875, + "loss": 0.3391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8318023681640625, + "rewards/margins": 1.7402374744415283, + "rewards/rejected": -0.9084351062774658, + "step": 575 + }, + { + "epoch": 0.73, + "learning_rate": 1.748445533885987e-08, + "logits/chosen": -3.253361225128174, + "logits/rejected": -3.1515519618988037, + "logps/chosen": -266.84051513671875, + "logps/rejected": -1264.9619140625, + "loss": 0.3049, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9783943891525269, + "rewards/margins": 6.135480880737305, + "rewards/rejected": -5.157086372375488, + "step": 576 + }, + { + "epoch": 0.74, + "learning_rate": 1.7327928126444187e-08, + "logits/chosen": -3.232435703277588, + "logits/rejected": -3.1210572719573975, + "logps/chosen": -274.2669677734375, + "logps/rejected": -820.6929931640625, + "loss": 0.3431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7014831304550171, + "rewards/margins": 4.128390312194824, + "rewards/rejected": -3.426907539367676, + "step": 577 + }, + { + "epoch": 0.74, + "learning_rate": 1.7171957723341913e-08, + "logits/chosen": -3.2336220741271973, + "logits/rejected": -3.1077070236206055, + "logps/chosen": -239.94871520996094, + "logps/rejected": -1027.950927734375, + "loss": 0.2748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9507324695587158, + "rewards/margins": 5.302740573883057, + "rewards/rejected": -4.352007865905762, + "step": 578 + }, + { + "epoch": 0.74, + "learning_rate": 1.701654678765732e-08, + "logits/chosen": -3.271378993988037, + "logits/rejected": -3.1468758583068848, + "logps/chosen": -261.89617919921875, + "logps/rejected": -769.6102294921875, + "loss": 0.3337, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0582497119903564, + "rewards/margins": 4.01816463470459, + "rewards/rejected": -2.9599151611328125, + "step": 579 + }, + { + "epoch": 0.74, + "learning_rate": 1.6861697967959988e-08, + "logits/chosen": -3.1742100715637207, + "logits/rejected": -3.089136838912964, + "logps/chosen": -250.92591857910156, + "logps/rejected": -710.4549560546875, + "loss": 0.3108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0313690900802612, + "rewards/margins": 3.6788501739501953, + "rewards/rejected": -2.6474809646606445, + "step": 580 + }, + { + "epoch": 0.74, + "learning_rate": 1.670741390323976e-08, + "logits/chosen": -3.154850959777832, + "logits/rejected": -3.0966029167175293, + "logps/chosen": -301.2120361328125, + "logps/rejected": -520.017333984375, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8593307733535767, + "rewards/margins": 2.8925371170043945, + "rewards/rejected": -2.0332062244415283, + "step": 581 + }, + { + "epoch": 0.74, + "learning_rate": 1.6553697222861677e-08, + "logits/chosen": -3.201054573059082, + "logits/rejected": -3.024292230606079, + "logps/chosen": -267.13323974609375, + "logps/rejected": -682.0592041015625, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7948379516601562, + "rewards/margins": 3.1108078956604004, + "rewards/rejected": -2.315969944000244, + "step": 582 + }, + { + "epoch": 0.74, + "learning_rate": 1.640055054652122e-08, + "logits/chosen": -3.1976754665374756, + "logits/rejected": -3.0820698738098145, + "logps/chosen": -254.60952758789062, + "logps/rejected": -723.7103271484375, + "loss": 0.3498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6552925109863281, + "rewards/margins": 3.051746368408203, + "rewards/rejected": -2.396453857421875, + "step": 583 + }, + { + "epoch": 0.74, + "learning_rate": 1.6247976484199684e-08, + "logits/chosen": -3.16763973236084, + "logits/rejected": -2.946324348449707, + "logps/chosen": -324.80084228515625, + "logps/rejected": -1205.185546875, + "loss": 0.3244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8302963376045227, + "rewards/margins": 5.849470615386963, + "rewards/rejected": -5.019174098968506, + "step": 584 + }, + { + "epoch": 0.75, + "learning_rate": 1.6095977636119613e-08, + "logits/chosen": -3.2855093479156494, + "logits/rejected": -3.080674648284912, + "logps/chosen": -290.3113098144531, + "logps/rejected": -1054.92626953125, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6604140996932983, + "rewards/margins": 4.476771831512451, + "rewards/rejected": -3.8163576126098633, + "step": 585 + }, + { + "epoch": 0.75, + "learning_rate": 1.594455659270061e-08, + "logits/chosen": -3.2556238174438477, + "logits/rejected": -3.1896681785583496, + "logps/chosen": -254.7762451171875, + "logps/rejected": -687.8551025390625, + "loss": 0.2955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7123581171035767, + "rewards/margins": 3.802617073059082, + "rewards/rejected": -3.090258836746216, + "step": 586 + }, + { + "epoch": 0.75, + "learning_rate": 1.579371593451506e-08, + "logits/chosen": -3.234292507171631, + "logits/rejected": -3.072748899459839, + "logps/chosen": -229.24212646484375, + "logps/rejected": -1186.90087890625, + "loss": 0.3165, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.910437822341919, + "rewards/margins": 5.474558353424072, + "rewards/rejected": -4.564120292663574, + "step": 587 + }, + { + "epoch": 0.75, + "learning_rate": 1.564345823224425e-08, + "logits/chosen": -3.2757949829101562, + "logits/rejected": -3.142781972885132, + "logps/chosen": -270.482177734375, + "logps/rejected": -417.7461853027344, + "loss": 0.3727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9286254644393921, + "rewards/margins": 2.5731444358825684, + "rewards/rejected": -1.6445190906524658, + "step": 588 + }, + { + "epoch": 0.75, + "learning_rate": 1.549378604663449e-08, + "logits/chosen": -3.270742654800415, + "logits/rejected": -3.1971595287323, + "logps/chosen": -263.28729248046875, + "logps/rejected": -570.1566162109375, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9708092212677002, + "rewards/margins": 3.680844783782959, + "rewards/rejected": -2.710035800933838, + "step": 589 + }, + { + "epoch": 0.75, + "learning_rate": 1.534470192845352e-08, + "logits/chosen": -3.1615734100341797, + "logits/rejected": -3.0486693382263184, + "logps/chosen": -228.39932250976562, + "logps/rejected": -1287.459716796875, + "loss": 0.2998, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8024742603302002, + "rewards/margins": 5.854833602905273, + "rewards/rejected": -5.052359104156494, + "step": 590 + }, + { + "epoch": 0.75, + "learning_rate": 1.519620841844703e-08, + "logits/chosen": -3.166203022003174, + "logits/rejected": -3.153575897216797, + "logps/chosen": -261.28228759765625, + "logps/rejected": -551.6494750976562, + "loss": 0.3444, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9510132074356079, + "rewards/margins": 3.0914430618286133, + "rewards/rejected": -2.140429735183716, + "step": 591 + }, + { + "epoch": 0.75, + "learning_rate": 1.5048308047295354e-08, + "logits/chosen": -3.2139499187469482, + "logits/rejected": -3.0568087100982666, + "logps/chosen": -265.39666748046875, + "logps/rejected": -891.517822265625, + "loss": 0.3086, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7643356323242188, + "rewards/margins": 4.123425483703613, + "rewards/rejected": -3.3590898513793945, + "step": 592 + }, + { + "epoch": 0.76, + "learning_rate": 1.4901003335570288e-08, + "logits/chosen": -3.236555576324463, + "logits/rejected": -3.134690284729004, + "logps/chosen": -252.97525024414062, + "logps/rejected": -629.0213623046875, + "loss": 0.3023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6914070248603821, + "rewards/margins": 3.3474678993225098, + "rewards/rejected": -2.6560606956481934, + "step": 593 + }, + { + "epoch": 0.76, + "learning_rate": 1.4754296793692261e-08, + "logits/chosen": -3.184265613555908, + "logits/rejected": -3.088202953338623, + "logps/chosen": -248.45901489257812, + "logps/rejected": -781.5577392578125, + "loss": 0.3126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9577972888946533, + "rewards/margins": 3.9279375076293945, + "rewards/rejected": -2.970140218734741, + "step": 594 + }, + { + "epoch": 0.76, + "learning_rate": 1.4608190921887403e-08, + "logits/chosen": -3.233309030532837, + "logits/rejected": -3.051480531692505, + "logps/chosen": -243.5015106201172, + "logps/rejected": -906.337890625, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7164719104766846, + "rewards/margins": 4.347343444824219, + "rewards/rejected": -3.6308717727661133, + "step": 595 + }, + { + "epoch": 0.76, + "learning_rate": 1.4462688210145074e-08, + "logits/chosen": -3.2203197479248047, + "logits/rejected": -3.176856517791748, + "logps/chosen": -255.8787384033203, + "logps/rejected": -534.983154296875, + "loss": 0.3385, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9356857538223267, + "rewards/margins": 3.071793556213379, + "rewards/rejected": -2.1361076831817627, + "step": 596 + }, + { + "epoch": 0.76, + "learning_rate": 1.4317791138175301e-08, + "logits/chosen": -3.2462286949157715, + "logits/rejected": -3.068941593170166, + "logps/chosen": -262.40045166015625, + "logps/rejected": -466.3746032714844, + "loss": 0.3358, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0276496410369873, + "rewards/margins": 2.653897762298584, + "rewards/rejected": -1.6262481212615967, + "step": 597 + }, + { + "epoch": 0.76, + "learning_rate": 1.4173502175366591e-08, + "logits/chosen": -3.3054122924804688, + "logits/rejected": -3.0761313438415527, + "logps/chosen": -225.70082092285156, + "logps/rejected": -286.3860778808594, + "loss": 0.3571, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7936760187149048, + "rewards/margins": 1.7094398736953735, + "rewards/rejected": -0.9157638549804688, + "step": 598 + }, + { + "epoch": 0.76, + "learning_rate": 1.4029823780743876e-08, + "logits/chosen": -3.3038196563720703, + "logits/rejected": -3.087092399597168, + "logps/chosen": -263.59588623046875, + "logps/rejected": -1129.630126953125, + "loss": 0.3245, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8290008306503296, + "rewards/margins": 5.551943778991699, + "rewards/rejected": -4.722943305969238, + "step": 599 + }, + { + "epoch": 0.76, + "learning_rate": 1.3886758402926507e-08, + "logits/chosen": -3.192277669906616, + "logits/rejected": -3.1534926891326904, + "logps/chosen": -291.77471923828125, + "logps/rejected": -816.91943359375, + "loss": 0.3474, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6618912220001221, + "rewards/margins": 3.851823329925537, + "rewards/rejected": -3.189932346343994, + "step": 600 + }, + { + "epoch": 0.77, + "learning_rate": 1.374430848008663e-08, + "logits/chosen": -3.2324159145355225, + "logits/rejected": -3.208190679550171, + "logps/chosen": -258.1676025390625, + "logps/rejected": -625.5303344726562, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9962394833564758, + "rewards/margins": 3.9445395469665527, + "rewards/rejected": -2.948300361633301, + "step": 601 + }, + { + "epoch": 0.77, + "learning_rate": 1.3602476439907545e-08, + "logits/chosen": -3.2601146697998047, + "logits/rejected": -3.071575164794922, + "logps/chosen": -249.2401580810547, + "logps/rejected": -533.3653564453125, + "loss": 0.3382, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6891113519668579, + "rewards/margins": 2.5400314331054688, + "rewards/rejected": -1.8509200811386108, + "step": 602 + }, + { + "epoch": 0.77, + "learning_rate": 1.3461264699542384e-08, + "logits/chosen": -3.2526583671569824, + "logits/rejected": -3.213658094406128, + "logps/chosen": -234.99581909179688, + "logps/rejected": -776.9456176757812, + "loss": 0.2915, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7977828979492188, + "rewards/margins": 4.1128034591674805, + "rewards/rejected": -3.315020799636841, + "step": 603 + }, + { + "epoch": 0.77, + "learning_rate": 1.3320675665572911e-08, + "logits/chosen": -3.2728495597839355, + "logits/rejected": -3.110137939453125, + "logps/chosen": -271.7722473144531, + "logps/rejected": -699.9614868164062, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.714599609375, + "rewards/margins": 3.747079372406006, + "rewards/rejected": -3.032480001449585, + "step": 604 + }, + { + "epoch": 0.77, + "learning_rate": 1.3180711733968474e-08, + "logits/chosen": -3.2372779846191406, + "logits/rejected": -3.1405906677246094, + "logps/chosen": -253.88021850585938, + "logps/rejected": -1061.544189453125, + "loss": 0.3455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9591201543807983, + "rewards/margins": 4.513230800628662, + "rewards/rejected": -3.554110527038574, + "step": 605 + }, + { + "epoch": 0.77, + "learning_rate": 1.3041375290045265e-08, + "logits/chosen": -3.249765634536743, + "logits/rejected": -3.1355109214782715, + "logps/chosen": -265.23907470703125, + "logps/rejected": -1311.59765625, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7511459589004517, + "rewards/margins": 6.270387649536133, + "rewards/rejected": -5.5192413330078125, + "step": 606 + }, + { + "epoch": 0.77, + "learning_rate": 1.290266870842553e-08, + "logits/chosen": -3.2792675495147705, + "logits/rejected": -3.134310722351074, + "logps/chosen": -277.19757080078125, + "logps/rejected": -473.5810546875, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7360794544219971, + "rewards/margins": 2.665278673171997, + "rewards/rejected": -1.92919921875, + "step": 607 + }, + { + "epoch": 0.77, + "learning_rate": 1.2764594352997199e-08, + "logits/chosen": -3.280508041381836, + "logits/rejected": -2.984034299850464, + "logps/chosen": -226.81344604492188, + "logps/rejected": -1292.306640625, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9740875363349915, + "rewards/margins": 5.927984714508057, + "rewards/rejected": -4.953896999359131, + "step": 608 + }, + { + "epoch": 0.78, + "learning_rate": 1.26271545768736e-08, + "logits/chosen": -3.237649440765381, + "logits/rejected": -3.122204303741455, + "logps/chosen": -238.70816040039062, + "logps/rejected": -517.947265625, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9389861822128296, + "rewards/margins": 3.171025037765503, + "rewards/rejected": -2.232038974761963, + "step": 609 + }, + { + "epoch": 0.78, + "learning_rate": 1.2490351722353281e-08, + "logits/chosen": -3.22951602935791, + "logits/rejected": -3.1178877353668213, + "logps/chosen": -285.6993103027344, + "logps/rejected": -408.00372314453125, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6709976196289062, + "rewards/margins": 2.679152011871338, + "rewards/rejected": -2.0081543922424316, + "step": 610 + }, + { + "epoch": 0.78, + "learning_rate": 1.2354188120880204e-08, + "logits/chosen": -3.2449889183044434, + "logits/rejected": -3.1590375900268555, + "logps/chosen": -258.1911315917969, + "logps/rejected": -486.283203125, + "loss": 0.34, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8675147891044617, + "rewards/margins": 2.7733635902404785, + "rewards/rejected": -1.905848741531372, + "step": 611 + }, + { + "epoch": 0.78, + "learning_rate": 1.2218666093003881e-08, + "logits/chosen": -3.222259521484375, + "logits/rejected": -3.110126495361328, + "logps/chosen": -277.38031005859375, + "logps/rejected": -474.2116394042969, + "loss": 0.3469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8777374029159546, + "rewards/margins": 2.813704013824463, + "rewards/rejected": -1.9359664916992188, + "step": 612 + }, + { + "epoch": 0.78, + "learning_rate": 1.2083787948339925e-08, + "logits/chosen": -3.273963451385498, + "logits/rejected": -3.1724026203155518, + "logps/chosen": -252.67892456054688, + "logps/rejected": -854.4664306640625, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.029316782951355, + "rewards/margins": 5.159526348114014, + "rewards/rejected": -4.130209445953369, + "step": 613 + }, + { + "epoch": 0.78, + "learning_rate": 1.1949555985530679e-08, + "logits/chosen": -3.236799716949463, + "logits/rejected": -3.17627215385437, + "logps/chosen": -313.8854064941406, + "logps/rejected": -437.70172119140625, + "loss": 0.3553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8802902698516846, + "rewards/margins": 2.8349030017852783, + "rewards/rejected": -1.9546127319335938, + "step": 614 + }, + { + "epoch": 0.78, + "learning_rate": 1.1815972492205973e-08, + "logits/chosen": -3.2962422370910645, + "logits/rejected": -3.071598529815674, + "logps/chosen": -240.19432067871094, + "logps/rejected": -747.1488037109375, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7705528736114502, + "rewards/margins": 3.749840497970581, + "rewards/rejected": -2.979287624359131, + "step": 615 + }, + { + "epoch": 0.78, + "learning_rate": 1.1683039744944234e-08, + "logits/chosen": -3.2371349334716797, + "logits/rejected": -3.139195203781128, + "logps/chosen": -289.4661560058594, + "logps/rejected": -461.3451843261719, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8869079351425171, + "rewards/margins": 2.5828185081481934, + "rewards/rejected": -1.6959106922149658, + "step": 616 + }, + { + "epoch": 0.79, + "learning_rate": 1.1550760009233607e-08, + "logits/chosen": -3.2003426551818848, + "logits/rejected": -3.1713709831237793, + "logps/chosen": -253.92306518554688, + "logps/rejected": -3677.36376953125, + "loss": 0.3068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7293144464492798, + "rewards/margins": 6.794063568115234, + "rewards/rejected": -6.064749240875244, + "step": 617 + }, + { + "epoch": 0.79, + "learning_rate": 1.1419135539433356e-08, + "logits/chosen": -3.21121883392334, + "logits/rejected": -3.1373863220214844, + "logps/chosen": -255.68434143066406, + "logps/rejected": -485.3709411621094, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.74486243724823, + "rewards/margins": 3.178415060043335, + "rewards/rejected": -2.4335525035858154, + "step": 618 + }, + { + "epoch": 0.79, + "learning_rate": 1.1288168578735541e-08, + "logits/chosen": -3.3000521659851074, + "logits/rejected": -3.11216402053833, + "logps/chosen": -247.6477508544922, + "logps/rejected": -799.508544921875, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7169479131698608, + "rewards/margins": 3.8536972999572754, + "rewards/rejected": -3.136749267578125, + "step": 619 + }, + { + "epoch": 0.79, + "learning_rate": 1.1157861359126635e-08, + "logits/chosen": -3.1626219749450684, + "logits/rejected": -3.1449050903320312, + "logps/chosen": -233.05593872070312, + "logps/rejected": -573.292236328125, + "loss": 0.2893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9083045721054077, + "rewards/margins": 3.6561639308929443, + "rewards/rejected": -2.747859239578247, + "step": 620 + }, + { + "epoch": 0.79, + "learning_rate": 1.1028216101349602e-08, + "logits/chosen": -3.1917061805725098, + "logits/rejected": -3.171713352203369, + "logps/chosen": -257.1170349121094, + "logps/rejected": -712.8492431640625, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.87017822265625, + "rewards/margins": 3.7265625, + "rewards/rejected": -2.85638427734375, + "step": 621 + }, + { + "epoch": 0.79, + "learning_rate": 1.0899235014866003e-08, + "logits/chosen": -3.29404354095459, + "logits/rejected": -3.092752456665039, + "logps/chosen": -246.923828125, + "logps/rejected": -449.3905029296875, + "loss": 0.3014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.795684814453125, + "rewards/margins": 2.535923719406128, + "rewards/rejected": -1.740238904953003, + "step": 622 + }, + { + "epoch": 0.79, + "learning_rate": 1.0770920297818336e-08, + "logits/chosen": -3.235396385192871, + "logits/rejected": -3.061380386352539, + "logps/chosen": -281.9817810058594, + "logps/rejected": -524.5133056640625, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8582572937011719, + "rewards/margins": 2.3924217224121094, + "rewards/rejected": -1.5341644287109375, + "step": 623 + }, + { + "epoch": 0.79, + "learning_rate": 1.0643274136992641e-08, + "logits/chosen": -3.220801830291748, + "logits/rejected": -3.0254549980163574, + "logps/chosen": -260.3317565917969, + "logps/rejected": -837.4342651367188, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6590790152549744, + "rewards/margins": 3.8442840576171875, + "rewards/rejected": -3.1852049827575684, + "step": 624 + }, + { + "epoch": 0.8, + "learning_rate": 1.0516298707781107e-08, + "logits/chosen": -3.2623133659362793, + "logits/rejected": -3.084077835083008, + "logps/chosen": -259.6959533691406, + "logps/rejected": -707.7886962890625, + "loss": 0.3089, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7972068786621094, + "rewards/margins": 3.713256359100342, + "rewards/rejected": -2.9160492420196533, + "step": 625 + }, + { + "epoch": 0.8, + "learning_rate": 1.0389996174145144e-08, + "logits/chosen": -3.259096145629883, + "logits/rejected": -3.1188673973083496, + "logps/chosen": -247.66845703125, + "logps/rejected": -468.78887939453125, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8614342212677002, + "rewards/margins": 2.794231414794922, + "rewards/rejected": -1.9327971935272217, + "step": 626 + }, + { + "epoch": 0.8, + "learning_rate": 1.0264368688578373e-08, + "logits/chosen": -3.102475881576538, + "logits/rejected": -3.0904011726379395, + "logps/chosen": -263.1651916503906, + "logps/rejected": -751.5289306640625, + "loss": 0.2916, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8394378423690796, + "rewards/margins": 4.156970500946045, + "rewards/rejected": -3.317532539367676, + "step": 627 + }, + { + "epoch": 0.8, + "learning_rate": 1.013941839207002e-08, + "logits/chosen": -3.2076122760772705, + "logits/rejected": -3.1163136959075928, + "logps/chosen": -246.68490600585938, + "logps/rejected": -308.32879638671875, + "loss": 0.3329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8507003784179688, + "rewards/margins": 2.0852012634277344, + "rewards/rejected": -1.2345008850097656, + "step": 628 + }, + { + "epoch": 0.8, + "learning_rate": 1.0015147414068431e-08, + "logits/chosen": -3.1918039321899414, + "logits/rejected": -3.0757086277008057, + "logps/chosen": -288.651123046875, + "logps/rejected": -538.21875, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6816345453262329, + "rewards/margins": 3.067410469055176, + "rewards/rejected": -2.3857758045196533, + "step": 629 + }, + { + "epoch": 0.8, + "learning_rate": 9.891557872444723e-09, + "logits/chosen": -3.2203798294067383, + "logits/rejected": -3.071591854095459, + "logps/chosen": -269.13232421875, + "logps/rejected": -503.5957336425781, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.981475830078125, + "rewards/margins": 2.9349610805511475, + "rewards/rejected": -1.953485131263733, + "step": 630 + }, + { + "epoch": 0.8, + "learning_rate": 9.768651873456763e-09, + "logits/chosen": -3.2287206649780273, + "logits/rejected": -3.1645419597625732, + "logps/chosen": -274.113037109375, + "logps/rejected": -659.9772338867188, + "loss": 0.349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0296096801757812, + "rewards/margins": 3.8944411277770996, + "rewards/rejected": -2.8648314476013184, + "step": 631 + }, + { + "epoch": 0.81, + "learning_rate": 9.646431511713204e-09, + "logits/chosen": -3.1766562461853027, + "logits/rejected": -3.0769057273864746, + "logps/chosen": -265.0703125, + "logps/rejected": -466.82330322265625, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.142798662185669, + "rewards/margins": 3.0998573303222656, + "rewards/rejected": -1.9570586681365967, + "step": 632 + }, + { + "epoch": 0.81, + "learning_rate": 9.524898870137827e-09, + "logits/chosen": -3.246769905090332, + "logits/rejected": -3.110858917236328, + "logps/chosen": -250.1650390625, + "logps/rejected": -410.495361328125, + "loss": 0.3427, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8843650817871094, + "rewards/margins": 2.586956024169922, + "rewards/rejected": -1.7025909423828125, + "step": 633 + }, + { + "epoch": 0.81, + "learning_rate": 9.40405601993407e-09, + "logits/chosen": -3.205592632293701, + "logits/rejected": -3.057408332824707, + "logps/chosen": -295.82904052734375, + "logps/rejected": -777.1162109375, + "loss": 0.3475, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8572632074356079, + "rewards/margins": 3.7057619094848633, + "rewards/rejected": -2.848498582839966, + "step": 634 + }, + { + "epoch": 0.81, + "learning_rate": 9.283905020549653e-09, + "logits/chosen": -3.225004196166992, + "logits/rejected": -3.2221264839172363, + "logps/chosen": -309.5726318359375, + "logps/rejected": -654.26171875, + "loss": 0.3106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8623901605606079, + "rewards/margins": 3.583270311355591, + "rewards/rejected": -2.7208800315856934, + "step": 635 + }, + { + "epoch": 0.81, + "learning_rate": 9.164447919641538e-09, + "logits/chosen": -3.2448041439056396, + "logits/rejected": -3.1433093547821045, + "logps/chosen": -247.80284118652344, + "logps/rejected": -553.369384765625, + "loss": 0.3102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7392303943634033, + "rewards/margins": 3.2394747734069824, + "rewards/rejected": -2.500244140625, + "step": 636 + }, + { + "epoch": 0.81, + "learning_rate": 9.045686753041016e-09, + "logits/chosen": -3.21915602684021, + "logits/rejected": -3.1345417499542236, + "logps/chosen": -263.84210205078125, + "logps/rejected": -790.50146484375, + "loss": 0.3227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8022888898849487, + "rewards/margins": 4.307033061981201, + "rewards/rejected": -3.504744052886963, + "step": 637 + }, + { + "epoch": 0.81, + "learning_rate": 8.92762354471901e-09, + "logits/chosen": -3.282750129699707, + "logits/rejected": -3.1674227714538574, + "logps/chosen": -309.6311950683594, + "logps/rejected": -365.8955078125, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.888470470905304, + "rewards/margins": 2.197859287261963, + "rewards/rejected": -1.3093887567520142, + "step": 638 + }, + { + "epoch": 0.81, + "learning_rate": 8.81026030675161e-09, + "logits/chosen": -3.20297908782959, + "logits/rejected": -2.999485492706299, + "logps/chosen": -310.2333068847656, + "logps/rejected": -758.2830810546875, + "loss": 0.3348, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1324020624160767, + "rewards/margins": 3.947261333465576, + "rewards/rejected": -2.81485915184021, + "step": 639 + }, + { + "epoch": 0.82, + "learning_rate": 8.693599039285715e-09, + "logits/chosen": -3.265778064727783, + "logits/rejected": -3.1854522228240967, + "logps/chosen": -257.3260498046875, + "logps/rejected": -342.0694580078125, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6907135248184204, + "rewards/margins": 1.8427276611328125, + "rewards/rejected": -1.152014136314392, + "step": 640 + }, + { + "epoch": 0.82, + "learning_rate": 8.57764173050503e-09, + "logits/chosen": -3.2471158504486084, + "logits/rejected": -3.1763830184936523, + "logps/chosen": -253.8968505859375, + "logps/rejected": -622.990966796875, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7287315130233765, + "rewards/margins": 3.520049571990967, + "rewards/rejected": -2.791317939758301, + "step": 641 + }, + { + "epoch": 0.82, + "learning_rate": 8.462390356596116e-09, + "logits/chosen": -3.2582409381866455, + "logits/rejected": -3.139667510986328, + "logps/chosen": -262.6158447265625, + "logps/rejected": -602.608154296875, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9222488403320312, + "rewards/margins": 3.608116388320923, + "rewards/rejected": -2.6858673095703125, + "step": 642 + }, + { + "epoch": 0.82, + "learning_rate": 8.347846881714715e-09, + "logits/chosen": -3.2264137268066406, + "logits/rejected": -2.976250648498535, + "logps/chosen": -258.9403381347656, + "logps/rejected": -1293.9122314453125, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7751579284667969, + "rewards/margins": 5.2603631019592285, + "rewards/rejected": -4.485205173492432, + "step": 643 + }, + { + "epoch": 0.82, + "learning_rate": 8.234013257952355e-09, + "logits/chosen": -3.278862714767456, + "logits/rejected": -3.091895580291748, + "logps/chosen": -243.2866973876953, + "logps/rejected": -437.4513244628906, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7378342151641846, + "rewards/margins": 2.3796191215515137, + "rewards/rejected": -1.64178466796875, + "step": 644 + }, + { + "epoch": 0.82, + "learning_rate": 8.12089142530296e-09, + "logits/chosen": -3.2027792930603027, + "logits/rejected": -3.098857879638672, + "logps/chosen": -252.629150390625, + "logps/rejected": -608.3524780273438, + "loss": 0.3496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8744858503341675, + "rewards/margins": 3.4049072265625, + "rewards/rejected": -2.530421495437622, + "step": 645 + }, + { + "epoch": 0.82, + "learning_rate": 8.00848331162991e-09, + "logits/chosen": -3.2145590782165527, + "logits/rejected": -3.0802860260009766, + "logps/chosen": -269.2569885253906, + "logps/rejected": -429.92095947265625, + "loss": 0.3057, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9102004766464233, + "rewards/margins": 2.7734787464141846, + "rewards/rejected": -1.8632782697677612, + "step": 646 + }, + { + "epoch": 0.82, + "learning_rate": 7.896790832633072e-09, + "logits/chosen": -3.245332717895508, + "logits/rejected": -3.16104793548584, + "logps/chosen": -254.72457885742188, + "logps/rejected": -753.9669189453125, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6259399652481079, + "rewards/margins": 4.245312690734863, + "rewards/rejected": -3.619372606277466, + "step": 647 + }, + { + "epoch": 0.83, + "learning_rate": 7.785815891816256e-09, + "logits/chosen": -3.237276315689087, + "logits/rejected": -3.110905170440674, + "logps/chosen": -250.73495483398438, + "logps/rejected": -807.7649536132812, + "loss": 0.3297, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7370750904083252, + "rewards/margins": 3.9652795791625977, + "rewards/rejected": -3.2282042503356934, + "step": 648 + }, + { + "epoch": 0.83, + "learning_rate": 7.675560380454692e-09, + "logits/chosen": -3.238654613494873, + "logits/rejected": -3.0983781814575195, + "logps/chosen": -275.40869140625, + "logps/rejected": -322.437744140625, + "loss": 0.3445, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6527794003486633, + "rewards/margins": 1.9698371887207031, + "rewards/rejected": -1.3170578479766846, + "step": 649 + }, + { + "epoch": 0.83, + "learning_rate": 7.566026177562845e-09, + "logits/chosen": -3.210435390472412, + "logits/rejected": -3.0764217376708984, + "logps/chosen": -271.7040710449219, + "logps/rejected": -925.9759521484375, + "loss": 0.3211, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9937187433242798, + "rewards/margins": 4.683446407318115, + "rewards/rejected": -3.689727783203125, + "step": 650 + }, + { + "epoch": 0.83, + "learning_rate": 7.457215149862372e-09, + "logits/chosen": -3.1437602043151855, + "logits/rejected": -3.067821502685547, + "logps/chosen": -279.82989501953125, + "logps/rejected": -735.0548095703125, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7615966796875, + "rewards/margins": 4.092092990875244, + "rewards/rejected": -3.330496311187744, + "step": 651 + }, + { + "epoch": 0.83, + "learning_rate": 7.349129151750311e-09, + "logits/chosen": -3.2190775871276855, + "logits/rejected": -3.0901546478271484, + "logps/chosen": -279.00732421875, + "logps/rejected": -555.630615234375, + "loss": 0.3376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8659775257110596, + "rewards/margins": 3.386897087097168, + "rewards/rejected": -2.5209197998046875, + "step": 652 + }, + { + "epoch": 0.83, + "learning_rate": 7.241770025267519e-09, + "logits/chosen": -3.259298801422119, + "logits/rejected": -3.126735210418701, + "logps/chosen": -255.18426513671875, + "logps/rejected": -387.69219970703125, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6904510259628296, + "rewards/margins": 2.141697645187378, + "rewards/rejected": -1.4512466192245483, + "step": 653 + }, + { + "epoch": 0.83, + "learning_rate": 7.135139600067202e-09, + "logits/chosen": -3.1963253021240234, + "logits/rejected": -3.0717523097991943, + "logps/chosen": -253.12933349609375, + "logps/rejected": -940.6087036132812, + "loss": 0.3304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8492035269737244, + "rewards/margins": 4.3157148361206055, + "rewards/rejected": -3.4665114879608154, + "step": 654 + }, + { + "epoch": 0.83, + "learning_rate": 7.029239693383776e-09, + "logits/chosen": -3.196666717529297, + "logits/rejected": -3.11808443069458, + "logps/chosen": -246.62550354003906, + "logps/rejected": -548.4185791015625, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9548377990722656, + "rewards/margins": 3.0834574699401855, + "rewards/rejected": -2.128619432449341, + "step": 655 + }, + { + "epoch": 0.84, + "learning_rate": 6.9240721100019324e-09, + "logits/chosen": -3.210493564605713, + "logits/rejected": -3.070244789123535, + "logps/chosen": -265.69818115234375, + "logps/rejected": -658.337646484375, + "loss": 0.3316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9996474981307983, + "rewards/margins": 3.519151210784912, + "rewards/rejected": -2.5195038318634033, + "step": 656 + }, + { + "epoch": 0.84, + "learning_rate": 6.819638642225795e-09, + "logits/chosen": -3.2643911838531494, + "logits/rejected": -3.0795226097106934, + "logps/chosen": -257.6524658203125, + "logps/rejected": -707.4072265625, + "loss": 0.2986, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8037430047988892, + "rewards/margins": 3.4003632068634033, + "rewards/rejected": -2.5966200828552246, + "step": 657 + }, + { + "epoch": 0.84, + "learning_rate": 6.7159410698484575e-09, + "logits/chosen": -3.221360206604004, + "logits/rejected": -3.1324048042297363, + "logps/chosen": -290.82513427734375, + "logps/rejected": -821.9508056640625, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.788970947265625, + "rewards/margins": 4.068869113922119, + "rewards/rejected": -3.279898166656494, + "step": 658 + }, + { + "epoch": 0.84, + "learning_rate": 6.612981160121611e-09, + "logits/chosen": -3.2563834190368652, + "logits/rejected": -3.1823315620422363, + "logps/chosen": -265.136962890625, + "logps/rejected": -440.8941650390625, + "loss": 0.335, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2297966480255127, + "rewards/margins": 3.1084115505218506, + "rewards/rejected": -1.878614902496338, + "step": 659 + }, + { + "epoch": 0.84, + "learning_rate": 6.510760667725407e-09, + "logits/chosen": -3.0964841842651367, + "logits/rejected": -3.0987586975097656, + "logps/chosen": -269.4061279296875, + "logps/rejected": -672.33203125, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7540161609649658, + "rewards/margins": 3.7451415061950684, + "rewards/rejected": -2.9911255836486816, + "step": 660 + }, + { + "epoch": 0.84, + "learning_rate": 6.4092813347386145e-09, + "logits/chosen": -3.260005474090576, + "logits/rejected": -3.160648822784424, + "logps/chosen": -264.69378662109375, + "logps/rejected": -433.42913818359375, + "loss": 0.3098, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7336212396621704, + "rewards/margins": 2.4034714698791504, + "rewards/rejected": -1.6698501110076904, + "step": 661 + }, + { + "epoch": 0.84, + "learning_rate": 6.308544890608863e-09, + "logits/chosen": -3.208181381225586, + "logits/rejected": -3.1059112548828125, + "logps/chosen": -251.56549072265625, + "logps/rejected": -573.4783935546875, + "loss": 0.2973, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7011330127716064, + "rewards/margins": 3.126431941986084, + "rewards/rejected": -2.4252991676330566, + "step": 662 + }, + { + "epoch": 0.84, + "learning_rate": 6.208553052123233e-09, + "logits/chosen": -3.2162210941314697, + "logits/rejected": -3.0825183391571045, + "logps/chosen": -260.6407775878906, + "logps/rejected": -1413.282470703125, + "loss": 0.343, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0542283058166504, + "rewards/margins": 6.644221305847168, + "rewards/rejected": -5.589993476867676, + "step": 663 + }, + { + "epoch": 0.85, + "learning_rate": 6.109307523378937e-09, + "logits/chosen": -3.2817001342773438, + "logits/rejected": -3.152174949645996, + "logps/chosen": -247.85507202148438, + "logps/rejected": -1269.677001953125, + "loss": 0.3289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8159767389297485, + "rewards/margins": 5.487276554107666, + "rewards/rejected": -4.671299934387207, + "step": 664 + }, + { + "epoch": 0.85, + "learning_rate": 6.010809995754307e-09, + "logits/chosen": -3.3018321990966797, + "logits/rejected": -3.1131248474121094, + "logps/chosen": -241.98245239257812, + "logps/rejected": -297.614990234375, + "loss": 0.3095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7327064275741577, + "rewards/margins": 2.05373477935791, + "rewards/rejected": -1.3210281133651733, + "step": 665 + }, + { + "epoch": 0.85, + "learning_rate": 5.913062147879994e-09, + "logits/chosen": -3.19700288772583, + "logits/rejected": -3.012904644012451, + "logps/chosen": -252.0400390625, + "logps/rejected": -1417.710693359375, + "loss": 0.285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.821667492389679, + "rewards/margins": 5.997473239898682, + "rewards/rejected": -5.175806045532227, + "step": 666 + }, + { + "epoch": 0.85, + "learning_rate": 5.816065645610313e-09, + "logits/chosen": -3.233210802078247, + "logits/rejected": -3.097968578338623, + "logps/chosen": -278.1906433105469, + "logps/rejected": -505.1903991699219, + "loss": 0.3368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7267730832099915, + "rewards/margins": 2.775106906890869, + "rewards/rejected": -2.0483336448669434, + "step": 667 + }, + { + "epoch": 0.85, + "learning_rate": 5.719822141994873e-09, + "logits/chosen": -3.3024797439575195, + "logits/rejected": -3.1776809692382812, + "logps/chosen": -285.5911865234375, + "logps/rejected": -655.3472900390625, + "loss": 0.318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9217498898506165, + "rewards/margins": 3.584973096847534, + "rewards/rejected": -2.6632232666015625, + "step": 668 + }, + { + "epoch": 0.85, + "learning_rate": 5.624333277250415e-09, + "logits/chosen": -3.248100757598877, + "logits/rejected": -3.0697269439697266, + "logps/chosen": -274.30401611328125, + "logps/rejected": -1345.7003173828125, + "loss": 0.3005, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7903373837471008, + "rewards/margins": 4.976232528686523, + "rewards/rejected": -4.185894966125488, + "step": 669 + }, + { + "epoch": 0.85, + "learning_rate": 5.529600678732843e-09, + "logits/chosen": -3.283932685852051, + "logits/rejected": -3.2351858615875244, + "logps/chosen": -242.6866912841797, + "logps/rejected": -588.457275390625, + "loss": 0.2972, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8437637090682983, + "rewards/margins": 3.4589478969573975, + "rewards/rejected": -2.6151840686798096, + "step": 670 + }, + { + "epoch": 0.85, + "learning_rate": 5.435625960909512e-09, + "logits/chosen": -3.2554712295532227, + "logits/rejected": -3.1085739135742188, + "logps/chosen": -258.0395202636719, + "logps/rejected": -1030.390380859375, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6396087408065796, + "rewards/margins": 4.933300971984863, + "rewards/rejected": -4.293692111968994, + "step": 671 + }, + { + "epoch": 0.86, + "learning_rate": 5.342410725331681e-09, + "logits/chosen": -3.200897216796875, + "logits/rejected": -3.112725257873535, + "logps/chosen": -266.7928466796875, + "logps/rejected": -588.7217407226562, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0075409412384033, + "rewards/margins": 4.178794860839844, + "rewards/rejected": -3.1712539196014404, + "step": 672 + }, + { + "epoch": 0.86, + "learning_rate": 5.249956560607255e-09, + "logits/chosen": -3.2445790767669678, + "logits/rejected": -3.157881259918213, + "logps/chosen": -268.6827087402344, + "logps/rejected": -655.0014038085938, + "loss": 0.3046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.762860119342804, + "rewards/margins": 3.5961427688598633, + "rewards/rejected": -2.833282470703125, + "step": 673 + }, + { + "epoch": 0.86, + "learning_rate": 5.158265042373672e-09, + "logits/chosen": -3.237290859222412, + "logits/rejected": -3.1341934204101562, + "logps/chosen": -245.56268310546875, + "logps/rejected": -530.3914794921875, + "loss": 0.314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8804763555526733, + "rewards/margins": 2.9840989112854004, + "rewards/rejected": -2.1036224365234375, + "step": 674 + }, + { + "epoch": 0.86, + "learning_rate": 5.067337733271082e-09, + "logits/chosen": -3.2041006088256836, + "logits/rejected": -3.096949577331543, + "logps/chosen": -269.8416748046875, + "logps/rejected": -981.416015625, + "loss": 0.3539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8439438343048096, + "rewards/margins": 4.113197326660156, + "rewards/rejected": -3.2692534923553467, + "step": 675 + }, + { + "epoch": 0.86, + "learning_rate": 4.977176182915726e-09, + "logits/chosen": -3.2294840812683105, + "logits/rejected": -3.1373467445373535, + "logps/chosen": -262.7878112792969, + "logps/rejected": -721.1845092773438, + "loss": 0.304, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9218529462814331, + "rewards/margins": 3.829038619995117, + "rewards/rejected": -2.9071855545043945, + "step": 676 + }, + { + "epoch": 0.86, + "learning_rate": 4.8877819278734576e-09, + "logits/chosen": -3.1907124519348145, + "logits/rejected": -3.114042282104492, + "logps/chosen": -307.41168212890625, + "logps/rejected": -794.269287109375, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8706619143486023, + "rewards/margins": 4.530613899230957, + "rewards/rejected": -3.65995192527771, + "step": 677 + }, + { + "epoch": 0.86, + "learning_rate": 4.7991564916336545e-09, + "logits/chosen": -3.23860502243042, + "logits/rejected": -3.1402087211608887, + "logps/chosen": -244.30706787109375, + "logps/rejected": -433.3944091796875, + "loss": 0.3408, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.085566759109497, + "rewards/margins": 2.6869232654571533, + "rewards/rejected": -1.6013565063476562, + "step": 678 + }, + { + "epoch": 0.86, + "learning_rate": 4.711301384583183e-09, + "logits/chosen": -3.230724334716797, + "logits/rejected": -3.108285903930664, + "logps/chosen": -230.29574584960938, + "logps/rejected": -720.772705078125, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0205070972442627, + "rewards/margins": 3.88529896736145, + "rewards/rejected": -2.8647918701171875, + "step": 679 + }, + { + "epoch": 0.87, + "learning_rate": 4.624218103980665e-09, + "logits/chosen": -3.2682723999023438, + "logits/rejected": -3.206707715988159, + "logps/chosen": -256.8439636230469, + "logps/rejected": -632.926513671875, + "loss": 0.3142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7091522216796875, + "rewards/margins": 3.8693270683288574, + "rewards/rejected": -3.160174608230591, + "step": 680 + }, + { + "epoch": 0.87, + "learning_rate": 4.537908133931018e-09, + "logits/chosen": -3.268744468688965, + "logits/rejected": -3.068570137023926, + "logps/chosen": -224.7566375732422, + "logps/rejected": -1540.5281982421875, + "loss": 0.3026, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.907543957233429, + "rewards/margins": 5.680253505706787, + "rewards/rejected": -4.772709846496582, + "step": 681 + }, + { + "epoch": 0.87, + "learning_rate": 4.452372945360072e-09, + "logits/chosen": -3.2834362983703613, + "logits/rejected": -3.156602621078491, + "logps/chosen": -283.1791687011719, + "logps/rejected": -813.052490234375, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9533408880233765, + "rewards/margins": 4.320839881896973, + "rewards/rejected": -3.3674988746643066, + "step": 682 + }, + { + "epoch": 0.87, + "learning_rate": 4.3676139959895885e-09, + "logits/chosen": -3.206792116165161, + "logits/rejected": -3.186924457550049, + "logps/chosen": -245.8408203125, + "logps/rejected": -983.8006591796875, + "loss": 0.2934, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8673546314239502, + "rewards/margins": 5.335933685302734, + "rewards/rejected": -4.468579292297363, + "step": 683 + }, + { + "epoch": 0.87, + "learning_rate": 4.283632730312348e-09, + "logits/chosen": -3.1904091835021973, + "logits/rejected": -3.114902973175049, + "logps/chosen": -254.2130126953125, + "logps/rejected": -356.11639404296875, + "loss": 0.3436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7965530157089233, + "rewards/margins": 2.357452392578125, + "rewards/rejected": -1.5608994960784912, + "step": 684 + }, + { + "epoch": 0.87, + "learning_rate": 4.200430579567571e-09, + "logits/chosen": -3.171769618988037, + "logits/rejected": -3.086663246154785, + "logps/chosen": -268.3459777832031, + "logps/rejected": -1190.4432373046875, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9547562003135681, + "rewards/margins": 5.299724102020264, + "rewards/rejected": -4.344967842102051, + "step": 685 + }, + { + "epoch": 0.87, + "learning_rate": 4.118008961716551e-09, + "logits/chosen": -3.199955940246582, + "logits/rejected": -3.1380727291107178, + "logps/chosen": -286.3379211425781, + "logps/rejected": -568.9063720703125, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8166275024414062, + "rewards/margins": 2.9582581520080566, + "rewards/rejected": -2.1416306495666504, + "step": 686 + }, + { + "epoch": 0.88, + "learning_rate": 4.0363692814184e-09, + "logits/chosen": -3.1763877868652344, + "logits/rejected": -3.1162965297698975, + "logps/chosen": -289.0440673828125, + "logps/rejected": -689.514892578125, + "loss": 0.3361, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0411591529846191, + "rewards/margins": 3.6776230335235596, + "rewards/rejected": -2.6364641189575195, + "step": 687 + }, + { + "epoch": 0.88, + "learning_rate": 3.955512930006222e-09, + "logits/chosen": -3.251647472381592, + "logits/rejected": -3.000619649887085, + "logps/chosen": -248.40061950683594, + "logps/rejected": -634.1207275390625, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7389450073242188, + "rewards/margins": 3.4812393188476562, + "rewards/rejected": -2.7422943115234375, + "step": 688 + }, + { + "epoch": 0.88, + "learning_rate": 3.87544128546331e-09, + "logits/chosen": -3.2100870609283447, + "logits/rejected": -3.094453811645508, + "logps/chosen": -282.15716552734375, + "logps/rejected": -828.5225830078125, + "loss": 0.3663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9579498767852783, + "rewards/margins": 4.464731216430664, + "rewards/rejected": -3.5067811012268066, + "step": 689 + }, + { + "epoch": 0.88, + "learning_rate": 3.796155712399701e-09, + "logits/chosen": -3.2760097980499268, + "logits/rejected": -3.1432151794433594, + "logps/chosen": -261.7902526855469, + "logps/rejected": -911.416015625, + "loss": 0.2978, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8822372555732727, + "rewards/margins": 4.3991804122924805, + "rewards/rejected": -3.5169434547424316, + "step": 690 + }, + { + "epoch": 0.88, + "learning_rate": 3.7176575620289364e-09, + "logits/chosen": -3.2713990211486816, + "logits/rejected": -3.1964616775512695, + "logps/chosen": -273.8138732910156, + "logps/rejected": -614.9442749023438, + "loss": 0.3051, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9919922351837158, + "rewards/margins": 3.7578659057617188, + "rewards/rejected": -2.765873908996582, + "step": 691 + }, + { + "epoch": 0.88, + "learning_rate": 3.6399481721449856e-09, + "logits/chosen": -3.2426838874816895, + "logits/rejected": -3.1015453338623047, + "logps/chosen": -267.95819091796875, + "logps/rejected": -639.8734130859375, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7270447015762329, + "rewards/margins": 3.2591657638549805, + "rewards/rejected": -2.532121181488037, + "step": 692 + }, + { + "epoch": 0.88, + "learning_rate": 3.5630288670995044e-09, + "logits/chosen": -3.2611680030822754, + "logits/rejected": -3.157470703125, + "logps/chosen": -283.4297180175781, + "logps/rejected": -489.6226501464844, + "loss": 0.3151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6186859607696533, + "rewards/margins": 2.7074174880981445, + "rewards/rejected": -2.088731288909912, + "step": 693 + }, + { + "epoch": 0.88, + "learning_rate": 3.486900957779215e-09, + "logits/chosen": -3.171936511993408, + "logits/rejected": -3.1401987075805664, + "logps/chosen": -298.2890930175781, + "logps/rejected": -585.9473876953125, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8903274536132812, + "rewards/margins": 3.4143877029418945, + "rewards/rejected": -2.5240602493286133, + "step": 694 + }, + { + "epoch": 0.89, + "learning_rate": 3.411565741583583e-09, + "logits/chosen": -3.224396228790283, + "logits/rejected": -3.0938005447387695, + "logps/chosen": -276.5197448730469, + "logps/rejected": -431.67486572265625, + "loss": 0.3171, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7680587768554688, + "rewards/margins": 2.5077056884765625, + "rewards/rejected": -1.7396469116210938, + "step": 695 + }, + { + "epoch": 0.89, + "learning_rate": 3.3370245024027412e-09, + "logits/chosen": -3.1979660987854004, + "logits/rejected": -3.1473681926727295, + "logps/chosen": -252.368408203125, + "logps/rejected": -574.451416015625, + "loss": 0.3043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8074966669082642, + "rewards/margins": 3.3596208095550537, + "rewards/rejected": -2.5521240234375, + "step": 696 + }, + { + "epoch": 0.89, + "learning_rate": 3.263278510595546e-09, + "logits/chosen": -3.2203731536865234, + "logits/rejected": -3.0423030853271484, + "logps/chosen": -211.5723419189453, + "logps/rejected": -597.5908813476562, + "loss": 0.2658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9821029901504517, + "rewards/margins": 3.4185409545898438, + "rewards/rejected": -2.4364380836486816, + "step": 697 + }, + { + "epoch": 0.89, + "learning_rate": 3.1903290229679748e-09, + "logits/chosen": -3.289616823196411, + "logits/rejected": -3.056469440460205, + "logps/chosen": -271.5527648925781, + "logps/rejected": -3739.642578125, + "loss": 0.3317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8480789661407471, + "rewards/margins": 6.814697265625, + "rewards/rejected": -5.966618537902832, + "step": 698 + }, + { + "epoch": 0.89, + "learning_rate": 3.1181772827516663e-09, + "logits/chosen": -3.1786744594573975, + "logits/rejected": -3.1105024814605713, + "logps/chosen": -201.785888671875, + "logps/rejected": -645.7391357421875, + "loss": 0.2785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6504898071289062, + "rewards/margins": 3.4290878772735596, + "rewards/rejected": -2.7785980701446533, + "step": 699 + }, + { + "epoch": 0.89, + "learning_rate": 3.046824519582808e-09, + "logits/chosen": -3.2143311500549316, + "logits/rejected": -3.1586601734161377, + "logps/chosen": -222.63894653320312, + "logps/rejected": -419.13946533203125, + "loss": 0.3231, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8356300592422485, + "rewards/margins": 2.7315423488616943, + "rewards/rejected": -1.8959121704101562, + "step": 700 + }, + { + "epoch": 0.89, + "learning_rate": 2.976271949481085e-09, + "logits/chosen": -3.186410427093506, + "logits/rejected": -3.0207738876342773, + "logps/chosen": -233.3813934326172, + "logps/rejected": -786.7786254882812, + "loss": 0.3, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.05670166015625, + "rewards/margins": 3.1656012535095215, + "rewards/rejected": -2.1088995933532715, + "step": 701 + }, + { + "epoch": 0.89, + "learning_rate": 2.9065207748290133e-09, + "logits/chosen": -3.1611132621765137, + "logits/rejected": -3.0911107063293457, + "logps/chosen": -307.4350280761719, + "logps/rejected": -706.2040405273438, + "loss": 0.3203, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9803451895713806, + "rewards/margins": 3.9684858322143555, + "rewards/rejected": -2.988140821456909, + "step": 702 + }, + { + "epoch": 0.9, + "learning_rate": 2.83757218435145e-09, + "logits/chosen": -3.2224676609039307, + "logits/rejected": -3.1915605068206787, + "logps/chosen": -253.396484375, + "logps/rejected": -1951.078369140625, + "loss": 0.2945, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.591112494468689, + "rewards/margins": 7.220553398132324, + "rewards/rejected": -6.629440784454346, + "step": 703 + }, + { + "epoch": 0.9, + "learning_rate": 2.769427353095316e-09, + "logits/chosen": -3.2347233295440674, + "logits/rejected": -3.0716052055358887, + "logps/chosen": -246.82278442382812, + "logps/rejected": -2136.714111328125, + "loss": 0.3045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8048057556152344, + "rewards/margins": 9.227944374084473, + "rewards/rejected": -8.423138618469238, + "step": 704 + }, + { + "epoch": 0.9, + "learning_rate": 2.702087442409551e-09, + "logits/chosen": -3.250631809234619, + "logits/rejected": -3.06327223777771, + "logps/chosen": -244.91357421875, + "logps/rejected": -464.70477294921875, + "loss": 0.3551, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8344917297363281, + "rewards/margins": 2.545020341873169, + "rewards/rejected": -1.7105286121368408, + "step": 705 + }, + { + "epoch": 0.9, + "learning_rate": 2.6355535999253887e-09, + "logits/chosen": -3.214693784713745, + "logits/rejected": -3.0910043716430664, + "logps/chosen": -252.61737060546875, + "logps/rejected": -484.42181396484375, + "loss": 0.3239, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7403297424316406, + "rewards/margins": 2.4902138710021973, + "rewards/rejected": -1.7498841285705566, + "step": 706 + }, + { + "epoch": 0.9, + "learning_rate": 2.569826959536725e-09, + "logits/chosen": -3.255439519882202, + "logits/rejected": -3.2431435585021973, + "logps/chosen": -263.81341552734375, + "logps/rejected": -650.0031127929688, + "loss": 0.3141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9849510192871094, + "rewards/margins": 3.5759193897247314, + "rewards/rejected": -2.590968370437622, + "step": 707 + }, + { + "epoch": 0.9, + "learning_rate": 2.5049086413808373e-09, + "logits/chosen": -3.1921510696411133, + "logits/rejected": -3.0761971473693848, + "logps/chosen": -267.0522766113281, + "logps/rejected": -407.75537109375, + "loss": 0.3196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8645569086074829, + "rewards/margins": 2.1589157581329346, + "rewards/rejected": -1.2943588495254517, + "step": 708 + }, + { + "epoch": 0.9, + "learning_rate": 2.4407997518192725e-09, + "logits/chosen": -3.2593555450439453, + "logits/rejected": -3.1699414253234863, + "logps/chosen": -260.0784912109375, + "logps/rejected": -647.94970703125, + "loss": 0.3265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8564407229423523, + "rewards/margins": 4.062180995941162, + "rewards/rejected": -3.205740451812744, + "step": 709 + }, + { + "epoch": 0.9, + "learning_rate": 2.377501383419006e-09, + "logits/chosen": -3.2410964965820312, + "logits/rejected": -3.0671095848083496, + "logps/chosen": -242.4148712158203, + "logps/rejected": -418.3597412109375, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0211960077285767, + "rewards/margins": 2.225686550140381, + "rewards/rejected": -1.2044906616210938, + "step": 710 + }, + { + "epoch": 0.91, + "learning_rate": 2.3150146149338246e-09, + "logits/chosen": -3.167846202850342, + "logits/rejected": -3.0606489181518555, + "logps/chosen": -292.3104248046875, + "logps/rejected": -831.892333984375, + "loss": 0.3336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6666885614395142, + "rewards/margins": 3.3159656524658203, + "rewards/rejected": -2.6492767333984375, + "step": 711 + }, + { + "epoch": 0.91, + "learning_rate": 2.2533405112859226e-09, + "logits/chosen": -3.186563014984131, + "logits/rejected": -3.113769292831421, + "logps/chosen": -260.7579650878906, + "logps/rejected": -1137.25634765625, + "loss": 0.2947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8456329107284546, + "rewards/margins": 5.643365383148193, + "rewards/rejected": -4.797732353210449, + "step": 712 + }, + { + "epoch": 0.91, + "learning_rate": 2.1924801235477743e-09, + "logits/chosen": -3.219627857208252, + "logits/rejected": -3.1495845317840576, + "logps/chosen": -291.95318603515625, + "logps/rejected": -885.9666748046875, + "loss": 0.3059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8208115100860596, + "rewards/margins": 4.5671796798706055, + "rewards/rejected": -3.746368408203125, + "step": 713 + }, + { + "epoch": 0.91, + "learning_rate": 2.1324344889242117e-09, + "logits/chosen": -3.169576644897461, + "logits/rejected": -3.1402339935302734, + "logps/chosen": -265.7103271484375, + "logps/rejected": -475.8633728027344, + "loss": 0.3122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7189849615097046, + "rewards/margins": 2.7740044593811035, + "rewards/rejected": -2.0550193786621094, + "step": 714 + }, + { + "epoch": 0.91, + "learning_rate": 2.0732046307347427e-09, + "logits/chosen": -3.2140865325927734, + "logits/rejected": -3.1001248359680176, + "logps/chosen": -272.3880920410156, + "logps/rejected": -367.2703552246094, + "loss": 0.3371, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8797942996025085, + "rewards/margins": 2.264277696609497, + "rewards/rejected": -1.3844833374023438, + "step": 715 + }, + { + "epoch": 0.91, + "learning_rate": 2.0147915583961172e-09, + "logits/chosen": -3.1718530654907227, + "logits/rejected": -3.1196680068969727, + "logps/chosen": -228.8412322998047, + "logps/rejected": -532.2218017578125, + "loss": 0.2871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8847107291221619, + "rewards/margins": 3.3059539794921875, + "rewards/rejected": -2.421243190765381, + "step": 716 + }, + { + "epoch": 0.91, + "learning_rate": 1.9571962674051203e-09, + "logits/chosen": -3.2294726371765137, + "logits/rejected": -3.0172762870788574, + "logps/chosen": -272.65911865234375, + "logps/rejected": -384.3207702636719, + "loss": 0.3494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8934425115585327, + "rewards/margins": 2.087583065032959, + "rewards/rejected": -1.1941406726837158, + "step": 717 + }, + { + "epoch": 0.91, + "learning_rate": 1.900419739321629e-09, + "logits/chosen": -3.298821449279785, + "logits/rejected": -3.1427457332611084, + "logps/chosen": -236.90121459960938, + "logps/rejected": -1180.9755859375, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8000404834747314, + "rewards/margins": 6.29093074798584, + "rewards/rejected": -5.4908905029296875, + "step": 718 + }, + { + "epoch": 0.92, + "learning_rate": 1.8444629417518398e-09, + "logits/chosen": -3.2006731033325195, + "logits/rejected": -3.1228723526000977, + "logps/chosen": -272.462890625, + "logps/rejected": -612.383544921875, + "loss": 0.3396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8084762692451477, + "rewards/margins": 3.335432529449463, + "rewards/rejected": -2.52695631980896, + "step": 719 + }, + { + "epoch": 0.92, + "learning_rate": 1.7893268283318276e-09, + "logits/chosen": -3.216866970062256, + "logits/rejected": -3.1153573989868164, + "logps/chosen": -245.91763305664062, + "logps/rejected": -875.0869140625, + "loss": 0.2881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8272583484649658, + "rewards/margins": 4.482092380523682, + "rewards/rejected": -3.654834270477295, + "step": 720 + }, + { + "epoch": 0.92, + "learning_rate": 1.7350123387112558e-09, + "logits/chosen": -3.22952938079834, + "logits/rejected": -2.9958810806274414, + "logps/chosen": -249.60302734375, + "logps/rejected": -1147.4185791015625, + "loss": 0.3105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8361533880233765, + "rewards/margins": 4.67275333404541, + "rewards/rejected": -3.836599826812744, + "step": 721 + }, + { + "epoch": 0.92, + "learning_rate": 1.6815203985373728e-09, + "logits/chosen": -3.23962140083313, + "logits/rejected": -3.121837615966797, + "logps/chosen": -285.0335693359375, + "logps/rejected": -383.9288330078125, + "loss": 0.3166, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0709824562072754, + "rewards/margins": 2.4544386863708496, + "rewards/rejected": -1.3834564685821533, + "step": 722 + }, + { + "epoch": 0.92, + "learning_rate": 1.6288519194392614e-09, + "logits/chosen": -3.2653579711914062, + "logits/rejected": -3.1436805725097656, + "logps/chosen": -256.1167297363281, + "logps/rejected": -505.941162109375, + "loss": 0.3183, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9325249195098877, + "rewards/margins": 2.9981255531311035, + "rewards/rejected": -2.065600633621216, + "step": 723 + }, + { + "epoch": 0.92, + "learning_rate": 1.5770077990122643e-09, + "logits/chosen": -3.236294746398926, + "logits/rejected": -3.1561439037323, + "logps/chosen": -305.9063720703125, + "logps/rejected": -578.0526123046875, + "loss": 0.3707, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.094874620437622, + "rewards/margins": 3.114802837371826, + "rewards/rejected": -2.019927978515625, + "step": 724 + }, + { + "epoch": 0.92, + "learning_rate": 1.525988920802701e-09, + "logits/chosen": -3.2229294776916504, + "logits/rejected": -3.230250358581543, + "logps/chosen": -283.9895324707031, + "logps/rejected": -529.664306640625, + "loss": 0.3245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0059401988983154, + "rewards/margins": 3.540818691253662, + "rewards/rejected": -2.5348784923553467, + "step": 725 + }, + { + "epoch": 0.92, + "learning_rate": 1.4757961542928354e-09, + "logits/chosen": -3.2101402282714844, + "logits/rejected": -3.128431797027588, + "logps/chosen": -243.05935668945312, + "logps/rejected": -792.3958740234375, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6949394345283508, + "rewards/margins": 3.9738149642944336, + "rewards/rejected": -3.2788758277893066, + "step": 726 + }, + { + "epoch": 0.93, + "learning_rate": 1.426430354885999e-09, + "logits/chosen": -3.1376805305480957, + "logits/rejected": -3.1359786987304688, + "logps/chosen": -256.273681640625, + "logps/rejected": -712.1527709960938, + "loss": 0.2989, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7707718014717102, + "rewards/margins": 4.200982570648193, + "rewards/rejected": -3.430210828781128, + "step": 727 + }, + { + "epoch": 0.93, + "learning_rate": 1.377892363892097e-09, + "logits/chosen": -3.309478282928467, + "logits/rejected": -3.111279010772705, + "logps/chosen": -253.35948181152344, + "logps/rejected": -1276.223876953125, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7385421991348267, + "rewards/margins": 5.485346794128418, + "rewards/rejected": -4.746805191040039, + "step": 728 + }, + { + "epoch": 0.93, + "learning_rate": 1.330183008513186e-09, + "logits/chosen": -3.2613651752471924, + "logits/rejected": -3.105022430419922, + "logps/chosen": -243.84579467773438, + "logps/rejected": -718.3277587890625, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8094383478164673, + "rewards/margins": 4.109737396240234, + "rewards/rejected": -3.3002991676330566, + "step": 729 + }, + { + "epoch": 0.93, + "learning_rate": 1.283303101829425e-09, + "logits/chosen": -3.2587573528289795, + "logits/rejected": -3.105581045150757, + "logps/chosen": -277.1041564941406, + "logps/rejected": -827.7388916015625, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7477248907089233, + "rewards/margins": 3.9269609451293945, + "rewards/rejected": -3.1792359352111816, + "step": 730 + }, + { + "epoch": 0.93, + "learning_rate": 1.2372534427852078e-09, + "logits/chosen": -3.2280731201171875, + "logits/rejected": -3.061709403991699, + "logps/chosen": -256.53936767578125, + "logps/rejected": -661.32568359375, + "loss": 0.2975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9727089405059814, + "rewards/margins": 4.25475549697876, + "rewards/rejected": -3.2820465564727783, + "step": 731 + }, + { + "epoch": 0.93, + "learning_rate": 1.1920348161755412e-09, + "logits/chosen": -3.289909839630127, + "logits/rejected": -3.166923999786377, + "logps/chosen": -254.65113830566406, + "logps/rejected": -807.712158203125, + "loss": 0.2933, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7489288449287415, + "rewards/margins": 4.085577487945557, + "rewards/rejected": -3.336648464202881, + "step": 732 + }, + { + "epoch": 0.93, + "learning_rate": 1.1476479926326942e-09, + "logits/chosen": -3.2336442470550537, + "logits/rejected": -3.100999116897583, + "logps/chosen": -292.4952392578125, + "logps/rejected": -1293.2177734375, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0570358037948608, + "rewards/margins": 5.805940628051758, + "rewards/rejected": -4.748904228210449, + "step": 733 + }, + { + "epoch": 0.94, + "learning_rate": 1.1040937286130259e-09, + "logits/chosen": -3.2463982105255127, + "logits/rejected": -3.071342706680298, + "logps/chosen": -255.5460662841797, + "logps/rejected": -372.39337158203125, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.958574652671814, + "rewards/margins": 2.0186972618103027, + "rewards/rejected": -1.0601227283477783, + "step": 734 + }, + { + "epoch": 0.94, + "learning_rate": 1.061372766384111e-09, + "logits/chosen": -3.213210105895996, + "logits/rejected": -3.1522679328918457, + "logps/chosen": -263.88446044921875, + "logps/rejected": -589.6300659179688, + "loss": 0.312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8807121515274048, + "rewards/margins": 3.306874990463257, + "rewards/rejected": -2.4261627197265625, + "step": 735 + }, + { + "epoch": 0.94, + "learning_rate": 1.0194858340121182e-09, + "logits/chosen": -3.238389492034912, + "logits/rejected": -3.079584836959839, + "logps/chosen": -280.17816162109375, + "logps/rejected": -662.317626953125, + "loss": 0.3381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7401748895645142, + "rewards/margins": 3.5877151489257812, + "rewards/rejected": -2.8475403785705566, + "step": 736 + }, + { + "epoch": 0.94, + "learning_rate": 9.784336453493414e-10, + "logits/chosen": -3.164694309234619, + "logits/rejected": -3.1305060386657715, + "logps/chosen": -311.02777099609375, + "logps/rejected": -947.8565673828125, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6634002923965454, + "rewards/margins": 5.272473335266113, + "rewards/rejected": -4.609072685241699, + "step": 737 + }, + { + "epoch": 0.94, + "learning_rate": 9.38216900022104e-10, + "logits/chosen": -3.232832193374634, + "logits/rejected": -3.082284927368164, + "logps/chosen": -285.7615051269531, + "logps/rejected": -869.1145629882812, + "loss": 0.3208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8284866809844971, + "rewards/margins": 4.568565368652344, + "rewards/rejected": -3.740078926086426, + "step": 738 + }, + { + "epoch": 0.94, + "learning_rate": 8.988362834187746e-10, + "logits/chosen": -3.2503814697265625, + "logits/rejected": -3.1051363945007324, + "logps/chosen": -262.52850341796875, + "logps/rejected": -555.1604614257812, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.846479058265686, + "rewards/margins": 2.997823476791382, + "rewards/rejected": -2.1513442993164062, + "step": 739 + }, + { + "epoch": 0.94, + "learning_rate": 8.602924666781252e-10, + "logits/chosen": -3.208521842956543, + "logits/rejected": -3.055482864379883, + "logps/chosen": -275.65423583984375, + "logps/rejected": -507.0080871582031, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8825485706329346, + "rewards/margins": 2.7211334705352783, + "rewards/rejected": -1.8385848999023438, + "step": 740 + }, + { + "epoch": 0.94, + "learning_rate": 8.225861066778805e-10, + "logits/chosen": -3.2364768981933594, + "logits/rejected": -2.880552291870117, + "logps/chosen": -231.6939239501953, + "logps/rejected": -1321.1400146484375, + "loss": 0.333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8020065426826477, + "rewards/margins": 5.477400302886963, + "rewards/rejected": -4.675394058227539, + "step": 741 + }, + { + "epoch": 0.95, + "learning_rate": 7.857178460235148e-10, + "logits/chosen": -3.241684913635254, + "logits/rejected": -3.1109871864318848, + "logps/chosen": -226.60162353515625, + "logps/rejected": -552.8715209960938, + "loss": 0.299, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9359893798828125, + "rewards/margins": 2.977935791015625, + "rewards/rejected": -2.0419464111328125, + "step": 742 + }, + { + "epoch": 0.95, + "learning_rate": 7.496883130373166e-10, + "logits/chosen": -3.182877540588379, + "logits/rejected": -3.062311887741089, + "logps/chosen": -230.58551025390625, + "logps/rejected": -1019.1348876953125, + "loss": 0.303, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8302993774414062, + "rewards/margins": 4.527284622192383, + "rewards/rejected": -3.6969847679138184, + "step": 743 + }, + { + "epoch": 0.95, + "learning_rate": 7.144981217476753e-10, + "logits/chosen": -3.156250476837158, + "logits/rejected": -3.109238624572754, + "logps/chosen": -253.8656768798828, + "logps/rejected": -530.2658081054688, + "loss": 0.3156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9162895679473877, + "rewards/margins": 3.144948720932007, + "rewards/rejected": -2.228659152984619, + "step": 744 + }, + { + "epoch": 0.95, + "learning_rate": 6.801478718785947e-10, + "logits/chosen": -3.2646121978759766, + "logits/rejected": -3.088197708129883, + "logps/chosen": -256.6007995605469, + "logps/rejected": -696.5112915039062, + "loss": 0.2971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8193275332450867, + "rewards/margins": 3.7385597229003906, + "rewards/rejected": -2.9192323684692383, + "step": 745 + }, + { + "epoch": 0.95, + "learning_rate": 6.46638148839529e-10, + "logits/chosen": -3.2468204498291016, + "logits/rejected": -3.064473867416382, + "logps/chosen": -247.80767822265625, + "logps/rejected": -329.44915771484375, + "loss": 0.3439, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8766357898712158, + "rewards/margins": 2.050062656402588, + "rewards/rejected": -1.173426866531372, + "step": 746 + }, + { + "epoch": 0.95, + "learning_rate": 6.139695237153297e-10, + "logits/chosen": -3.2067785263061523, + "logits/rejected": -3.0040407180786133, + "logps/chosen": -287.16973876953125, + "logps/rejected": -296.9962158203125, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7208831906318665, + "rewards/margins": 1.743922472000122, + "rewards/rejected": -1.0230392217636108, + "step": 747 + }, + { + "epoch": 0.95, + "learning_rate": 5.821425532565815e-10, + "logits/chosen": -3.2652530670166016, + "logits/rejected": -3.159440755844116, + "logps/chosen": -241.41661071777344, + "logps/rejected": -804.9925537109375, + "loss": 0.3118, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7387901544570923, + "rewards/margins": 4.094674110412598, + "rewards/rejected": -3.355883836746216, + "step": 748 + }, + { + "epoch": 0.95, + "learning_rate": 5.511577798700596e-10, + "logits/chosen": -3.267746925354004, + "logits/rejected": -3.145688533782959, + "logps/chosen": -257.8382873535156, + "logps/rejected": -575.3578491210938, + "loss": 0.316, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8595382571220398, + "rewards/margins": 3.3007750511169434, + "rewards/rejected": -2.441236972808838, + "step": 749 + }, + { + "epoch": 0.96, + "learning_rate": 5.21015731609531e-10, + "logits/chosen": -3.1853904724121094, + "logits/rejected": -2.9591662883758545, + "logps/chosen": -260.99127197265625, + "logps/rejected": -1254.229248046875, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0995872020721436, + "rewards/margins": 5.246209144592285, + "rewards/rejected": -4.1466217041015625, + "step": 750 + }, + { + "epoch": 0.96, + "learning_rate": 4.91716922166735e-10, + "logits/chosen": -3.244603157043457, + "logits/rejected": -3.166945457458496, + "logps/chosen": -286.13641357421875, + "logps/rejected": -739.1593017578125, + "loss": 0.3194, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8903793096542358, + "rewards/margins": 3.6796278953552246, + "rewards/rejected": -2.7892487049102783, + "step": 751 + }, + { + "epoch": 0.96, + "learning_rate": 4.6326185086260626e-10, + "logits/chosen": -3.1955323219299316, + "logits/rejected": -3.130490779876709, + "logps/chosen": -266.62322998046875, + "logps/rejected": -356.099365234375, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9017212390899658, + "rewards/margins": 2.415461540222168, + "rewards/rejected": -1.5137405395507812, + "step": 752 + }, + { + "epoch": 0.96, + "learning_rate": 4.3565100263881493e-10, + "logits/chosen": -3.2231783866882324, + "logits/rejected": -2.985842704772949, + "logps/chosen": -327.181640625, + "logps/rejected": -855.7127685546875, + "loss": 0.3557, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9265106320381165, + "rewards/margins": 3.809525966644287, + "rewards/rejected": -2.8830153942108154, + "step": 753 + }, + { + "epoch": 0.96, + "learning_rate": 4.0888484804945666e-10, + "logits/chosen": -3.2640109062194824, + "logits/rejected": -3.2029924392700195, + "logps/chosen": -270.47760009765625, + "logps/rejected": -480.70367431640625, + "loss": 0.32, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7971054315567017, + "rewards/margins": 2.925245761871338, + "rewards/rejected": -2.128140449523926, + "step": 754 + }, + { + "epoch": 0.96, + "learning_rate": 3.829638432530702e-10, + "logits/chosen": -3.254096031188965, + "logits/rejected": -3.203315019607544, + "logps/chosen": -306.3249206542969, + "logps/rejected": -873.78564453125, + "loss": 0.3015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7978881597518921, + "rewards/margins": 4.810779094696045, + "rewards/rejected": -4.012890815734863, + "step": 755 + }, + { + "epoch": 0.96, + "learning_rate": 3.578884300048157e-10, + "logits/chosen": -3.249399185180664, + "logits/rejected": -3.1269853115081787, + "logps/chosen": -264.9843444824219, + "logps/rejected": -524.0511474609375, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9120041131973267, + "rewards/margins": 2.8901567459106445, + "rewards/rejected": -1.9781525135040283, + "step": 756 + }, + { + "epoch": 0.96, + "learning_rate": 3.3365903564899764e-10, + "logits/chosen": -3.2620954513549805, + "logits/rejected": -3.149833917617798, + "logps/chosen": -289.2571105957031, + "logps/rejected": -955.1342163085938, + "loss": 0.3368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9744446277618408, + "rewards/margins": 5.228189468383789, + "rewards/rejected": -4.253744602203369, + "step": 757 + }, + { + "epoch": 0.97, + "learning_rate": 3.102760731117593e-10, + "logits/chosen": -3.2373046875, + "logits/rejected": -3.053622245788574, + "logps/chosen": -243.01840209960938, + "logps/rejected": -467.41680908203125, + "loss": 0.3328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8981903195381165, + "rewards/margins": 2.7906312942504883, + "rewards/rejected": -1.892440915107727, + "step": 758 + }, + { + "epoch": 0.97, + "learning_rate": 2.877399408940273e-10, + "logits/chosen": -3.165796995162964, + "logits/rejected": -3.0296781063079834, + "logps/chosen": -250.68980407714844, + "logps/rejected": -236.2838134765625, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.707440197467804, + "rewards/margins": 1.3485679626464844, + "rewards/rejected": -0.6411277651786804, + "step": 759 + }, + { + "epoch": 0.97, + "learning_rate": 2.660510230647672e-10, + "logits/chosen": -3.234842300415039, + "logits/rejected": -3.1038196086883545, + "logps/chosen": -262.67633056640625, + "logps/rejected": -1042.1624755859375, + "loss": 0.2968, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9485428333282471, + "rewards/margins": 5.209013938903809, + "rewards/rejected": -4.260470867156982, + "step": 760 + }, + { + "epoch": 0.97, + "learning_rate": 2.452096892543776e-10, + "logits/chosen": -3.2267110347747803, + "logits/rejected": -3.1766977310180664, + "logps/chosen": -274.51904296875, + "logps/rejected": -700.71484375, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7510055303573608, + "rewards/margins": 3.778041124343872, + "rewards/rejected": -3.027035713195801, + "step": 761 + }, + { + "epoch": 0.97, + "learning_rate": 2.2521629464844484e-10, + "logits/chosen": -3.206711769104004, + "logits/rejected": -3.103701591491699, + "logps/chosen": -274.660400390625, + "logps/rejected": -638.694091796875, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9735908508300781, + "rewards/margins": 3.5707406997680664, + "rewards/rejected": -2.5971498489379883, + "step": 762 + }, + { + "epoch": 0.97, + "learning_rate": 2.0607117998165947e-10, + "logits/chosen": -3.2257604598999023, + "logits/rejected": -3.0436644554138184, + "logps/chosen": -257.8638610839844, + "logps/rejected": -283.80218505859375, + "loss": 0.3179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8619521856307983, + "rewards/margins": 1.853785753250122, + "rewards/rejected": -0.991833508014679, + "step": 763 + }, + { + "epoch": 0.97, + "learning_rate": 1.8777467153202052e-10, + "logits/chosen": -3.210765838623047, + "logits/rejected": -3.146442413330078, + "logps/chosen": -227.00088500976562, + "logps/rejected": -419.3211364746094, + "loss": 0.2938, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8957474231719971, + "rewards/margins": 2.653237819671631, + "rewards/rejected": -1.7574905157089233, + "step": 764 + }, + { + "epoch": 0.97, + "learning_rate": 1.703270811152624e-10, + "logits/chosen": -3.316166877746582, + "logits/rejected": -3.1670713424682617, + "logps/chosen": -255.31585693359375, + "logps/rejected": -410.19525146484375, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7974869012832642, + "rewards/margins": 2.613285779953003, + "rewards/rejected": -1.8157989978790283, + "step": 765 + }, + { + "epoch": 0.98, + "learning_rate": 1.5372870607956445e-10, + "logits/chosen": -3.2683768272399902, + "logits/rejected": -3.053925037384033, + "logps/chosen": -256.74609375, + "logps/rejected": -411.16351318359375, + "loss": 0.3225, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8076279163360596, + "rewards/margins": 2.4199233055114746, + "rewards/rejected": -1.6122955083847046, + "step": 766 + }, + { + "epoch": 0.98, + "learning_rate": 1.379798293004497e-10, + "logits/chosen": -3.2351346015930176, + "logits/rejected": -3.2115957736968994, + "logps/chosen": -245.04873657226562, + "logps/rejected": -935.33203125, + "loss": 0.2823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.703155517578125, + "rewards/margins": 4.095498561859131, + "rewards/rejected": -3.392343044281006, + "step": 767 + }, + { + "epoch": 0.98, + "learning_rate": 1.230807191760108e-10, + "logits/chosen": -3.2479352951049805, + "logits/rejected": -3.109595775604248, + "logps/chosen": -268.040771484375, + "logps/rejected": -667.1778564453125, + "loss": 0.3264, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8994621634483337, + "rewards/margins": 3.789296865463257, + "rewards/rejected": -2.8898346424102783, + "step": 768 + }, + { + "epoch": 0.98, + "learning_rate": 1.0903162962228596e-10, + "logits/chosen": -3.189232349395752, + "logits/rejected": -3.100224256515503, + "logps/chosen": -305.913330078125, + "logps/rejected": -527.5679321289062, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.028944492340088, + "rewards/margins": 3.0225632190704346, + "rewards/rejected": -1.9936187267303467, + "step": 769 + }, + { + "epoch": 0.98, + "learning_rate": 9.583280006895678e-11, + "logits/chosen": -3.247292995452881, + "logits/rejected": -3.1173062324523926, + "logps/chosen": -239.53309631347656, + "logps/rejected": -995.3394165039062, + "loss": 0.3199, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8782303333282471, + "rewards/margins": 4.7225141525268555, + "rewards/rejected": -3.8442840576171875, + "step": 770 + }, + { + "epoch": 0.98, + "learning_rate": 8.348445545527938e-11, + "logits/chosen": -3.2200331687927246, + "logits/rejected": -3.1576192378997803, + "logps/chosen": -261.85809326171875, + "logps/rejected": -372.19110107421875, + "loss": 0.3349, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0591439008712769, + "rewards/margins": 2.6060404777526855, + "rewards/rejected": -1.5468964576721191, + "step": 771 + }, + { + "epoch": 0.98, + "learning_rate": 7.198680622621523e-11, + "logits/chosen": -3.1871323585510254, + "logits/rejected": -3.142486810684204, + "logps/chosen": -261.7677001953125, + "logps/rejected": -622.2526245117188, + "loss": 0.3102, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7751541137695312, + "rewards/margins": 2.960906982421875, + "rewards/rejected": -2.1857528686523438, + "step": 772 + }, + { + "epoch": 0.98, + "learning_rate": 6.134004832888396e-11, + "logits/chosen": -3.3127856254577637, + "logits/rejected": -2.9899449348449707, + "logps/chosen": -251.42889404296875, + "logps/rejected": -488.830078125, + "loss": 0.3309, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8592239618301392, + "rewards/margins": 2.6619162559509277, + "rewards/rejected": -1.8026924133300781, + "step": 773 + }, + { + "epoch": 0.99, + "learning_rate": 5.1544363209199414e-11, + "logits/chosen": -3.251497745513916, + "logits/rejected": -3.126047372817993, + "logps/chosen": -259.3839111328125, + "logps/rejected": -955.2108764648438, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.060316562652588, + "rewards/margins": 4.6081862449646, + "rewards/rejected": -3.5478696823120117, + "step": 774 + }, + { + "epoch": 0.99, + "learning_rate": 4.25999178087888e-11, + "logits/chosen": -3.199685573577881, + "logits/rejected": -3.1156601905822754, + "logps/chosen": -254.69680786132812, + "logps/rejected": -621.605224609375, + "loss": 0.2904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6734542846679688, + "rewards/margins": 3.454329013824463, + "rewards/rejected": -2.780874729156494, + "step": 775 + }, + { + "epoch": 0.99, + "learning_rate": 3.450686456213381e-11, + "logits/chosen": -3.2396130561828613, + "logits/rejected": -3.118246555328369, + "logps/chosen": -267.993896484375, + "logps/rejected": -935.10546875, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8112396597862244, + "rewards/margins": 4.463354587554932, + "rewards/rejected": -3.6521148681640625, + "step": 776 + }, + { + "epoch": 0.99, + "learning_rate": 2.7265341393983843e-11, + "logits/chosen": -3.210446357727051, + "logits/rejected": -3.1169471740722656, + "logps/chosen": -245.7869873046875, + "logps/rejected": -625.0173950195312, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.803015947341919, + "rewards/margins": 3.6322760581970215, + "rewards/rejected": -2.8292603492736816, + "step": 777 + }, + { + "epoch": 0.99, + "learning_rate": 2.0875471717013425e-11, + "logits/chosen": -3.315657615661621, + "logits/rejected": -3.1861448287963867, + "logps/chosen": -243.48379516601562, + "logps/rejected": -513.0025634765625, + "loss": 0.3175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9368667602539062, + "rewards/margins": 2.8556365966796875, + "rewards/rejected": -1.9187698364257812, + "step": 778 + }, + { + "epoch": 0.99, + "learning_rate": 1.5337364429696132e-11, + "logits/chosen": -3.2512288093566895, + "logits/rejected": -3.117311477661133, + "logps/chosen": -273.45550537109375, + "logps/rejected": -669.3309326171875, + "loss": 0.3058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7838805913925171, + "rewards/margins": 3.687072992324829, + "rewards/rejected": -2.9031922817230225, + "step": 779 + }, + { + "epoch": 0.99, + "learning_rate": 1.0651113914472709e-11, + "logits/chosen": -3.2500853538513184, + "logits/rejected": -3.2134203910827637, + "logps/chosen": -244.47789001464844, + "logps/rejected": -798.9764404296875, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.976574718952179, + "rewards/margins": 4.795926094055176, + "rewards/rejected": -3.8193511962890625, + "step": 780 + }, + { + "epoch": 0.99, + "learning_rate": 6.8168000361246145e-12, + "logits/chosen": -3.189894199371338, + "logits/rejected": -3.110352039337158, + "logps/chosen": -233.24119567871094, + "logps/rejected": -500.050537109375, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.78103107213974, + "rewards/margins": 2.9030051231384277, + "rewards/rejected": -2.121974229812622, + "step": 781 + }, + { + "epoch": 1.0, + "learning_rate": 3.834488140419534e-12, + "logits/chosen": -3.178893566131592, + "logits/rejected": -3.146693706512451, + "logps/chosen": -277.33795166015625, + "logps/rejected": -757.741943359375, + "loss": 0.3073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9782577753067017, + "rewards/margins": 4.212095737457275, + "rewards/rejected": -3.2338380813598633, + "step": 782 + }, + { + "epoch": 1.0, + "learning_rate": 1.7042290529956093e-12, + "logits/chosen": -3.1718039512634277, + "logits/rejected": -3.0512022972106934, + "logps/chosen": -327.13330078125, + "logps/rejected": -481.5792236328125, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8093246221542358, + "rewards/margins": 2.5383574962615967, + "rewards/rejected": -1.7290328741073608, + "step": 783 + }, + { + "epoch": 1.0, + "learning_rate": 4.2605907851211985e-13, + "logits/chosen": -3.2299277782440186, + "logits/rejected": -3.134103775024414, + "logps/chosen": -263.41162109375, + "logps/rejected": -669.1932373046875, + "loss": 0.3364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8978004455566406, + "rewards/margins": 4.256969451904297, + "rewards/rejected": -3.3591690063476562, + "step": 784 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -3.223703384399414, + "logits/rejected": -3.0697996616363525, + "logps/chosen": -283.63037109375, + "logps/rejected": -1069.81103515625, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8890656232833862, + "rewards/margins": 4.728815078735352, + "rewards/rejected": -3.839749336242676, + "step": 785 + }, + { + "epoch": 1.0, + "step": 785, + "total_flos": 0.0, + "train_loss": 0.40759647632860074, + "train_runtime": 1388.9394, + "train_samples_per_second": 4.519, + "train_steps_per_second": 0.565 + } + ], + "logging_steps": 1.0, + "max_steps": 785, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..193a5109a01a66c33fd80812e287ce6cd1102209 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "down_proj", + "o_proj", + "gate_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e6844ff1a4d01af77c388ef7c3696c30fd4ae48 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515e94b8f46d40b602f31bb53faaa705e5de030ce0bf2e2f804ac95283df583c +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..19739cc1c9dd179cce82dcbe2082b06a4b624a5a --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_lora/trainer_state.json @@ -0,0 +1,7506 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 534, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.88235294117647e-09, + "logits/chosen": -3.0411376953125, + "logits/rejected": -3.0689902305603027, + "logps/chosen": -184.51116943359375, + "logps/rejected": -655.3782958984375, + "loss": 0.8963, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.014492797665297985, + "rewards/margins": -0.02717895433306694, + "rewards/rejected": 0.012686156667768955, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.176470588235294e-08, + "logits/chosen": -3.0553126335144043, + "logits/rejected": -3.1174941062927246, + "logps/chosen": -137.346923828125, + "logps/rejected": -536.9688110351562, + "loss": 0.8739, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.023029327392578125, + "rewards/margins": -0.06814346462488174, + "rewards/rejected": 0.04511413723230362, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.7647058823529412e-08, + "logits/chosen": -3.0930190086364746, + "logits/rejected": -3.06168794631958, + "logps/chosen": -137.61471557617188, + "logps/rejected": -444.93096923828125, + "loss": 0.8792, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.015667343512177467, + "rewards/margins": 0.024943161755800247, + "rewards/rejected": -0.00927581824362278, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 2.352941176470588e-08, + "logits/chosen": -3.1048102378845215, + "logits/rejected": -3.0810656547546387, + "logps/chosen": -153.20172119140625, + "logps/rejected": -396.3240966796875, + "loss": 0.8591, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0012687677517533302, + "rewards/margins": 0.00675659254193306, + "rewards/rejected": -0.00802536029368639, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 2.941176470588235e-08, + "logits/chosen": -3.100088119506836, + "logits/rejected": -3.0949032306671143, + "logps/chosen": -160.4691162109375, + "logps/rejected": -633.2630615234375, + "loss": 0.845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017342377454042435, + "rewards/margins": 0.11436843872070312, + "rewards/rejected": -0.13171082735061646, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 3.5294117647058824e-08, + "logits/chosen": -3.096184730529785, + "logits/rejected": -3.13116192817688, + "logps/chosen": -141.38934326171875, + "logps/rejected": -478.851806640625, + "loss": 0.8395, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.02450599893927574, + "rewards/margins": -0.02246132120490074, + "rewards/rejected": -0.002044677734375, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 4.1176470588235293e-08, + "logits/chosen": -3.0861446857452393, + "logits/rejected": -3.108443021774292, + "logps/chosen": -191.34518432617188, + "logps/rejected": -457.20770263671875, + "loss": 0.9386, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.018772125244140625, + "rewards/margins": -0.09078750759363174, + "rewards/rejected": 0.07201538234949112, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 4.705882352941176e-08, + "logits/chosen": -3.025179386138916, + "logits/rejected": -2.990495204925537, + "logps/chosen": -149.64447021484375, + "logps/rejected": -405.57373046875, + "loss": 0.8835, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.013240051455795765, + "rewards/margins": -0.084991455078125, + "rewards/rejected": 0.07175140082836151, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 5.294117647058823e-08, + "logits/chosen": -3.0792737007141113, + "logits/rejected": -3.016489267349243, + "logps/chosen": -152.46737670898438, + "logps/rejected": -445.66357421875, + "loss": 0.8801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.010989761911332607, + "rewards/margins": 0.07000008225440979, + "rewards/rejected": -0.05901031568646431, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 5.88235294117647e-08, + "logits/chosen": -3.14316463470459, + "logits/rejected": -3.1992883682250977, + "logps/chosen": -182.32083129882812, + "logps/rejected": -773.039306640625, + "loss": 0.9042, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.014411162585020065, + "rewards/margins": 0.06346511840820312, + "rewards/rejected": -0.04905395209789276, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 6.470588235294118e-08, + "logits/chosen": -3.0817432403564453, + "logits/rejected": -3.017056465148926, + "logps/chosen": -164.07785034179688, + "logps/rejected": -215.2300567626953, + "loss": 0.8708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013666534796357155, + "rewards/margins": 0.016265105456113815, + "rewards/rejected": -0.02993164025247097, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 7.058823529411765e-08, + "logits/chosen": -3.094412326812744, + "logits/rejected": -3.0485363006591797, + "logps/chosen": -125.90737915039062, + "logps/rejected": -338.43621826171875, + "loss": 0.844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030100632458925247, + "rewards/margins": 0.07515525817871094, + "rewards/rejected": -0.04505462571978569, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 7.64705882352941e-08, + "logits/chosen": -3.0921192169189453, + "logits/rejected": -3.122070789337158, + "logps/chosen": -172.8285369873047, + "logps/rejected": -531.8868408203125, + "loss": 0.854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014007950201630592, + "rewards/margins": 0.05099983140826225, + "rewards/rejected": -0.03699188306927681, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 8.235294117647059e-08, + "logits/chosen": -3.036288261413574, + "logits/rejected": -3.0849173069000244, + "logps/chosen": -105.21189880371094, + "logps/rejected": -231.66683959960938, + "loss": 0.8397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03608245775103569, + "rewards/margins": 0.009260558523237705, + "rewards/rejected": -0.04534301906824112, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 8.823529411764706e-08, + "logits/chosen": -3.0623645782470703, + "logits/rejected": -3.0680785179138184, + "logps/chosen": -100.0694808959961, + "logps/rejected": -204.0735626220703, + "loss": 0.8029, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.005744553171098232, + "rewards/margins": -0.015510939061641693, + "rewards/rejected": 0.0212554931640625, + "step": 15 + }, + { + "epoch": 0.03, + "learning_rate": 9.411764705882353e-08, + "logits/chosen": -2.9746618270874023, + "logits/rejected": -2.9653263092041016, + "logps/chosen": -99.60082244873047, + "logps/rejected": -324.7501525878906, + "loss": 0.8104, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.040557861328125, + "rewards/margins": 0.13975295424461365, + "rewards/rejected": -0.09919510036706924, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -3.0572993755340576, + "logits/rejected": -3.0215487480163574, + "logps/chosen": -155.76748657226562, + "logps/rejected": -221.12490844726562, + "loss": 0.845, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.025026703253388405, + "rewards/margins": -0.034951020032167435, + "rewards/rejected": 0.009924315847456455, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 9.999907688176173e-08, + "logits/chosen": -3.0391039848327637, + "logits/rejected": -3.1367557048797607, + "logps/chosen": -145.89862060546875, + "logps/rejected": -909.7462158203125, + "loss": 0.8301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019702911376953125, + "rewards/margins": 0.1000007688999176, + "rewards/rejected": -0.08029785752296448, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 9.999630756113277e-08, + "logits/chosen": -3.0479109287261963, + "logits/rejected": -3.0844457149505615, + "logps/chosen": -141.04736328125, + "logps/rejected": -303.6735534667969, + "loss": 0.7922, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024964142590761185, + "rewards/margins": 0.08844299614429474, + "rewards/rejected": -0.06347884982824326, + "step": 19 + }, + { + "epoch": 0.04, + "learning_rate": 9.999169214036957e-08, + "logits/chosen": -3.1172924041748047, + "logits/rejected": -3.055333137512207, + "logps/chosen": -137.72853088378906, + "logps/rejected": -527.1491088867188, + "loss": 0.8343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005391692742705345, + "rewards/margins": 0.061435699462890625, + "rewards/rejected": -0.05604400858283043, + "step": 20 + }, + { + "epoch": 0.04, + "learning_rate": 9.998523078989529e-08, + "logits/chosen": -3.1175358295440674, + "logits/rejected": -3.1071043014526367, + "logps/chosen": -154.10000610351562, + "logps/rejected": -455.3203430175781, + "loss": 0.8361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010537720285356045, + "rewards/margins": 0.00430602952837944, + "rewards/rejected": -0.014843749813735485, + "step": 21 + }, + { + "epoch": 0.04, + "learning_rate": 9.997692374829352e-08, + "logits/chosen": -3.0928573608398438, + "logits/rejected": -3.1630825996398926, + "logps/chosen": -127.1988525390625, + "logps/rejected": -363.9710693359375, + "loss": 0.8368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021335983648896217, + "rewards/margins": 0.1085536926984787, + "rewards/rejected": -0.08721771091222763, + "step": 22 + }, + { + "epoch": 0.04, + "learning_rate": 9.996677132229956e-08, + "logits/chosen": -3.098963499069214, + "logits/rejected": -3.107921838760376, + "logps/chosen": -127.81380462646484, + "logps/rejected": -383.45599365234375, + "loss": 0.7954, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0014537812676280737, + "rewards/margins": 0.051894765347242355, + "rewards/rejected": -0.05044098198413849, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 9.995477388678896e-08, + "logits/chosen": -3.167232036590576, + "logits/rejected": -3.082847833633423, + "logps/chosen": -127.94723510742188, + "logps/rejected": -278.6302185058594, + "loss": 0.7794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.024155426770448685, + "rewards/margins": 0.047751620411872864, + "rewards/rejected": -0.02359619177877903, + "step": 24 + }, + { + "epoch": 0.05, + "learning_rate": 9.994093188476381e-08, + "logits/chosen": -3.0381908416748047, + "logits/rejected": -3.079357624053955, + "logps/chosen": -114.33000946044922, + "logps/rejected": -433.8147888183594, + "loss": 0.8129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004452514462172985, + "rewards/margins": 0.09935913234949112, + "rewards/rejected": -0.10381165146827698, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 9.992524582733627e-08, + "logits/chosen": -3.100212335586548, + "logits/rejected": -3.077624797821045, + "logps/chosen": -178.69337463378906, + "logps/rejected": -713.02783203125, + "loss": 0.7715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01014099083840847, + "rewards/margins": 0.07089996337890625, + "rewards/rejected": -0.06075897440314293, + "step": 26 + }, + { + "epoch": 0.05, + "learning_rate": 9.99077162937098e-08, + "logits/chosen": -3.07360577583313, + "logits/rejected": -3.075355052947998, + "logps/chosen": -152.56033325195312, + "logps/rejected": -338.12982177734375, + "loss": 0.7973, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06078987568616867, + "rewards/margins": 0.1077907532453537, + "rewards/rejected": -0.047000885009765625, + "step": 27 + }, + { + "epoch": 0.05, + "learning_rate": 9.988834393115765e-08, + "logits/chosen": -3.052241563796997, + "logits/rejected": -3.0480966567993164, + "logps/chosen": -163.875732421875, + "logps/rejected": -411.67352294921875, + "loss": 0.7624, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02096099779009819, + "rewards/margins": 0.14486999809741974, + "rewards/rejected": -0.12390899658203125, + "step": 28 + }, + { + "epoch": 0.05, + "learning_rate": 9.986712945499908e-08, + "logits/chosen": -3.1006131172180176, + "logits/rejected": -3.085541248321533, + "logps/chosen": -174.07553100585938, + "logps/rejected": -538.4234619140625, + "loss": 0.8008, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.018138885498046875, + "rewards/margins": 0.09250259399414062, + "rewards/rejected": -0.1106414794921875, + "step": 29 + }, + { + "epoch": 0.06, + "learning_rate": 9.984407364857291e-08, + "logits/chosen": -3.12211537361145, + "logits/rejected": -3.1720988750457764, + "logps/chosen": -189.80044555664062, + "logps/rejected": -425.6622619628906, + "loss": 0.8021, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021506499499082565, + "rewards/margins": 0.1858619749546051, + "rewards/rejected": -0.16435547173023224, + "step": 30 + }, + { + "epoch": 0.06, + "learning_rate": 9.98191773632085e-08, + "logits/chosen": -3.0976040363311768, + "logits/rejected": -3.103477954864502, + "logps/chosen": -117.91120147705078, + "logps/rejected": -354.82708740234375, + "loss": 0.7642, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021997833624482155, + "rewards/margins": 0.15422439575195312, + "rewards/rejected": -0.13222657144069672, + "step": 31 + }, + { + "epoch": 0.06, + "learning_rate": 9.979244151819453e-08, + "logits/chosen": -3.040595531463623, + "logits/rejected": -3.0555477142333984, + "logps/chosen": -155.21815490722656, + "logps/rejected": -385.66229248046875, + "loss": 0.7798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02458992227911949, + "rewards/margins": 0.10576210170984268, + "rewards/rejected": -0.08117218315601349, + "step": 32 + }, + { + "epoch": 0.06, + "learning_rate": 9.976386710074478e-08, + "logits/chosen": -3.0906858444213867, + "logits/rejected": -3.096956729888916, + "logps/chosen": -114.12242126464844, + "logps/rejected": -319.24713134765625, + "loss": 0.7314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0012687686830759048, + "rewards/margins": 0.07241897284984589, + "rewards/rejected": -0.07115020602941513, + "step": 33 + }, + { + "epoch": 0.06, + "learning_rate": 9.97334551659619e-08, + "logits/chosen": -3.0730574131011963, + "logits/rejected": -3.03106689453125, + "logps/chosen": -133.31800842285156, + "logps/rejected": -681.562744140625, + "loss": 0.7105, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03903923183679581, + "rewards/margins": 0.26671257615089417, + "rewards/rejected": -0.22767335176467896, + "step": 34 + }, + { + "epoch": 0.07, + "learning_rate": 9.970120683679837e-08, + "logits/chosen": -3.0721654891967773, + "logits/rejected": -3.130153179168701, + "logps/chosen": -125.16744232177734, + "logps/rejected": -435.24407958984375, + "loss": 0.6768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.056151580065488815, + "rewards/margins": 0.11973648518323898, + "rewards/rejected": -0.06358490139245987, + "step": 35 + }, + { + "epoch": 0.07, + "learning_rate": 9.966712330401503e-08, + "logits/chosen": -3.058094024658203, + "logits/rejected": -3.0543994903564453, + "logps/chosen": -216.9531707763672, + "logps/rejected": -679.5852661132812, + "loss": 0.7295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05828247219324112, + "rewards/margins": 0.3266158998012543, + "rewards/rejected": -0.26833343505859375, + "step": 36 + }, + { + "epoch": 0.07, + "learning_rate": 9.963120582613708e-08, + "logits/chosen": -3.124467372894287, + "logits/rejected": -3.114549160003662, + "logps/chosen": -159.54644775390625, + "logps/rejected": -418.9359436035156, + "loss": 0.7732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01427383441478014, + "rewards/margins": 0.1621391326189041, + "rewards/rejected": -0.14786529541015625, + "step": 37 + }, + { + "epoch": 0.07, + "learning_rate": 9.95934557294077e-08, + "logits/chosen": -3.0496649742126465, + "logits/rejected": -3.067847490310669, + "logps/chosen": -131.59060668945312, + "logps/rejected": -319.07232666015625, + "loss": 0.7099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.043590545654296875, + "rewards/margins": 0.2315879762172699, + "rewards/rejected": -0.18799743056297302, + "step": 38 + }, + { + "epoch": 0.07, + "learning_rate": 9.9553874407739e-08, + "logits/chosen": -3.0911808013916016, + "logits/rejected": -3.0304131507873535, + "logps/chosen": -147.90444946289062, + "logps/rejected": -701.7747192382812, + "loss": 0.7041, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11252746731042862, + "rewards/margins": 0.38537752628326416, + "rewards/rejected": -0.27285003662109375, + "step": 39 + }, + { + "epoch": 0.07, + "learning_rate": 9.951246332266056e-08, + "logits/chosen": -3.0966525077819824, + "logits/rejected": -3.141207695007324, + "logps/chosen": -145.07672119140625, + "logps/rejected": -329.6766357421875, + "loss": 0.7698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023496245965361595, + "rewards/margins": 0.1289817839860916, + "rewards/rejected": -0.15247802436351776, + "step": 40 + }, + { + "epoch": 0.08, + "learning_rate": 9.946922400326553e-08, + "logits/chosen": -3.106942653656006, + "logits/rejected": -3.1140313148498535, + "logps/chosen": -138.85733032226562, + "logps/rejected": -382.07916259765625, + "loss": 0.7423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05340385437011719, + "rewards/margins": 0.19516105949878693, + "rewards/rejected": -0.14175720512866974, + "step": 41 + }, + { + "epoch": 0.08, + "learning_rate": 9.942415804615405e-08, + "logits/chosen": -3.0712199211120605, + "logits/rejected": -3.0456366539001465, + "logps/chosen": -208.62551879882812, + "logps/rejected": -467.1952819824219, + "loss": 0.6751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019980622455477715, + "rewards/margins": 0.17825087904930115, + "rewards/rejected": -0.15827025473117828, + "step": 42 + }, + { + "epoch": 0.08, + "learning_rate": 9.93772671153744e-08, + "logits/chosen": -3.1226329803466797, + "logits/rejected": -3.1669535636901855, + "logps/chosen": -117.70262908935547, + "logps/rejected": -529.146240234375, + "loss": 0.7099, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09015998989343643, + "rewards/margins": 0.3647388517856598, + "rewards/rejected": -0.27457886934280396, + "step": 43 + }, + { + "epoch": 0.08, + "learning_rate": 9.932855294236153e-08, + "logits/chosen": -2.9945130348205566, + "logits/rejected": -3.0607786178588867, + "logps/chosen": -139.55384826660156, + "logps/rejected": -296.2081298828125, + "loss": 0.6761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04958953708410263, + "rewards/margins": 0.24640198051929474, + "rewards/rejected": -0.1968124508857727, + "step": 44 + }, + { + "epoch": 0.08, + "learning_rate": 9.927801732587312e-08, + "logits/chosen": -3.1161086559295654, + "logits/rejected": -3.1370840072631836, + "logps/chosen": -131.07217407226562, + "logps/rejected": -518.37109375, + "loss": 0.6532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022408295422792435, + "rewards/margins": 0.25492173433303833, + "rewards/rejected": -0.232513427734375, + "step": 45 + }, + { + "epoch": 0.09, + "learning_rate": 9.922566213192309e-08, + "logits/chosen": -3.081432819366455, + "logits/rejected": -3.0362539291381836, + "logps/chosen": -167.77581787109375, + "logps/rejected": -331.3094787597656, + "loss": 0.681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05214233323931694, + "rewards/margins": 0.14326858520507812, + "rewards/rejected": -0.09112624824047089, + "step": 46 + }, + { + "epoch": 0.09, + "learning_rate": 9.917148929371287e-08, + "logits/chosen": -3.0683887004852295, + "logits/rejected": -3.068500518798828, + "logps/chosen": -127.74578094482422, + "logps/rejected": -460.54364013671875, + "loss": 0.6925, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07111816108226776, + "rewards/margins": 0.34772491455078125, + "rewards/rejected": -0.2766067385673523, + "step": 47 + }, + { + "epoch": 0.09, + "learning_rate": 9.911550081155981e-08, + "logits/chosen": -3.0481557846069336, + "logits/rejected": -3.0627617835998535, + "logps/chosen": -154.930908203125, + "logps/rejected": -265.7994079589844, + "loss": 0.6939, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.073272705078125, + "rewards/margins": 0.19399109482765198, + "rewards/rejected": -0.12071838229894638, + "step": 48 + }, + { + "epoch": 0.09, + "learning_rate": 9.905769875282351e-08, + "logits/chosen": -3.1186704635620117, + "logits/rejected": -3.0700016021728516, + "logps/chosen": -172.4241180419922, + "logps/rejected": -625.5354614257812, + "loss": 0.637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.089441679418087, + "rewards/margins": 0.5393714904785156, + "rewards/rejected": -0.449929803609848, + "step": 49 + }, + { + "epoch": 0.09, + "learning_rate": 9.899808525182933e-08, + "logits/chosen": -3.0329208374023438, + "logits/rejected": -3.052865982055664, + "logps/chosen": -149.6173095703125, + "logps/rejected": -519.6468505859375, + "loss": 0.6246, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1121162474155426, + "rewards/margins": 0.5559547543525696, + "rewards/rejected": -0.44383853673934937, + "step": 50 + }, + { + "epoch": 0.1, + "learning_rate": 9.893666250978969e-08, + "logits/chosen": -3.0562851428985596, + "logits/rejected": -3.086427688598633, + "logps/chosen": -144.43685913085938, + "logps/rejected": -475.0194091796875, + "loss": 0.6494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0382017157971859, + "rewards/margins": 0.4031553268432617, + "rewards/rejected": -0.3649536371231079, + "step": 51 + }, + { + "epoch": 0.1, + "learning_rate": 9.887343279472271e-08, + "logits/chosen": -3.1468591690063477, + "logits/rejected": -3.107079029083252, + "logps/chosen": -149.77337646484375, + "logps/rejected": -415.36688232421875, + "loss": 0.6696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04149170219898224, + "rewards/margins": 0.3059936761856079, + "rewards/rejected": -0.2645019590854645, + "step": 52 + }, + { + "epoch": 0.1, + "learning_rate": 9.880839844136853e-08, + "logits/chosen": -3.1217241287231445, + "logits/rejected": -3.0885934829711914, + "logps/chosen": -122.68134307861328, + "logps/rejected": -410.42913818359375, + "loss": 0.6773, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019306564703583717, + "rewards/margins": 0.2871013879776001, + "rewards/rejected": -0.26779481768608093, + "step": 53 + }, + { + "epoch": 0.1, + "learning_rate": 9.874156185110305e-08, + "logits/chosen": -3.1772565841674805, + "logits/rejected": -3.1481895446777344, + "logps/chosen": -145.79733276367188, + "logps/rejected": -374.3147888183594, + "loss": 0.6718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07339401543140411, + "rewards/margins": 0.3349204957485199, + "rewards/rejected": -0.261526495218277, + "step": 54 + }, + { + "epoch": 0.1, + "learning_rate": 9.867292549184929e-08, + "logits/chosen": -3.071831226348877, + "logits/rejected": -3.1122403144836426, + "logps/chosen": -157.36663818359375, + "logps/rejected": -344.62811279296875, + "loss": 0.6501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.051763154566287994, + "rewards/margins": 0.25410234928131104, + "rewards/rejected": -0.20233917236328125, + "step": 55 + }, + { + "epoch": 0.1, + "learning_rate": 9.860249189798626e-08, + "logits/chosen": -3.066768169403076, + "logits/rejected": -3.075204372406006, + "logps/chosen": -139.35516357421875, + "logps/rejected": -372.6615905761719, + "loss": 0.6501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05716095119714737, + "rewards/margins": 0.27088165283203125, + "rewards/rejected": -0.21372070908546448, + "step": 56 + }, + { + "epoch": 0.11, + "learning_rate": 9.853026367025534e-08, + "logits/chosen": -3.0981554985046387, + "logits/rejected": -3.082209348678589, + "logps/chosen": -122.2550048828125, + "logps/rejected": -397.64813232421875, + "loss": 0.6619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0900779739022255, + "rewards/margins": 0.32467880845069885, + "rewards/rejected": -0.23460084199905396, + "step": 57 + }, + { + "epoch": 0.11, + "learning_rate": 9.845624347566432e-08, + "logits/chosen": -3.115840435028076, + "logits/rejected": -3.134711265563965, + "logps/chosen": -153.3360137939453, + "logps/rejected": -380.00225830078125, + "loss": 0.7069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03903999552130699, + "rewards/margins": 0.3481708765029907, + "rewards/rejected": -0.30913087725639343, + "step": 58 + }, + { + "epoch": 0.11, + "learning_rate": 9.838043404738888e-08, + "logits/chosen": -3.0782766342163086, + "logits/rejected": -3.129624128341675, + "logps/chosen": -128.83377075195312, + "logps/rejected": -616.8156127929688, + "loss": 0.6218, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.060645293444395065, + "rewards/margins": 0.553296685218811, + "rewards/rejected": -0.49265140295028687, + "step": 59 + }, + { + "epoch": 0.11, + "learning_rate": 9.830283818467163e-08, + "logits/chosen": -3.1197257041931152, + "logits/rejected": -3.22835636138916, + "logps/chosen": -207.733154296875, + "logps/rejected": -4207.140625, + "loss": 0.5608, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.046907808631658554, + "rewards/margins": 1.7518730163574219, + "rewards/rejected": -1.7049652338027954, + "step": 60 + }, + { + "epoch": 0.11, + "learning_rate": 9.822345875271882e-08, + "logits/chosen": -3.0536301136016846, + "logits/rejected": -3.053011178970337, + "logps/chosen": -146.29666137695312, + "logps/rejected": -419.369384765625, + "loss": 0.6266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08518409729003906, + "rewards/margins": 0.37280771136283875, + "rewards/rejected": -0.2876236140727997, + "step": 61 + }, + { + "epoch": 0.12, + "learning_rate": 9.814229868259451e-08, + "logits/chosen": -3.117441177368164, + "logits/rejected": -3.1234331130981445, + "logps/chosen": -146.49032592773438, + "logps/rejected": -433.0301818847656, + "loss": 0.6328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.033319856971502304, + "rewards/margins": 0.454132080078125, + "rewards/rejected": -0.4208122193813324, + "step": 62 + }, + { + "epoch": 0.12, + "learning_rate": 9.805936097111234e-08, + "logits/chosen": -3.0705642700195312, + "logits/rejected": -3.058208703994751, + "logps/chosen": -143.10308837890625, + "logps/rejected": -258.48345947265625, + "loss": 0.6179, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06503677368164062, + "rewards/margins": 0.2698715329170227, + "rewards/rejected": -0.20483475923538208, + "step": 63 + }, + { + "epoch": 0.12, + "learning_rate": 9.797464868072487e-08, + "logits/chosen": -3.149134635925293, + "logits/rejected": -3.089573383331299, + "logps/chosen": -149.08230590820312, + "logps/rejected": -704.3507080078125, + "loss": 0.539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08149490505456924, + "rewards/margins": 0.7221962213516235, + "rewards/rejected": -0.6407012939453125, + "step": 64 + }, + { + "epoch": 0.12, + "learning_rate": 9.78881649394105e-08, + "logits/chosen": -3.008868932723999, + "logits/rejected": -3.0593137741088867, + "logps/chosen": -164.2830810546875, + "logps/rejected": -357.7618408203125, + "loss": 0.6344, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01828308030962944, + "rewards/margins": 0.237782284617424, + "rewards/rejected": -0.21949920058250427, + "step": 65 + }, + { + "epoch": 0.12, + "learning_rate": 9.779991294055801e-08, + "logits/chosen": -3.081058979034424, + "logits/rejected": -3.104982852935791, + "logps/chosen": -115.42083740234375, + "logps/rejected": -297.8736877441406, + "loss": 0.6053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06250610202550888, + "rewards/margins": 0.29280394315719604, + "rewards/rejected": -0.23029786348342896, + "step": 66 + }, + { + "epoch": 0.13, + "learning_rate": 9.770989594284857e-08, + "logits/chosen": -3.1261677742004395, + "logits/rejected": -3.1548385620117188, + "logps/chosen": -160.0076904296875, + "logps/rejected": -762.2308349609375, + "loss": 0.5632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03137855604290962, + "rewards/margins": 0.9220371246337891, + "rewards/rejected": -0.8906586170196533, + "step": 67 + }, + { + "epoch": 0.13, + "learning_rate": 9.761811727013546e-08, + "logits/chosen": -3.0876078605651855, + "logits/rejected": -2.98945951461792, + "logps/chosen": -149.64822387695312, + "logps/rejected": -501.8586730957031, + "loss": 0.5753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06664200127124786, + "rewards/margins": 0.5327125787734985, + "rewards/rejected": -0.46607059240341187, + "step": 68 + }, + { + "epoch": 0.13, + "learning_rate": 9.752458031132141e-08, + "logits/chosen": -3.0454580783843994, + "logits/rejected": -3.0243301391601562, + "logps/chosen": -153.89013671875, + "logps/rejected": -395.6580505371094, + "loss": 0.6443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11991577595472336, + "rewards/margins": 0.4400482475757599, + "rewards/rejected": -0.32013246417045593, + "step": 69 + }, + { + "epoch": 0.13, + "learning_rate": 9.742928852023324e-08, + "logits/chosen": -3.154998779296875, + "logits/rejected": -3.0993270874023438, + "logps/chosen": -155.8370361328125, + "logps/rejected": -361.3126525878906, + "loss": 0.5498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11256180703639984, + "rewards/margins": 0.35690537095069885, + "rewards/rejected": -0.2443435788154602, + "step": 70 + }, + { + "epoch": 0.13, + "learning_rate": 9.733224541549462e-08, + "logits/chosen": -3.1266963481903076, + "logits/rejected": -3.052725315093994, + "logps/chosen": -145.01431274414062, + "logps/rejected": -709.4693603515625, + "loss": 0.5336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09305419772863388, + "rewards/margins": 0.9406814575195312, + "rewards/rejected": -0.8476272821426392, + "step": 71 + }, + { + "epoch": 0.13, + "learning_rate": 9.723345458039594e-08, + "logits/chosen": -3.100769519805908, + "logits/rejected": -3.1403512954711914, + "logps/chosen": -165.10008239746094, + "logps/rejected": -516.2559204101562, + "loss": 0.5518, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10950545966625214, + "rewards/margins": 0.6930260062217712, + "rewards/rejected": -0.5835205316543579, + "step": 72 + }, + { + "epoch": 0.14, + "learning_rate": 9.713291966276205e-08, + "logits/chosen": -3.03362774848938, + "logits/rejected": -3.0860047340393066, + "logps/chosen": -166.9038543701172, + "logps/rejected": -523.265869140625, + "loss": 0.5862, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14818954467773438, + "rewards/margins": 0.6273704767227173, + "rewards/rejected": -0.4791809320449829, + "step": 73 + }, + { + "epoch": 0.14, + "learning_rate": 9.703064437481758e-08, + "logits/chosen": -3.1211652755737305, + "logits/rejected": -3.0757224559783936, + "logps/chosen": -122.44220733642578, + "logps/rejected": -1310.39697265625, + "loss": 0.5422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13980256021022797, + "rewards/margins": 1.404097080230713, + "rewards/rejected": -1.2642945051193237, + "step": 74 + }, + { + "epoch": 0.14, + "learning_rate": 9.69266324930499e-08, + "logits/chosen": -3.014009952545166, + "logits/rejected": -3.030397891998291, + "logps/chosen": -145.6109619140625, + "logps/rejected": -1114.3826904296875, + "loss": 0.4914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13307571411132812, + "rewards/margins": 1.5098381042480469, + "rewards/rejected": -1.3767623901367188, + "step": 75 + }, + { + "epoch": 0.14, + "learning_rate": 9.682088785806962e-08, + "logits/chosen": -3.146559715270996, + "logits/rejected": -3.126997470855713, + "logps/chosen": -130.97874450683594, + "logps/rejected": -386.6065673828125, + "loss": 0.5946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13984107971191406, + "rewards/margins": 0.5451176166534424, + "rewards/rejected": -0.40527650713920593, + "step": 76 + }, + { + "epoch": 0.14, + "learning_rate": 9.671341437446876e-08, + "logits/chosen": -3.114895820617676, + "logits/rejected": -3.114790439605713, + "logps/chosen": -112.02957916259766, + "logps/rejected": -575.4122314453125, + "loss": 0.5122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1432487517595291, + "rewards/margins": 1.0338646173477173, + "rewards/rejected": -0.8906158208847046, + "step": 77 + }, + { + "epoch": 0.15, + "learning_rate": 9.660421601067665e-08, + "logits/chosen": -3.133030891418457, + "logits/rejected": -3.1136181354522705, + "logps/chosen": -146.79696655273438, + "logps/rejected": -704.5775756835938, + "loss": 0.4851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12328299880027771, + "rewards/margins": 1.0427868366241455, + "rewards/rejected": -0.9195038080215454, + "step": 78 + }, + { + "epoch": 0.15, + "learning_rate": 9.649329679881332e-08, + "logits/chosen": -3.0666818618774414, + "logits/rejected": -3.074164628982544, + "logps/chosen": -105.08897399902344, + "logps/rejected": -297.69354248046875, + "loss": 0.6018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0935436263680458, + "rewards/margins": 0.39640313386917114, + "rewards/rejected": -0.30285948514938354, + "step": 79 + }, + { + "epoch": 0.15, + "learning_rate": 9.63806608345407e-08, + "logits/chosen": -3.027312755584717, + "logits/rejected": -2.983745813369751, + "logps/chosen": -144.31024169921875, + "logps/rejected": -409.4296875, + "loss": 0.591, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1130577027797699, + "rewards/margins": 0.5578452944755554, + "rewards/rejected": -0.4447875916957855, + "step": 80 + }, + { + "epoch": 0.15, + "learning_rate": 9.626631227691126e-08, + "logits/chosen": -3.092179536819458, + "logits/rejected": -3.102905750274658, + "logps/chosen": -138.69161987304688, + "logps/rejected": -408.43975830078125, + "loss": 0.5283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07065658271312714, + "rewards/margins": 0.6368057727813721, + "rewards/rejected": -0.566149115562439, + "step": 81 + }, + { + "epoch": 0.15, + "learning_rate": 9.615025534821462e-08, + "logits/chosen": -3.0868492126464844, + "logits/rejected": -3.148726463317871, + "logps/chosen": -126.40211486816406, + "logps/rejected": -375.29876708984375, + "loss": 0.6138, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10038833320140839, + "rewards/margins": 0.5723594427108765, + "rewards/rejected": -0.47197115421295166, + "step": 82 + }, + { + "epoch": 0.16, + "learning_rate": 9.603249433382143e-08, + "logits/chosen": -3.0199832916259766, + "logits/rejected": -3.0670204162597656, + "logps/chosen": -148.22592163085938, + "logps/rejected": -406.43109130859375, + "loss": 0.5635, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1217193603515625, + "rewards/margins": 0.5497207641601562, + "rewards/rejected": -0.42800140380859375, + "step": 83 + }, + { + "epoch": 0.16, + "learning_rate": 9.591303358202534e-08, + "logits/chosen": -3.0461010932922363, + "logits/rejected": -3.0684821605682373, + "logps/chosen": -117.47139739990234, + "logps/rejected": -423.5786437988281, + "loss": 0.5867, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09084434807300568, + "rewards/margins": 0.5112636685371399, + "rewards/rejected": -0.420419305562973, + "step": 84 + }, + { + "epoch": 0.16, + "learning_rate": 9.579187750388225e-08, + "logits/chosen": -2.917633295059204, + "logits/rejected": -2.9472813606262207, + "logps/chosen": -183.558837890625, + "logps/rejected": -425.62432861328125, + "loss": 0.5333, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09096069633960724, + "rewards/margins": 0.5395797491073608, + "rewards/rejected": -0.4486190676689148, + "step": 85 + }, + { + "epoch": 0.16, + "learning_rate": 9.566903057304763e-08, + "logits/chosen": -3.0699312686920166, + "logits/rejected": -3.0950827598571777, + "logps/chosen": -123.12996673583984, + "logps/rejected": -376.41839599609375, + "loss": 0.6175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10778732597827911, + "rewards/margins": 0.4516410827636719, + "rewards/rejected": -0.34385377168655396, + "step": 86 + }, + { + "epoch": 0.16, + "learning_rate": 9.554449732561113e-08, + "logits/chosen": -3.1316311359405518, + "logits/rejected": -3.1666173934936523, + "logps/chosen": -116.79902648925781, + "logps/rejected": -400.06097412109375, + "loss": 0.5869, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12367210537195206, + "rewards/margins": 0.6598857641220093, + "rewards/rejected": -0.5362136960029602, + "step": 87 + }, + { + "epoch": 0.16, + "learning_rate": 9.541828235992925e-08, + "logits/chosen": -3.0869405269622803, + "logits/rejected": -3.0899136066436768, + "logps/chosen": -134.35630798339844, + "logps/rejected": -721.3387451171875, + "loss": 0.482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0798133835196495, + "rewards/margins": 1.0273643732070923, + "rewards/rejected": -0.9475510120391846, + "step": 88 + }, + { + "epoch": 0.17, + "learning_rate": 9.529039033645548e-08, + "logits/chosen": -3.0658822059631348, + "logits/rejected": -3.0784618854522705, + "logps/chosen": -108.74337768554688, + "logps/rejected": -457.757080078125, + "loss": 0.5423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08264350891113281, + "rewards/margins": 0.9119503498077393, + "rewards/rejected": -0.8293068408966064, + "step": 89 + }, + { + "epoch": 0.17, + "learning_rate": 9.516082597756815e-08, + "logits/chosen": -3.0617895126342773, + "logits/rejected": -3.028872489929199, + "logps/chosen": -119.67318725585938, + "logps/rejected": -363.35943603515625, + "loss": 0.5707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10228195786476135, + "rewards/margins": 0.5435875058174133, + "rewards/rejected": -0.44130557775497437, + "step": 90 + }, + { + "epoch": 0.17, + "learning_rate": 9.50295940673962e-08, + "logits/chosen": -3.0679616928100586, + "logits/rejected": -3.0680532455444336, + "logps/chosen": -145.33889770507812, + "logps/rejected": -541.4093627929688, + "loss": 0.4857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13452300429344177, + "rewards/margins": 0.8919113874435425, + "rewards/rejected": -0.7573883533477783, + "step": 91 + }, + { + "epoch": 0.17, + "learning_rate": 9.48966994516424e-08, + "logits/chosen": -3.070192337036133, + "logits/rejected": -2.9945425987243652, + "logps/chosen": -261.6451721191406, + "logps/rejected": -716.8310546875, + "loss": 0.4562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16301575303077698, + "rewards/margins": 1.016218662261963, + "rewards/rejected": -0.8532028794288635, + "step": 92 + }, + { + "epoch": 0.17, + "learning_rate": 9.476214703740453e-08, + "logits/chosen": -3.1421403884887695, + "logits/rejected": -3.153069019317627, + "logps/chosen": -140.22323608398438, + "logps/rejected": -403.0403747558594, + "loss": 0.5314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07180824875831604, + "rewards/margins": 0.6426044702529907, + "rewards/rejected": -0.5707962512969971, + "step": 93 + }, + { + "epoch": 0.18, + "learning_rate": 9.462594179299406e-08, + "logits/chosen": -3.087860345840454, + "logits/rejected": -3.0945193767547607, + "logps/chosen": -124.17326354980469, + "logps/rejected": -455.65643310546875, + "loss": 0.5293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10440024733543396, + "rewards/margins": 0.8292400240898132, + "rewards/rejected": -0.7248398065567017, + "step": 94 + }, + { + "epoch": 0.18, + "learning_rate": 9.44880887477528e-08, + "logits/chosen": -3.037456512451172, + "logits/rejected": -2.976985454559326, + "logps/chosen": -167.25924682617188, + "logps/rejected": -746.448486328125, + "loss": 0.4634, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14955444633960724, + "rewards/margins": 1.111541748046875, + "rewards/rejected": -0.961987316608429, + "step": 95 + }, + { + "epoch": 0.18, + "learning_rate": 9.434859299186719e-08, + "logits/chosen": -3.1144423484802246, + "logits/rejected": -3.1075315475463867, + "logps/chosen": -168.0101776123047, + "logps/rejected": -567.3807373046875, + "loss": 0.5072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12075576931238174, + "rewards/margins": 1.1031745672225952, + "rewards/rejected": -0.9824188351631165, + "step": 96 + }, + { + "epoch": 0.18, + "learning_rate": 9.420745967618025e-08, + "logits/chosen": -3.098034143447876, + "logits/rejected": -3.0997745990753174, + "logps/chosen": -146.21661376953125, + "logps/rejected": -547.8779296875, + "loss": 0.4844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11241073906421661, + "rewards/margins": 0.9151412844657898, + "rewards/rejected": -0.8027305603027344, + "step": 97 + }, + { + "epoch": 0.18, + "learning_rate": 9.40646940120015e-08, + "logits/chosen": -3.0551929473876953, + "logits/rejected": -3.066638946533203, + "logps/chosen": -109.98143005371094, + "logps/rejected": -258.299560546875, + "loss": 0.5767, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06340332329273224, + "rewards/margins": 0.3536369204521179, + "rewards/rejected": -0.2902336120605469, + "step": 98 + }, + { + "epoch": 0.19, + "learning_rate": 9.39203012709145e-08, + "logits/chosen": -3.0903940200805664, + "logits/rejected": -3.0686984062194824, + "logps/chosen": -201.74166870117188, + "logps/rejected": -392.7958068847656, + "loss": 0.4432, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15237121284008026, + "rewards/margins": 0.768707275390625, + "rewards/rejected": -0.6163360476493835, + "step": 99 + }, + { + "epoch": 0.19, + "learning_rate": 9.377428678458213e-08, + "logits/chosen": -2.9997825622558594, + "logits/rejected": -3.0114493370056152, + "logps/chosen": -169.32742309570312, + "logps/rejected": -379.3037109375, + "loss": 0.5569, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07300186157226562, + "rewards/margins": 0.539903998374939, + "rewards/rejected": -0.4669021666049957, + "step": 100 + }, + { + "epoch": 0.19, + "learning_rate": 9.362665594454984e-08, + "logits/chosen": -3.077376365661621, + "logits/rejected": -3.0454440116882324, + "logps/chosen": -167.7301025390625, + "logps/rejected": -248.06088256835938, + "loss": 0.5577, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1055271178483963, + "rewards/margins": 0.3565746247768402, + "rewards/rejected": -0.2510475218296051, + "step": 101 + }, + { + "epoch": 0.19, + "learning_rate": 9.347741420204643e-08, + "logits/chosen": -3.0519423484802246, + "logits/rejected": -3.079662322998047, + "logps/chosen": -165.34036254882812, + "logps/rejected": -505.29791259765625, + "loss": 0.4855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19465598464012146, + "rewards/margins": 1.0506634712219238, + "rewards/rejected": -0.85600745677948, + "step": 102 + }, + { + "epoch": 0.19, + "learning_rate": 9.332656706778289e-08, + "logits/chosen": -3.041663646697998, + "logits/rejected": -3.1004695892333984, + "logps/chosen": -162.7839813232422, + "logps/rejected": -574.2347412109375, + "loss": 0.5221, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1327030211687088, + "rewards/margins": 0.858023464679718, + "rewards/rejected": -0.7253204584121704, + "step": 103 + }, + { + "epoch": 0.19, + "learning_rate": 9.317412011174885e-08, + "logits/chosen": -3.1043505668640137, + "logits/rejected": -3.1067428588867188, + "logps/chosen": -130.97207641601562, + "logps/rejected": -566.049072265625, + "loss": 0.4255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17347107827663422, + "rewards/margins": 1.142085313796997, + "rewards/rejected": -0.9686142206192017, + "step": 104 + }, + { + "epoch": 0.2, + "learning_rate": 9.302007896300696e-08, + "logits/chosen": -3.0601701736450195, + "logits/rejected": -3.074604034423828, + "logps/chosen": -165.1224822998047, + "logps/rejected": -567.1432495117188, + "loss": 0.4782, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16741257905960083, + "rewards/margins": 1.1250618696212769, + "rewards/rejected": -0.9576492309570312, + "step": 105 + }, + { + "epoch": 0.2, + "learning_rate": 9.286444930948495e-08, + "logits/chosen": -3.118370771408081, + "logits/rejected": -3.157607316970825, + "logps/chosen": -137.81646728515625, + "logps/rejected": -436.73760986328125, + "loss": 0.5195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11629067361354828, + "rewards/margins": 0.8322575092315674, + "rewards/rejected": -0.7159668207168579, + "step": 106 + }, + { + "epoch": 0.2, + "learning_rate": 9.270723689776567e-08, + "logits/chosen": -3.09091854095459, + "logits/rejected": -3.097170352935791, + "logps/chosen": -121.76219940185547, + "logps/rejected": -300.3966369628906, + "loss": 0.5053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15979652106761932, + "rewards/margins": 0.7098773717880249, + "rewards/rejected": -0.5500808954238892, + "step": 107 + }, + { + "epoch": 0.2, + "learning_rate": 9.254844753287492e-08, + "logits/chosen": -3.109797477722168, + "logits/rejected": -3.138103485107422, + "logps/chosen": -134.86244201660156, + "logps/rejected": -428.8330078125, + "loss": 0.4593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1383201628923416, + "rewards/margins": 0.7949790954589844, + "rewards/rejected": -0.656658947467804, + "step": 108 + }, + { + "epoch": 0.2, + "learning_rate": 9.238808707806705e-08, + "logits/chosen": -3.090503692626953, + "logits/rejected": -2.976184606552124, + "logps/chosen": -217.07879638671875, + "logps/rejected": -1000.3609008789062, + "loss": 0.4318, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19042283296585083, + "rewards/margins": 1.833958625793457, + "rewards/rejected": -1.6435357332229614, + "step": 109 + }, + { + "epoch": 0.21, + "learning_rate": 9.222616145460848e-08, + "logits/chosen": -3.119096279144287, + "logits/rejected": -3.1207971572875977, + "logps/chosen": -182.77590942382812, + "logps/rejected": -432.6871337890625, + "loss": 0.4625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2125999480485916, + "rewards/margins": 0.8809410333633423, + "rewards/rejected": -0.6683410406112671, + "step": 110 + }, + { + "epoch": 0.21, + "learning_rate": 9.206267664155906e-08, + "logits/chosen": -3.0700764656066895, + "logits/rejected": -3.086308717727661, + "logps/chosen": -145.32583618164062, + "logps/rejected": -823.9697265625, + "loss": 0.4106, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2031150907278061, + "rewards/margins": 1.676567792892456, + "rewards/rejected": -1.4734528064727783, + "step": 111 + }, + { + "epoch": 0.21, + "learning_rate": 9.189763867555128e-08, + "logits/chosen": -3.1026670932769775, + "logits/rejected": -3.1242868900299072, + "logps/chosen": -151.7227325439453, + "logps/rejected": -446.1968688964844, + "loss": 0.4771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15309448540210724, + "rewards/margins": 0.8575226068496704, + "rewards/rejected": -0.7044280767440796, + "step": 112 + }, + { + "epoch": 0.21, + "learning_rate": 9.173105365056741e-08, + "logits/chosen": -3.1594018936157227, + "logits/rejected": -3.116617202758789, + "logps/chosen": -106.65141296386719, + "logps/rejected": -550.1068115234375, + "loss": 0.4342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15418891608715057, + "rewards/margins": 1.1533679962158203, + "rewards/rejected": -0.9991791248321533, + "step": 113 + }, + { + "epoch": 0.21, + "learning_rate": 9.156292771771445e-08, + "logits/chosen": -3.0492217540740967, + "logits/rejected": -3.0745859146118164, + "logps/chosen": -168.49696350097656, + "logps/rejected": -389.1912841796875, + "loss": 0.5369, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16526184976100922, + "rewards/margins": 0.6433807611465454, + "rewards/rejected": -0.478118896484375, + "step": 114 + }, + { + "epoch": 0.22, + "learning_rate": 9.1393267084997e-08, + "logits/chosen": -3.1082515716552734, + "logits/rejected": -3.139052391052246, + "logps/chosen": -267.2115173339844, + "logps/rejected": -857.3230590820312, + "loss": 0.4314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24358978867530823, + "rewards/margins": 1.839680552482605, + "rewards/rejected": -1.5960907936096191, + "step": 115 + }, + { + "epoch": 0.22, + "learning_rate": 9.122207801708802e-08, + "logits/chosen": -3.1676511764526367, + "logits/rejected": -3.1364190578460693, + "logps/chosen": -147.47096252441406, + "logps/rejected": -468.424560546875, + "loss": 0.4423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1830802857875824, + "rewards/margins": 0.8810966610908508, + "rewards/rejected": -0.6980164051055908, + "step": 116 + }, + { + "epoch": 0.22, + "learning_rate": 9.104936683509754e-08, + "logits/chosen": -3.122553586959839, + "logits/rejected": -3.072927474975586, + "logps/chosen": -110.1961669921875, + "logps/rejected": -494.167724609375, + "loss": 0.4438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19443054497241974, + "rewards/margins": 0.8462189435958862, + "rewards/rejected": -0.6517883539199829, + "step": 117 + }, + { + "epoch": 0.22, + "learning_rate": 9.087513991633923e-08, + "logits/chosen": -3.189775228500366, + "logits/rejected": -3.1867823600769043, + "logps/chosen": -156.18287658691406, + "logps/rejected": -454.39117431640625, + "loss": 0.4076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1625213623046875, + "rewards/margins": 0.8810303211212158, + "rewards/rejected": -0.7185089588165283, + "step": 118 + }, + { + "epoch": 0.22, + "learning_rate": 9.069940369409498e-08, + "logits/chosen": -3.0254712104797363, + "logits/rejected": -3.0980076789855957, + "logps/chosen": -156.47695922851562, + "logps/rejected": -564.5385131835938, + "loss": 0.4177, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20042800903320312, + "rewards/margins": 1.4114478826522827, + "rewards/rejected": -1.2110198736190796, + "step": 119 + }, + { + "epoch": 0.22, + "learning_rate": 9.052216465737725e-08, + "logits/chosen": -3.1209192276000977, + "logits/rejected": -3.1087708473205566, + "logps/chosen": -127.07907104492188, + "logps/rejected": -301.79217529296875, + "loss": 0.4483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1675090789794922, + "rewards/margins": 0.5922176837921143, + "rewards/rejected": -0.4247085750102997, + "step": 120 + }, + { + "epoch": 0.23, + "learning_rate": 9.034342935068952e-08, + "logits/chosen": -3.169917106628418, + "logits/rejected": -3.1283116340637207, + "logps/chosen": -158.75308227539062, + "logps/rejected": -490.4541320800781, + "loss": 0.4714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2083488404750824, + "rewards/margins": 0.9963615536689758, + "rewards/rejected": -0.788012683391571, + "step": 121 + }, + { + "epoch": 0.23, + "learning_rate": 9.016320437378465e-08, + "logits/chosen": -3.138373851776123, + "logits/rejected": -3.1711044311523438, + "logps/chosen": -106.46746826171875, + "logps/rejected": -519.9852294921875, + "loss": 0.4726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2025146484375, + "rewards/margins": 1.4847092628479004, + "rewards/rejected": -1.2821946144104004, + "step": 122 + }, + { + "epoch": 0.23, + "learning_rate": 8.998149638142119e-08, + "logits/chosen": -3.0655713081359863, + "logits/rejected": -3.099855422973633, + "logps/chosen": -140.7733612060547, + "logps/rejected": -683.9154052734375, + "loss": 0.3753, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16420936584472656, + "rewards/margins": 1.5231021642684937, + "rewards/rejected": -1.358892798423767, + "step": 123 + }, + { + "epoch": 0.23, + "learning_rate": 8.979831208311757e-08, + "logits/chosen": -3.021638870239258, + "logits/rejected": -3.1392478942871094, + "logps/chosen": -146.2952117919922, + "logps/rejected": -525.5653686523438, + "loss": 0.4674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16863135993480682, + "rewards/margins": 1.0946629047393799, + "rewards/rejected": -0.9260315299034119, + "step": 124 + }, + { + "epoch": 0.23, + "learning_rate": 8.96136582429045e-08, + "logits/chosen": -3.16888689994812, + "logits/rejected": -3.181912899017334, + "logps/chosen": -103.69701385498047, + "logps/rejected": -483.1859130859375, + "loss": 0.466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1593216061592102, + "rewards/margins": 1.088008165359497, + "rewards/rejected": -0.9286864995956421, + "step": 125 + }, + { + "epoch": 0.24, + "learning_rate": 8.942754167907506e-08, + "logits/chosen": -3.072411060333252, + "logits/rejected": -3.0875394344329834, + "logps/chosen": -130.0809326171875, + "logps/rejected": -348.6944580078125, + "loss": 0.4759, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13476943969726562, + "rewards/margins": 0.7771415710449219, + "rewards/rejected": -0.6423721313476562, + "step": 126 + }, + { + "epoch": 0.24, + "learning_rate": 8.923996926393305e-08, + "logits/chosen": -3.1303257942199707, + "logits/rejected": -3.1192359924316406, + "logps/chosen": -169.9324951171875, + "logps/rejected": -497.0655212402344, + "loss": 0.4302, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14552079141139984, + "rewards/margins": 1.2417824268341064, + "rewards/rejected": -1.0962616205215454, + "step": 127 + }, + { + "epoch": 0.24, + "learning_rate": 8.905094792353916e-08, + "logits/chosen": -2.9814562797546387, + "logits/rejected": -2.9904112815856934, + "logps/chosen": -133.07339477539062, + "logps/rejected": -427.66131591796875, + "loss": 0.4399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1687774658203125, + "rewards/margins": 0.8755844831466675, + "rewards/rejected": -0.7068069577217102, + "step": 128 + }, + { + "epoch": 0.24, + "learning_rate": 8.886048463745524e-08, + "logits/chosen": -3.071864604949951, + "logits/rejected": -3.1114721298217773, + "logps/chosen": -117.20301818847656, + "logps/rejected": -368.5315856933594, + "loss": 0.4422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1759941130876541, + "rewards/margins": 0.9067832827568054, + "rewards/rejected": -0.7307891845703125, + "step": 129 + }, + { + "epoch": 0.24, + "learning_rate": 8.866858643848663e-08, + "logits/chosen": -3.084655284881592, + "logits/rejected": -3.0578103065490723, + "logps/chosen": -165.38238525390625, + "logps/rejected": -577.7196655273438, + "loss": 0.3718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.143138125538826, + "rewards/margins": 1.271183729171753, + "rewards/rejected": -1.128045678138733, + "step": 130 + }, + { + "epoch": 0.25, + "learning_rate": 8.847526041242245e-08, + "logits/chosen": -2.984198570251465, + "logits/rejected": -3.057990789413452, + "logps/chosen": -184.3570556640625, + "logps/rejected": -702.1917114257812, + "loss": 0.4363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09988174587488174, + "rewards/margins": 1.5120353698730469, + "rewards/rejected": -1.4121536016464233, + "step": 131 + }, + { + "epoch": 0.25, + "learning_rate": 8.828051369777389e-08, + "logits/chosen": -3.090363025665283, + "logits/rejected": -3.1254634857177734, + "logps/chosen": -112.08699035644531, + "logps/rejected": -334.5144958496094, + "loss": 0.4358, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20699423551559448, + "rewards/margins": 0.8537647128105164, + "rewards/rejected": -0.6467704772949219, + "step": 132 + }, + { + "epoch": 0.25, + "learning_rate": 8.808435348551072e-08, + "logits/chosen": -3.1258444786071777, + "logits/rejected": -3.075976848602295, + "logps/chosen": -185.4570770263672, + "logps/rejected": -495.1496887207031, + "loss": 0.4633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1946006715297699, + "rewards/margins": 0.996538519859314, + "rewards/rejected": -0.8019378781318665, + "step": 133 + }, + { + "epoch": 0.25, + "learning_rate": 8.788678701879573e-08, + "logits/chosen": -3.040581703186035, + "logits/rejected": -3.053168535232544, + "logps/chosen": -126.69790649414062, + "logps/rejected": -358.28594970703125, + "loss": 0.4161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16798058152198792, + "rewards/margins": 0.645687460899353, + "rewards/rejected": -0.4777069091796875, + "step": 134 + }, + { + "epoch": 0.25, + "learning_rate": 8.768782159271726e-08, + "logits/chosen": -3.111799716949463, + "logits/rejected": -3.134915828704834, + "logps/chosen": -163.035888671875, + "logps/rejected": -362.9132385253906, + "loss": 0.4677, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17090149223804474, + "rewards/margins": 0.7644149661064148, + "rewards/rejected": -0.5935134887695312, + "step": 135 + }, + { + "epoch": 0.25, + "learning_rate": 8.748746455401986e-08, + "logits/chosen": -3.113753318786621, + "logits/rejected": -3.138115406036377, + "logps/chosen": -132.03292846679688, + "logps/rejected": -433.7405700683594, + "loss": 0.4686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25986748933792114, + "rewards/margins": 0.9728328585624695, + "rewards/rejected": -0.7129654288291931, + "step": 136 + }, + { + "epoch": 0.26, + "learning_rate": 8.7285723300833e-08, + "logits/chosen": -3.105445146560669, + "logits/rejected": -3.1081247329711914, + "logps/chosen": -150.33023071289062, + "logps/rejected": -327.6942138671875, + "loss": 0.4699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1790260374546051, + "rewards/margins": 0.5935661792755127, + "rewards/rejected": -0.4145401120185852, + "step": 137 + }, + { + "epoch": 0.26, + "learning_rate": 8.708260528239788e-08, + "logits/chosen": -3.010998487472534, + "logits/rejected": -3.066967010498047, + "logps/chosen": -144.4120635986328, + "logps/rejected": -390.4621887207031, + "loss": 0.3726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.267355352640152, + "rewards/margins": 0.9715942740440369, + "rewards/rejected": -0.7042388916015625, + "step": 138 + }, + { + "epoch": 0.26, + "learning_rate": 8.687811799879239e-08, + "logits/chosen": -3.0592503547668457, + "logits/rejected": -3.0189576148986816, + "logps/chosen": -151.8365020751953, + "logps/rejected": -366.52764892578125, + "loss": 0.4501, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.263876348733902, + "rewards/margins": 0.7744407653808594, + "rewards/rejected": -0.5105644464492798, + "step": 139 + }, + { + "epoch": 0.26, + "learning_rate": 8.667226900065418e-08, + "logits/chosen": -3.042419672012329, + "logits/rejected": -3.0159525871276855, + "logps/chosen": -151.744873046875, + "logps/rejected": -517.6112670898438, + "loss": 0.4599, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14609260857105255, + "rewards/margins": 0.9927074909210205, + "rewards/rejected": -0.8466148376464844, + "step": 140 + }, + { + "epoch": 0.26, + "learning_rate": 8.646506588890182e-08, + "logits/chosen": -3.072047233581543, + "logits/rejected": -3.040619134902954, + "logps/chosen": -125.91270446777344, + "logps/rejected": -276.9568786621094, + "loss": 0.4658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1311519742012024, + "rewards/margins": 0.559983491897583, + "rewards/rejected": -0.4288314878940582, + "step": 141 + }, + { + "epoch": 0.27, + "learning_rate": 8.625651631445419e-08, + "logits/chosen": -3.1003661155700684, + "logits/rejected": -3.0848124027252197, + "logps/chosen": -142.74180603027344, + "logps/rejected": -620.009521484375, + "loss": 0.3684, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15693169832229614, + "rewards/margins": 1.493919014930725, + "rewards/rejected": -1.3369872570037842, + "step": 142 + }, + { + "epoch": 0.27, + "learning_rate": 8.60466279779479e-08, + "logits/chosen": -3.0697405338287354, + "logits/rejected": -3.0409960746765137, + "logps/chosen": -113.85780334472656, + "logps/rejected": -446.3238220214844, + "loss": 0.3693, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.150096133351326, + "rewards/margins": 1.0657562017440796, + "rewards/rejected": -0.9156600832939148, + "step": 143 + }, + { + "epoch": 0.27, + "learning_rate": 8.5835408629453e-08, + "logits/chosen": -3.045011520385742, + "logits/rejected": -2.9880895614624023, + "logps/chosen": -135.05197143554688, + "logps/rejected": -322.490234375, + "loss": 0.4254, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16464386880397797, + "rewards/margins": 0.8704513907432556, + "rewards/rejected": -0.7058075070381165, + "step": 144 + }, + { + "epoch": 0.27, + "learning_rate": 8.562286606818683e-08, + "logits/chosen": -3.132445812225342, + "logits/rejected": -3.1968131065368652, + "logps/chosen": -130.9744415283203, + "logps/rejected": -515.9541625976562, + "loss": 0.3647, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1467266082763672, + "rewards/margins": 1.254909873008728, + "rewards/rejected": -1.1081832647323608, + "step": 145 + }, + { + "epoch": 0.27, + "learning_rate": 8.540900814222597e-08, + "logits/chosen": -3.0603253841400146, + "logits/rejected": -3.0761523246765137, + "logps/chosen": -172.5789337158203, + "logps/rejected": -518.6675415039062, + "loss": 0.3418, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2662513852119446, + "rewards/margins": 1.4048638343811035, + "rewards/rejected": -1.1386123895645142, + "step": 146 + }, + { + "epoch": 0.28, + "learning_rate": 8.519384274821648e-08, + "logits/chosen": -3.1498398780822754, + "logits/rejected": -3.1709048748016357, + "logps/chosen": -145.49169921875, + "logps/rejected": -337.00616455078125, + "loss": 0.4327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2250465452671051, + "rewards/margins": 0.9370415210723877, + "rewards/rejected": -0.7119949460029602, + "step": 147 + }, + { + "epoch": 0.28, + "learning_rate": 8.497737783108236e-08, + "logits/chosen": -3.1476213932037354, + "logits/rejected": -3.155515193939209, + "logps/chosen": -166.57260131835938, + "logps/rejected": -361.56304931640625, + "loss": 0.4346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15434494614601135, + "rewards/margins": 0.9965004324913025, + "rewards/rejected": -0.8421554565429688, + "step": 148 + }, + { + "epoch": 0.28, + "learning_rate": 8.475962138373212e-08, + "logits/chosen": -3.107166290283203, + "logits/rejected": -3.075732707977295, + "logps/chosen": -164.667724609375, + "logps/rejected": -564.3240356445312, + "loss": 0.3788, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17501601576805115, + "rewards/margins": 1.5689584016799927, + "rewards/rejected": -1.3939423561096191, + "step": 149 + }, + { + "epoch": 0.28, + "learning_rate": 8.454058144676365e-08, + "logits/chosen": -3.104138135910034, + "logits/rejected": -3.123447895050049, + "logps/chosen": -112.65327453613281, + "logps/rejected": -329.87786865234375, + "loss": 0.3847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20284995436668396, + "rewards/margins": 0.8799358606338501, + "rewards/rejected": -0.6770858764648438, + "step": 150 + }, + { + "epoch": 0.28, + "learning_rate": 8.432026610816743e-08, + "logits/chosen": -3.1393113136291504, + "logits/rejected": -3.128964900970459, + "logps/chosen": -135.4465789794922, + "logps/rejected": -547.0640869140625, + "loss": 0.3898, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1959587037563324, + "rewards/margins": 1.338021159172058, + "rewards/rejected": -1.1420624256134033, + "step": 151 + }, + { + "epoch": 0.28, + "learning_rate": 8.409868350302774e-08, + "logits/chosen": -3.057896137237549, + "logits/rejected": -3.075503349304199, + "logps/chosen": -161.33297729492188, + "logps/rejected": -572.254638671875, + "loss": 0.3975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20335960388183594, + "rewards/margins": 1.4352383613586426, + "rewards/rejected": -1.2318787574768066, + "step": 152 + }, + { + "epoch": 0.29, + "learning_rate": 8.387584181322231e-08, + "logits/chosen": -3.0175719261169434, + "logits/rejected": -3.0427067279815674, + "logps/chosen": -157.06658935546875, + "logps/rejected": -640.1492309570312, + "loss": 0.4113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24829331040382385, + "rewards/margins": 1.7538925409317017, + "rewards/rejected": -1.5055992603302002, + "step": 153 + }, + { + "epoch": 0.29, + "learning_rate": 8.365174926712031e-08, + "logits/chosen": -3.1239688396453857, + "logits/rejected": -3.1347804069519043, + "logps/chosen": -127.88595581054688, + "logps/rejected": -460.97393798828125, + "loss": 0.3641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2217605710029602, + "rewards/margins": 1.2949097156524658, + "rewards/rejected": -1.0731490850448608, + "step": 154 + }, + { + "epoch": 0.29, + "learning_rate": 8.342641413927836e-08, + "logits/chosen": -3.119363307952881, + "logits/rejected": -3.1114578247070312, + "logps/chosen": -128.977783203125, + "logps/rejected": -493.179931640625, + "loss": 0.396, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20037461817264557, + "rewards/margins": 1.386695146560669, + "rewards/rejected": -1.1863205432891846, + "step": 155 + }, + { + "epoch": 0.29, + "learning_rate": 8.319984475013511e-08, + "logits/chosen": -3.128628730773926, + "logits/rejected": -3.084048271179199, + "logps/chosen": -134.41416931152344, + "logps/rejected": -382.800537109375, + "loss": 0.4389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26560288667678833, + "rewards/margins": 1.1294257640838623, + "rewards/rejected": -0.8638229370117188, + "step": 156 + }, + { + "epoch": 0.29, + "learning_rate": 8.297204946570397e-08, + "logits/chosen": -3.124553680419922, + "logits/rejected": -3.094208240509033, + "logps/chosen": -152.77996826171875, + "logps/rejected": -438.76580810546875, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2242988646030426, + "rewards/margins": 1.0927727222442627, + "rewards/rejected": -0.8684737682342529, + "step": 157 + }, + { + "epoch": 0.3, + "learning_rate": 8.274303669726426e-08, + "logits/chosen": -3.1274261474609375, + "logits/rejected": -3.1199870109558105, + "logps/chosen": -143.03759765625, + "logps/rejected": -461.93115234375, + "loss": 0.4249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17229652404785156, + "rewards/margins": 1.1714115142822266, + "rewards/rejected": -0.999114990234375, + "step": 158 + }, + { + "epoch": 0.3, + "learning_rate": 8.251281490105044e-08, + "logits/chosen": -3.07041072845459, + "logits/rejected": -3.0654239654541016, + "logps/chosen": -111.69403076171875, + "logps/rejected": -311.1728820800781, + "loss": 0.4208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24965591728687286, + "rewards/margins": 0.988555908203125, + "rewards/rejected": -0.7389000058174133, + "step": 159 + }, + { + "epoch": 0.3, + "learning_rate": 8.228139257794011e-08, + "logits/chosen": -3.0866289138793945, + "logits/rejected": -3.1681299209594727, + "logps/chosen": -121.09650421142578, + "logps/rejected": -418.9101867675781, + "loss": 0.4037, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15311890840530396, + "rewards/margins": 0.9386398792266846, + "rewards/rejected": -0.7855209112167358, + "step": 160 + }, + { + "epoch": 0.3, + "learning_rate": 8.204877827313996e-08, + "logits/chosen": -3.143423080444336, + "logits/rejected": -3.201051712036133, + "logps/chosen": -168.31689453125, + "logps/rejected": -531.93798828125, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2655372619628906, + "rewards/margins": 1.544690728187561, + "rewards/rejected": -1.2791534662246704, + "step": 161 + }, + { + "epoch": 0.3, + "learning_rate": 8.181498057587027e-08, + "logits/chosen": -3.009154796600342, + "logits/rejected": -3.07800555229187, + "logps/chosen": -144.35983276367188, + "logps/rejected": -393.1241760253906, + "loss": 0.3836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27937623858451843, + "rewards/margins": 0.9930267333984375, + "rewards/rejected": -0.7136505246162415, + "step": 162 + }, + { + "epoch": 0.31, + "learning_rate": 8.158000811904778e-08, + "logits/chosen": -3.1126770973205566, + "logits/rejected": -3.1358866691589355, + "logps/chosen": -128.4503173828125, + "logps/rejected": -352.627197265625, + "loss": 0.4117, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28469008207321167, + "rewards/margins": 0.9934929013252258, + "rewards/rejected": -0.7088028192520142, + "step": 163 + }, + { + "epoch": 0.31, + "learning_rate": 8.134386957896688e-08, + "logits/chosen": -3.0977401733398438, + "logits/rejected": -3.0735599994659424, + "logps/chosen": -156.75912475585938, + "logps/rejected": -363.0420837402344, + "loss": 0.3745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.275076299905777, + "rewards/margins": 1.0772919654846191, + "rewards/rejected": -0.802215576171875, + "step": 164 + }, + { + "epoch": 0.31, + "learning_rate": 8.110657367497931e-08, + "logits/chosen": -3.150848388671875, + "logits/rejected": -3.1347789764404297, + "logps/chosen": -109.29672241210938, + "logps/rejected": -451.596435546875, + "loss": 0.3644, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2485305815935135, + "rewards/margins": 1.4713006019592285, + "rewards/rejected": -1.2227699756622314, + "step": 165 + }, + { + "epoch": 0.31, + "learning_rate": 8.086812916917218e-08, + "logits/chosen": -3.0719268321990967, + "logits/rejected": -3.048916816711426, + "logps/chosen": -171.05020141601562, + "logps/rejected": -860.4120483398438, + "loss": 0.3573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2701801359653473, + "rewards/margins": 2.0843265056610107, + "rewards/rejected": -1.8141463994979858, + "step": 166 + }, + { + "epoch": 0.31, + "learning_rate": 8.062854486604434e-08, + "logits/chosen": -3.0874786376953125, + "logits/rejected": -3.0919647216796875, + "logps/chosen": -140.21209716796875, + "logps/rejected": -468.8741455078125, + "loss": 0.3866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1646263152360916, + "rewards/margins": 1.2207329273223877, + "rewards/rejected": -1.0561065673828125, + "step": 167 + }, + { + "epoch": 0.31, + "learning_rate": 8.038782961218136e-08, + "logits/chosen": -3.11396861076355, + "logits/rejected": -3.159646987915039, + "logps/chosen": -140.23974609375, + "logps/rejected": -565.9717407226562, + "loss": 0.3241, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13854217529296875, + "rewards/margins": 1.4974122047424316, + "rewards/rejected": -1.358870029449463, + "step": 168 + }, + { + "epoch": 0.32, + "learning_rate": 8.014599229592894e-08, + "logits/chosen": -3.0894038677215576, + "logits/rejected": -3.1218271255493164, + "logps/chosen": -138.22906494140625, + "logps/rejected": -385.6571350097656, + "loss": 0.4122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20462380349636078, + "rewards/margins": 1.1783115863800049, + "rewards/rejected": -0.9736877679824829, + "step": 169 + }, + { + "epoch": 0.32, + "learning_rate": 7.990304184706454e-08, + "logits/chosen": -3.032294750213623, + "logits/rejected": -3.0112080574035645, + "logps/chosen": -138.4254913330078, + "logps/rejected": -489.7154846191406, + "loss": 0.3587, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23791390657424927, + "rewards/margins": 1.3587009906768799, + "rewards/rejected": -1.1207870244979858, + "step": 170 + }, + { + "epoch": 0.32, + "learning_rate": 7.965898723646775e-08, + "logits/chosen": -3.0965795516967773, + "logits/rejected": -3.1196603775024414, + "logps/chosen": -178.02145385742188, + "logps/rejected": -670.2030029296875, + "loss": 0.3464, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17137451469898224, + "rewards/margins": 2.153146266937256, + "rewards/rejected": -1.9817719459533691, + "step": 171 + }, + { + "epoch": 0.32, + "learning_rate": 7.941383747578912e-08, + "logits/chosen": -3.0417559146881104, + "logits/rejected": -3.050403118133545, + "logps/chosen": -138.2800750732422, + "logps/rejected": -528.7360229492188, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.226939395070076, + "rewards/margins": 1.4341827630996704, + "rewards/rejected": -1.2072433233261108, + "step": 172 + }, + { + "epoch": 0.32, + "learning_rate": 7.91676016171172e-08, + "logits/chosen": -3.125019073486328, + "logits/rejected": -3.111180543899536, + "logps/chosen": -135.67886352539062, + "logps/rejected": -283.5468444824219, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1581367552280426, + "rewards/margins": 0.8059806823730469, + "rewards/rejected": -0.6478439569473267, + "step": 173 + }, + { + "epoch": 0.33, + "learning_rate": 7.89202887526445e-08, + "logits/chosen": -3.0502262115478516, + "logits/rejected": -3.096996784210205, + "logps/chosen": -163.48724365234375, + "logps/rejected": -446.4747009277344, + "loss": 0.3805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21134833991527557, + "rewards/margins": 1.1128208637237549, + "rewards/rejected": -0.9014724493026733, + "step": 174 + }, + { + "epoch": 0.33, + "learning_rate": 7.867190801433165e-08, + "logits/chosen": -3.0919034481048584, + "logits/rejected": -3.082836627960205, + "logps/chosen": -142.4695587158203, + "logps/rejected": -292.0237121582031, + "loss": 0.4329, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21006011962890625, + "rewards/margins": 0.7753769159317017, + "rewards/rejected": -0.5653167963027954, + "step": 175 + }, + { + "epoch": 0.33, + "learning_rate": 7.842246857357022e-08, + "logits/chosen": -3.120022773742676, + "logits/rejected": -3.1595611572265625, + "logps/chosen": -105.54061126708984, + "logps/rejected": -478.34600830078125, + "loss": 0.366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2542923092842102, + "rewards/margins": 1.3349777460098267, + "rewards/rejected": -1.0806854963302612, + "step": 176 + }, + { + "epoch": 0.33, + "learning_rate": 7.81719796408441e-08, + "logits/chosen": -2.9801721572875977, + "logits/rejected": -3.0193076133728027, + "logps/chosen": -120.82072448730469, + "logps/rejected": -548.0423583984375, + "loss": 0.3779, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20052489638328552, + "rewards/margins": 1.6665276288986206, + "rewards/rejected": -1.4660027027130127, + "step": 177 + }, + { + "epoch": 0.33, + "learning_rate": 7.792045046538939e-08, + "logits/chosen": -3.0578842163085938, + "logits/rejected": -3.0333991050720215, + "logps/chosen": -128.26905822753906, + "logps/rejected": -439.92486572265625, + "loss": 0.36, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22348670661449432, + "rewards/margins": 1.2647159099578857, + "rewards/rejected": -1.041229248046875, + "step": 178 + }, + { + "epoch": 0.34, + "learning_rate": 7.766789033485287e-08, + "logits/chosen": -3.0480527877807617, + "logits/rejected": -3.079054594039917, + "logps/chosen": -129.29696655273438, + "logps/rejected": -443.5643615722656, + "loss": 0.3506, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2144927978515625, + "rewards/margins": 1.135899305343628, + "rewards/rejected": -0.9214065670967102, + "step": 179 + }, + { + "epoch": 0.34, + "learning_rate": 7.741430857494902e-08, + "logits/chosen": -3.133478879928589, + "logits/rejected": -3.1139793395996094, + "logps/chosen": -146.3385009765625, + "logps/rejected": -683.3201904296875, + "loss": 0.3276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2718261778354645, + "rewards/margins": 2.5130157470703125, + "rewards/rejected": -2.241189479827881, + "step": 180 + }, + { + "epoch": 0.34, + "learning_rate": 7.715971454911576e-08, + "logits/chosen": -3.1502833366394043, + "logits/rejected": -3.1300578117370605, + "logps/chosen": -151.34323120117188, + "logps/rejected": -360.4054260253906, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24483871459960938, + "rewards/margins": 1.162651777267456, + "rewards/rejected": -0.9178131222724915, + "step": 181 + }, + { + "epoch": 0.34, + "learning_rate": 7.690411765816863e-08, + "logits/chosen": -3.026279926300049, + "logits/rejected": -3.0114760398864746, + "logps/chosen": -133.19659423828125, + "logps/rejected": -384.7335205078125, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27122271060943604, + "rewards/margins": 1.0533318519592285, + "rewards/rejected": -0.7821090817451477, + "step": 182 + }, + { + "epoch": 0.34, + "learning_rate": 7.66475273399537e-08, + "logits/chosen": -3.003805160522461, + "logits/rejected": -3.010894775390625, + "logps/chosen": -174.76341247558594, + "logps/rejected": -366.78070068359375, + "loss": 0.4027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19791793823242188, + "rewards/margins": 0.8336327075958252, + "rewards/rejected": -0.6357147693634033, + "step": 183 + }, + { + "epoch": 0.34, + "learning_rate": 7.638995306899907e-08, + "logits/chosen": -3.1363601684570312, + "logits/rejected": -3.1454434394836426, + "logps/chosen": -165.67306518554688, + "logps/rejected": -441.46905517578125, + "loss": 0.4187, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2907875180244446, + "rewards/margins": 1.2665879726409912, + "rewards/rejected": -0.9758003950119019, + "step": 184 + }, + { + "epoch": 0.35, + "learning_rate": 7.613140435616502e-08, + "logits/chosen": -3.0711264610290527, + "logits/rejected": -3.0201244354248047, + "logps/chosen": -134.23565673828125, + "logps/rejected": -318.7680969238281, + "loss": 0.4027, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27338409423828125, + "rewards/margins": 1.0023276805877686, + "rewards/rejected": -0.7289436459541321, + "step": 185 + }, + { + "epoch": 0.35, + "learning_rate": 7.587189074829284e-08, + "logits/chosen": -2.9982187747955322, + "logits/rejected": -3.0321502685546875, + "logps/chosen": -187.86297607421875, + "logps/rejected": -1057.730224609375, + "loss": 0.2778, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3316024839878082, + "rewards/margins": 3.1583938598632812, + "rewards/rejected": -2.826791524887085, + "step": 186 + }, + { + "epoch": 0.35, + "learning_rate": 7.561142182785233e-08, + "logits/chosen": -3.1959924697875977, + "logits/rejected": -3.1643824577331543, + "logps/chosen": -168.19232177734375, + "logps/rejected": -578.1363525390625, + "loss": 0.3054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23027116060256958, + "rewards/margins": 2.113675594329834, + "rewards/rejected": -1.8834044933319092, + "step": 187 + }, + { + "epoch": 0.35, + "learning_rate": 7.53500072125879e-08, + "logits/chosen": -3.128018379211426, + "logits/rejected": -3.074352979660034, + "logps/chosen": -130.65704345703125, + "logps/rejected": -561.9429321289062, + "loss": 0.4067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40116652846336365, + "rewards/margins": 1.7442193031311035, + "rewards/rejected": -1.343052625656128, + "step": 188 + }, + { + "epoch": 0.35, + "learning_rate": 7.508765655516357e-08, + "logits/chosen": -3.101489543914795, + "logits/rejected": -3.0862302780151367, + "logps/chosen": -120.83788299560547, + "logps/rejected": -563.9649658203125, + "loss": 0.3087, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2769813537597656, + "rewards/margins": 1.5209541320800781, + "rewards/rejected": -1.2439727783203125, + "step": 189 + }, + { + "epoch": 0.36, + "learning_rate": 7.482437954280635e-08, + "logits/chosen": -3.079007148742676, + "logits/rejected": -3.091247320175171, + "logps/chosen": -179.1421356201172, + "logps/rejected": -456.065185546875, + "loss": 0.3653, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3220718502998352, + "rewards/margins": 1.4695618152618408, + "rewards/rejected": -1.1474899053573608, + "step": 190 + }, + { + "epoch": 0.36, + "learning_rate": 7.456018589694873e-08, + "logits/chosen": -3.1029818058013916, + "logits/rejected": -3.083306312561035, + "logps/chosen": -116.45255279541016, + "logps/rejected": -335.4932861328125, + "loss": 0.3459, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23546981811523438, + "rewards/margins": 0.9613624811172485, + "rewards/rejected": -0.7258926630020142, + "step": 191 + }, + { + "epoch": 0.36, + "learning_rate": 7.429508537286962e-08, + "logits/chosen": -3.185397148132324, + "logits/rejected": -3.1348862648010254, + "logps/chosen": -143.47225952148438, + "logps/rejected": -648.783203125, + "loss": 0.2847, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28070297837257385, + "rewards/margins": 1.8738763332366943, + "rewards/rejected": -1.5931732654571533, + "step": 192 + }, + { + "epoch": 0.36, + "learning_rate": 7.402908775933419e-08, + "logits/chosen": -3.0737929344177246, + "logits/rejected": -3.0830910205841064, + "logps/chosen": -99.85086059570312, + "logps/rejected": -644.4613647460938, + "loss": 0.3774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22198373079299927, + "rewards/margins": 1.9104312658309937, + "rewards/rejected": -1.6884475946426392, + "step": 193 + }, + { + "epoch": 0.36, + "learning_rate": 7.376220287823236e-08, + "logits/chosen": -3.1346194744110107, + "logits/rejected": -3.1075453758239746, + "logps/chosen": -134.85079956054688, + "logps/rejected": -382.0467834472656, + "loss": 0.3683, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21582183241844177, + "rewards/margins": 1.1056045293807983, + "rewards/rejected": -0.889782726764679, + "step": 194 + }, + { + "epoch": 0.37, + "learning_rate": 7.349444058421619e-08, + "logits/chosen": -3.052872657775879, + "logits/rejected": -3.0430397987365723, + "logps/chosen": -99.17848205566406, + "logps/rejected": -324.671875, + "loss": 0.4311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19719086587429047, + "rewards/margins": 0.912251353263855, + "rewards/rejected": -0.7150604724884033, + "step": 195 + }, + { + "epoch": 0.37, + "learning_rate": 7.322581076433596e-08, + "logits/chosen": -3.1142826080322266, + "logits/rejected": -3.103149890899658, + "logps/chosen": -135.0367431640625, + "logps/rejected": -397.2926025390625, + "loss": 0.3694, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19353103637695312, + "rewards/margins": 1.1570823192596436, + "rewards/rejected": -0.9635512828826904, + "step": 196 + }, + { + "epoch": 0.37, + "learning_rate": 7.295632333767511e-08, + "logits/chosen": -3.0561118125915527, + "logits/rejected": -3.0701236724853516, + "logps/chosen": -143.77894592285156, + "logps/rejected": -488.31585693359375, + "loss": 0.3619, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24809953570365906, + "rewards/margins": 1.5931649208068848, + "rewards/rejected": -1.3450653553009033, + "step": 197 + }, + { + "epoch": 0.37, + "learning_rate": 7.2685988254984e-08, + "logits/chosen": -3.1441235542297363, + "logits/rejected": -3.182021379470825, + "logps/chosen": -111.35599517822266, + "logps/rejected": -320.8213806152344, + "loss": 0.3762, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26290664076805115, + "rewards/margins": 1.0056846141815186, + "rewards/rejected": -0.7427780628204346, + "step": 198 + }, + { + "epoch": 0.37, + "learning_rate": 7.241481549831243e-08, + "logits/chosen": -3.1249852180480957, + "logits/rejected": -3.0078768730163574, + "logps/chosen": -118.59858703613281, + "logps/rejected": -308.3047180175781, + "loss": 0.3554, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28826218843460083, + "rewards/margins": 1.0878288745880127, + "rewards/rejected": -0.7995666265487671, + "step": 199 + }, + { + "epoch": 0.37, + "learning_rate": 7.214281508064106e-08, + "logits/chosen": -3.1103806495666504, + "logits/rejected": -3.1423544883728027, + "logps/chosen": -119.29349517822266, + "logps/rejected": -387.6959533691406, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15581893920898438, + "rewards/margins": 0.9921013116836548, + "rewards/rejected": -0.8362823724746704, + "step": 200 + }, + { + "epoch": 0.38, + "learning_rate": 7.18699970455118e-08, + "logits/chosen": -3.1214535236358643, + "logits/rejected": -3.1330199241638184, + "logps/chosen": -182.51937866210938, + "logps/rejected": -622.013671875, + "loss": 0.3567, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28909531235694885, + "rewards/margins": 1.8827965259552002, + "rewards/rejected": -1.5937011241912842, + "step": 201 + }, + { + "epoch": 0.38, + "learning_rate": 7.15963714666568e-08, + "logits/chosen": -3.1520802974700928, + "logits/rejected": -3.1895439624786377, + "logps/chosen": -158.7707977294922, + "logps/rejected": -675.6400756835938, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33384743332862854, + "rewards/margins": 2.050158977508545, + "rewards/rejected": -1.7163116931915283, + "step": 202 + }, + { + "epoch": 0.38, + "learning_rate": 7.132194844762653e-08, + "logits/chosen": -3.1072258949279785, + "logits/rejected": -3.157679557800293, + "logps/chosen": -166.12222290039062, + "logps/rejected": -509.66375732421875, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14273911714553833, + "rewards/margins": 1.3185653686523438, + "rewards/rejected": -1.1758263111114502, + "step": 203 + }, + { + "epoch": 0.38, + "learning_rate": 7.104673812141675e-08, + "logits/chosen": -3.1887729167938232, + "logits/rejected": -3.2023355960845947, + "logps/chosen": -126.24608612060547, + "logps/rejected": -542.4002075195312, + "loss": 0.3321, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24906998872756958, + "rewards/margins": 1.7065361738204956, + "rewards/rejected": -1.4574661254882812, + "step": 204 + }, + { + "epoch": 0.38, + "learning_rate": 7.077075065009432e-08, + "logits/chosen": -3.196582794189453, + "logits/rejected": -3.234156370162964, + "logps/chosen": -142.7355194091797, + "logps/rejected": -706.07666015625, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4345497190952301, + "rewards/margins": 2.7163033485412598, + "rewards/rejected": -2.2817535400390625, + "step": 205 + }, + { + "epoch": 0.39, + "learning_rate": 7.049399622442198e-08, + "logits/chosen": -3.012101173400879, + "logits/rejected": -3.041718006134033, + "logps/chosen": -163.47854614257812, + "logps/rejected": -327.2855224609375, + "loss": 0.3895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1968032866716385, + "rewards/margins": 0.7822052240371704, + "rewards/rejected": -0.5854018926620483, + "step": 206 + }, + { + "epoch": 0.39, + "learning_rate": 7.021648506348203e-08, + "logits/chosen": -3.1372761726379395, + "logits/rejected": -3.11433482170105, + "logps/chosen": -138.8792724609375, + "logps/rejected": -449.33294677734375, + "loss": 0.3091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2484787106513977, + "rewards/margins": 1.496089220046997, + "rewards/rejected": -1.2476104497909546, + "step": 207 + }, + { + "epoch": 0.39, + "learning_rate": 6.993822741429905e-08, + "logits/chosen": -3.113503932952881, + "logits/rejected": -3.109076738357544, + "logps/chosen": -96.64332580566406, + "logps/rejected": -286.7460632324219, + "loss": 0.3975, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26781922578811646, + "rewards/margins": 0.9517158269882202, + "rewards/rejected": -0.6838966608047485, + "step": 208 + }, + { + "epoch": 0.39, + "learning_rate": 6.965923355146148e-08, + "logits/chosen": -3.13552188873291, + "logits/rejected": -3.1185572147369385, + "logps/chosen": -135.99850463867188, + "logps/rejected": -465.9122314453125, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18137627840042114, + "rewards/margins": 1.4905705451965332, + "rewards/rejected": -1.3091942071914673, + "step": 209 + }, + { + "epoch": 0.39, + "learning_rate": 6.93795137767422e-08, + "logits/chosen": -3.1855292320251465, + "logits/rejected": -3.1403372287750244, + "logps/chosen": -148.40907287597656, + "logps/rejected": -473.2942810058594, + "loss": 0.3276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3238334655761719, + "rewards/margins": 1.4347343444824219, + "rewards/rejected": -1.11090087890625, + "step": 210 + }, + { + "epoch": 0.4, + "learning_rate": 6.909907841871829e-08, + "logits/chosen": -3.0895376205444336, + "logits/rejected": -3.0891776084899902, + "logps/chosen": -205.66014099121094, + "logps/rejected": -578.239013671875, + "loss": 0.3296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39497798681259155, + "rewards/margins": 1.9324185848236084, + "rewards/rejected": -1.537440538406372, + "step": 211 + }, + { + "epoch": 0.4, + "learning_rate": 6.881793783238947e-08, + "logits/chosen": -3.0858983993530273, + "logits/rejected": -3.075383186340332, + "logps/chosen": -138.259033203125, + "logps/rejected": -260.78961181640625, + "loss": 0.3685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3107612729072571, + "rewards/margins": 1.1723682880401611, + "rewards/rejected": -0.8616069555282593, + "step": 212 + }, + { + "epoch": 0.4, + "learning_rate": 6.853610239879584e-08, + "logits/chosen": -3.076812505722046, + "logits/rejected": -3.087517499923706, + "logps/chosen": -110.74034118652344, + "logps/rejected": -408.36944580078125, + "loss": 0.2953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3056327998638153, + "rewards/margins": 1.560187578201294, + "rewards/rejected": -1.2545547485351562, + "step": 213 + }, + { + "epoch": 0.4, + "learning_rate": 6.82535825246346e-08, + "logits/chosen": -3.0803065299987793, + "logits/rejected": -3.0744991302490234, + "logps/chosen": -128.37149047851562, + "logps/rejected": -450.6481018066406, + "loss": 0.3267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24122773110866547, + "rewards/margins": 1.2830588817596436, + "rewards/rejected": -1.0418312549591064, + "step": 214 + }, + { + "epoch": 0.4, + "learning_rate": 6.797038864187564e-08, + "logits/chosen": -3.092517137527466, + "logits/rejected": -3.0764334201812744, + "logps/chosen": -120.40005493164062, + "logps/rejected": -371.488525390625, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23061561584472656, + "rewards/margins": 1.2288944721221924, + "rewards/rejected": -0.9982788562774658, + "step": 215 + }, + { + "epoch": 0.4, + "learning_rate": 6.768653120737652e-08, + "logits/chosen": -3.0641653537750244, + "logits/rejected": -3.037285566329956, + "logps/chosen": -143.79818725585938, + "logps/rejected": -365.3166809082031, + "loss": 0.3521, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07943115383386612, + "rewards/margins": 1.1224174499511719, + "rewards/rejected": -1.0429863929748535, + "step": 216 + }, + { + "epoch": 0.41, + "learning_rate": 6.740202070249621e-08, + "logits/chosen": -3.1340785026550293, + "logits/rejected": -3.1113715171813965, + "logps/chosen": -185.7567138671875, + "logps/rejected": -406.3802185058594, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27201730012893677, + "rewards/margins": 1.3650639057159424, + "rewards/rejected": -1.0930466651916504, + "step": 217 + }, + { + "epoch": 0.41, + "learning_rate": 6.711686763270817e-08, + "logits/chosen": -3.1108651161193848, + "logits/rejected": -3.041684627532959, + "logps/chosen": -138.54278564453125, + "logps/rejected": -319.34698486328125, + "loss": 0.3449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3608253598213196, + "rewards/margins": 1.1173515319824219, + "rewards/rejected": -0.7565262317657471, + "step": 218 + }, + { + "epoch": 0.41, + "learning_rate": 6.683108252721238e-08, + "logits/chosen": -3.1108598709106445, + "logits/rejected": -3.130713939666748, + "logps/chosen": -164.52552795410156, + "logps/rejected": -591.65576171875, + "loss": 0.3352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2514396607875824, + "rewards/margins": 2.042208194732666, + "rewards/rejected": -1.7907685041427612, + "step": 219 + }, + { + "epoch": 0.41, + "learning_rate": 6.654467593854656e-08, + "logits/chosen": -3.105079174041748, + "logits/rejected": -3.082455635070801, + "logps/chosen": -151.17462158203125, + "logps/rejected": -354.5472412109375, + "loss": 0.3058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28347283601760864, + "rewards/margins": 1.185598373413086, + "rewards/rejected": -0.9021255970001221, + "step": 220 + }, + { + "epoch": 0.41, + "learning_rate": 6.62576584421965e-08, + "logits/chosen": -3.158926010131836, + "logits/rejected": -3.1493148803710938, + "logps/chosen": -168.6816864013672, + "logps/rejected": -613.63916015625, + "loss": 0.295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.268087774515152, + "rewards/margins": 2.105883836746216, + "rewards/rejected": -1.8377959728240967, + "step": 221 + }, + { + "epoch": 0.42, + "learning_rate": 6.597004063620567e-08, + "logits/chosen": -3.0890145301818848, + "logits/rejected": -3.1202235221862793, + "logps/chosen": -136.68333435058594, + "logps/rejected": -346.3002014160156, + "loss": 0.3833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2608833312988281, + "rewards/margins": 1.092108964920044, + "rewards/rejected": -0.8312256336212158, + "step": 222 + }, + { + "epoch": 0.42, + "learning_rate": 6.568183314078377e-08, + "logits/chosen": -3.1143221855163574, + "logits/rejected": -3.103496789932251, + "logps/chosen": -104.32266235351562, + "logps/rejected": -359.3351135253906, + "loss": 0.3093, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2735714018344879, + "rewards/margins": 1.1769360303878784, + "rewards/rejected": -0.9033645391464233, + "step": 223 + }, + { + "epoch": 0.42, + "learning_rate": 6.539304659791455e-08, + "logits/chosen": -3.070297956466675, + "logits/rejected": -3.146482467651367, + "logps/chosen": -118.19391632080078, + "logps/rejected": -415.15008544921875, + "loss": 0.3242, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.372427374124527, + "rewards/margins": 1.4952789545059204, + "rewards/rejected": -1.1228516101837158, + "step": 224 + }, + { + "epoch": 0.42, + "learning_rate": 6.510369167096307e-08, + "logits/chosen": -3.1022181510925293, + "logits/rejected": -3.093017101287842, + "logps/chosen": -186.8883056640625, + "logps/rejected": -660.1473999023438, + "loss": 0.3207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3798721432685852, + "rewards/margins": 2.147749423980713, + "rewards/rejected": -1.7678773403167725, + "step": 225 + }, + { + "epoch": 0.42, + "learning_rate": 6.48137790442817e-08, + "logits/chosen": -3.1240806579589844, + "logits/rejected": -3.1490426063537598, + "logps/chosen": -142.44540405273438, + "logps/rejected": -455.0549621582031, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23443222045898438, + "rewards/margins": 1.5128593444824219, + "rewards/rejected": -1.2784271240234375, + "step": 226 + }, + { + "epoch": 0.43, + "learning_rate": 6.452331942281579e-08, + "logits/chosen": -3.168149471282959, + "logits/rejected": -3.1612815856933594, + "logps/chosen": -142.93612670898438, + "logps/rejected": -437.5288391113281, + "loss": 0.3422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1619010865688324, + "rewards/margins": 1.4272277355194092, + "rewards/rejected": -1.265326738357544, + "step": 227 + }, + { + "epoch": 0.43, + "learning_rate": 6.423232353170829e-08, + "logits/chosen": -3.101463794708252, + "logits/rejected": -3.0379104614257812, + "logps/chosen": -196.18170166015625, + "logps/rejected": -361.72357177734375, + "loss": 0.3181, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32583847641944885, + "rewards/margins": 1.350797414779663, + "rewards/rejected": -1.024958848953247, + "step": 228 + }, + { + "epoch": 0.43, + "learning_rate": 6.39408021159038e-08, + "logits/chosen": -3.1531615257263184, + "logits/rejected": -3.131930351257324, + "logps/chosen": -146.54295349121094, + "logps/rejected": -641.0851440429688, + "loss": 0.3279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34445494413375854, + "rewards/margins": 2.227456569671631, + "rewards/rejected": -1.8830018043518066, + "step": 229 + }, + { + "epoch": 0.43, + "learning_rate": 6.364876593975173e-08, + "logits/chosen": -3.144228458404541, + "logits/rejected": -3.1195549964904785, + "logps/chosen": -139.59925842285156, + "logps/rejected": -439.294921875, + "loss": 0.2954, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16955414414405823, + "rewards/margins": 1.258039951324463, + "rewards/rejected": -1.0884857177734375, + "step": 230 + }, + { + "epoch": 0.43, + "learning_rate": 6.335622578660888e-08, + "logits/chosen": -3.142523765563965, + "logits/rejected": -3.1508922576904297, + "logps/chosen": -128.2875213623047, + "logps/rejected": -525.2568359375, + "loss": 0.3058, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13116303086280823, + "rewards/margins": 2.138741970062256, + "rewards/rejected": -2.0075790882110596, + "step": 231 + }, + { + "epoch": 0.43, + "learning_rate": 6.306319245844133e-08, + "logits/chosen": -3.091243267059326, + "logits/rejected": -3.1328301429748535, + "logps/chosen": -164.3925323486328, + "logps/rejected": -759.9993286132812, + "loss": 0.2692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2816692590713501, + "rewards/margins": 2.579444408416748, + "rewards/rejected": -2.2977752685546875, + "step": 232 + }, + { + "epoch": 0.44, + "learning_rate": 6.276967677542542e-08, + "logits/chosen": -3.119192123413086, + "logits/rejected": -3.044914960861206, + "logps/chosen": -135.23504638671875, + "logps/rejected": -340.87908935546875, + "loss": 0.3312, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2042236328125, + "rewards/margins": 1.1593308448791504, + "rewards/rejected": -0.9551071524620056, + "step": 233 + }, + { + "epoch": 0.44, + "learning_rate": 6.247568957554839e-08, + "logits/chosen": -3.1228013038635254, + "logits/rejected": -3.132256031036377, + "logps/chosen": -137.23428344726562, + "logps/rejected": -348.2335205078125, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22653961181640625, + "rewards/margins": 1.1850006580352783, + "rewards/rejected": -0.9584609866142273, + "step": 234 + }, + { + "epoch": 0.44, + "learning_rate": 6.218124171420805e-08, + "logits/chosen": -3.1718199253082275, + "logits/rejected": -3.1417489051818848, + "logps/chosen": -135.6736297607422, + "logps/rejected": -511.5364074707031, + "loss": 0.306, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25927314162254333, + "rewards/margins": 1.9358235597610474, + "rewards/rejected": -1.6765503883361816, + "step": 235 + }, + { + "epoch": 0.44, + "learning_rate": 6.188634406381205e-08, + "logits/chosen": -3.1062068939208984, + "logits/rejected": -3.1139142513275146, + "logps/chosen": -129.48782348632812, + "logps/rejected": -510.5879211425781, + "loss": 0.3008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39607200026512146, + "rewards/margins": 1.681966781616211, + "rewards/rejected": -1.2858948707580566, + "step": 236 + }, + { + "epoch": 0.44, + "learning_rate": 6.159100751337641e-08, + "logits/chosen": -3.163496255874634, + "logits/rejected": -3.140267848968506, + "logps/chosen": -150.8787384033203, + "logps/rejected": -486.416259765625, + "loss": 0.3746, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2581859529018402, + "rewards/margins": 1.7486554384231567, + "rewards/rejected": -1.4904694557189941, + "step": 237 + }, + { + "epoch": 0.45, + "learning_rate": 6.129524296812333e-08, + "logits/chosen": -3.161357879638672, + "logits/rejected": -3.1613545417785645, + "logps/chosen": -125.16285705566406, + "logps/rejected": -431.1656188964844, + "loss": 0.2937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23663941025733948, + "rewards/margins": 1.3522675037384033, + "rewards/rejected": -1.1156281232833862, + "step": 238 + }, + { + "epoch": 0.45, + "learning_rate": 6.099906134907867e-08, + "logits/chosen": -3.163001537322998, + "logits/rejected": -3.1014328002929688, + "logps/chosen": -136.07827758789062, + "logps/rejected": -269.36328125, + "loss": 0.3509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4332634210586548, + "rewards/margins": 1.1173042058944702, + "rewards/rejected": -0.6840408444404602, + "step": 239 + }, + { + "epoch": 0.45, + "learning_rate": 6.070247359266859e-08, + "logits/chosen": -3.1244120597839355, + "logits/rejected": -3.1214523315429688, + "logps/chosen": -156.64736938476562, + "logps/rejected": -398.07440185546875, + "loss": 0.3667, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3276428282260895, + "rewards/margins": 1.5351409912109375, + "rewards/rejected": -1.2074981927871704, + "step": 240 + }, + { + "epoch": 0.45, + "learning_rate": 6.04054906503158e-08, + "logits/chosen": -3.101978302001953, + "logits/rejected": -3.064969539642334, + "logps/chosen": -158.0201873779297, + "logps/rejected": -412.8946533203125, + "loss": 0.3007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2558891475200653, + "rewards/margins": 1.5050957202911377, + "rewards/rejected": -1.24920654296875, + "step": 241 + }, + { + "epoch": 0.45, + "learning_rate": 6.010812348803509e-08, + "logits/chosen": -3.129756212234497, + "logits/rejected": -3.1065921783447266, + "logps/chosen": -140.70352172851562, + "logps/rejected": -324.84600830078125, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2932876646518707, + "rewards/margins": 1.076655626296997, + "rewards/rejected": -0.783367931842804, + "step": 242 + }, + { + "epoch": 0.46, + "learning_rate": 5.981038308602852e-08, + "logits/chosen": -3.06575870513916, + "logits/rejected": -3.068394660949707, + "logps/chosen": -132.2380828857422, + "logps/rejected": -595.5369873046875, + "loss": 0.3184, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2903503477573395, + "rewards/margins": 2.076498508453369, + "rewards/rejected": -1.7861480712890625, + "step": 243 + }, + { + "epoch": 0.46, + "learning_rate": 5.951228043827991e-08, + "logits/chosen": -3.116058826446533, + "logits/rejected": -3.0610275268554688, + "logps/chosen": -189.00509643554688, + "logps/rejected": -427.5319519042969, + "loss": 0.3277, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3974548578262329, + "rewards/margins": 1.3779220581054688, + "rewards/rejected": -0.9804672598838806, + "step": 244 + }, + { + "epoch": 0.46, + "learning_rate": 5.921382655214888e-08, + "logits/chosen": -3.1253342628479004, + "logits/rejected": -3.092487335205078, + "logps/chosen": -140.83905029296875, + "logps/rejected": -410.1316833496094, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3801593780517578, + "rewards/margins": 1.3683620691299438, + "rewards/rejected": -0.988202691078186, + "step": 245 + }, + { + "epoch": 0.46, + "learning_rate": 5.891503244796448e-08, + "logits/chosen": -3.123101234436035, + "logits/rejected": -3.1158649921417236, + "logps/chosen": -169.63571166992188, + "logps/rejected": -553.3143920898438, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3703598082065582, + "rewards/margins": 1.783512830734253, + "rewards/rejected": -1.4131531715393066, + "step": 246 + }, + { + "epoch": 0.46, + "learning_rate": 5.861590915861816e-08, + "logits/chosen": -3.072474241256714, + "logits/rejected": -3.0962040424346924, + "logps/chosen": -164.10443115234375, + "logps/rejected": -429.1443786621094, + "loss": 0.3266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32720106840133667, + "rewards/margins": 1.3862602710723877, + "rewards/rejected": -1.0590591430664062, + "step": 247 + }, + { + "epoch": 0.46, + "learning_rate": 5.8316467729156505e-08, + "logits/chosen": -3.106112480163574, + "logits/rejected": -3.0929336547851562, + "logps/chosen": -141.26922607421875, + "logps/rejected": -348.0220947265625, + "loss": 0.3798, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22937354445457458, + "rewards/margins": 1.2022618055343628, + "rewards/rejected": -0.9728882312774658, + "step": 248 + }, + { + "epoch": 0.47, + "learning_rate": 5.801671921637328e-08, + "logits/chosen": -3.1674907207489014, + "logits/rejected": -3.1161913871765137, + "logps/chosen": -166.6644287109375, + "logps/rejected": -392.75244140625, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2807350158691406, + "rewards/margins": 1.2220207452774048, + "rewards/rejected": -0.9412857294082642, + "step": 249 + }, + { + "epoch": 0.47, + "learning_rate": 5.771667468840128e-08, + "logits/chosen": -3.1209561824798584, + "logits/rejected": -3.0619754791259766, + "logps/chosen": -145.10382080078125, + "logps/rejected": -376.15374755859375, + "loss": 0.3141, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30258598923683167, + "rewards/margins": 1.3692653179168701, + "rewards/rejected": -1.0666794776916504, + "step": 250 + }, + { + "epoch": 0.47, + "learning_rate": 5.741634522430352e-08, + "logits/chosen": -3.1442203521728516, + "logits/rejected": -3.1543092727661133, + "logps/chosen": -142.92950439453125, + "logps/rejected": -382.5140686035156, + "loss": 0.2796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22719725966453552, + "rewards/margins": 1.416499376296997, + "rewards/rejected": -1.1893020868301392, + "step": 251 + }, + { + "epoch": 0.47, + "learning_rate": 5.711574191366426e-08, + "logits/chosen": -3.0394604206085205, + "logits/rejected": -2.970064640045166, + "logps/chosen": -173.89871215820312, + "logps/rejected": -495.28155517578125, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48568612337112427, + "rewards/margins": 2.042867660522461, + "rewards/rejected": -1.5571815967559814, + "step": 252 + }, + { + "epoch": 0.47, + "learning_rate": 5.6814875856179414e-08, + "logits/chosen": -3.044299364089966, + "logits/rejected": -3.1814165115356445, + "logps/chosen": -151.98924255371094, + "logps/rejected": -921.1364135742188, + "loss": 0.2902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41892778873443604, + "rewards/margins": 3.1185386180877686, + "rewards/rejected": -2.699610948562622, + "step": 253 + }, + { + "epoch": 0.48, + "learning_rate": 5.6513758161246785e-08, + "logits/chosen": -3.1681160926818848, + "logits/rejected": -3.0784568786621094, + "logps/chosen": -136.4245147705078, + "logps/rejected": -714.2351684570312, + "loss": 0.293, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2718299925327301, + "rewards/margins": 2.572472333908081, + "rewards/rejected": -2.300642490386963, + "step": 254 + }, + { + "epoch": 0.48, + "learning_rate": 5.6212399947555825e-08, + "logits/chosen": -3.0914225578308105, + "logits/rejected": -3.061450481414795, + "logps/chosen": -140.21926879882812, + "logps/rejected": -605.5050048828125, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4397850036621094, + "rewards/margins": 2.1181421279907227, + "rewards/rejected": -1.6783570051193237, + "step": 255 + }, + { + "epoch": 0.48, + "learning_rate": 5.591081234267706e-08, + "logits/chosen": -3.172306537628174, + "logits/rejected": -3.1324682235717773, + "logps/chosen": -141.90585327148438, + "logps/rejected": -470.60784912109375, + "loss": 0.301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29474562406539917, + "rewards/margins": 1.5604424476623535, + "rewards/rejected": -1.2656967639923096, + "step": 256 + }, + { + "epoch": 0.48, + "learning_rate": 5.560900648265123e-08, + "logits/chosen": -3.1554484367370605, + "logits/rejected": -3.046628475189209, + "logps/chosen": -196.07550048828125, + "logps/rejected": -403.66510009765625, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4931495785713196, + "rewards/margins": 1.7780570983886719, + "rewards/rejected": -1.284907579421997, + "step": 257 + }, + { + "epoch": 0.48, + "learning_rate": 5.530699351157809e-08, + "logits/chosen": -3.1738319396972656, + "logits/rejected": -3.177121877670288, + "logps/chosen": -140.8791046142578, + "logps/rejected": -644.3237915039062, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.305951327085495, + "rewards/margins": 2.4782748222351074, + "rewards/rejected": -2.172323703765869, + "step": 258 + }, + { + "epoch": 0.49, + "learning_rate": 5.5004784581204924e-08, + "logits/chosen": -3.186185359954834, + "logits/rejected": -3.1230111122131348, + "logps/chosen": -155.61727905273438, + "logps/rejected": -445.3653259277344, + "loss": 0.2807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42627832293510437, + "rewards/margins": 1.912313461303711, + "rewards/rejected": -1.4860352277755737, + "step": 259 + }, + { + "epoch": 0.49, + "learning_rate": 5.4702390850514723e-08, + "logits/chosen": -3.1736817359924316, + "logits/rejected": -3.1943812370300293, + "logps/chosen": -171.30963134765625, + "logps/rejected": -575.035888671875, + "loss": 0.3053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4399665892124176, + "rewards/margins": 2.1365578174591064, + "rewards/rejected": -1.6965912580490112, + "step": 260 + }, + { + "epoch": 0.49, + "learning_rate": 5.439982348531422e-08, + "logits/chosen": -3.1041030883789062, + "logits/rejected": -3.1370058059692383, + "logps/chosen": -154.61663818359375, + "logps/rejected": -485.2511901855469, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3796340823173523, + "rewards/margins": 2.1149306297302246, + "rewards/rejected": -1.7352967262268066, + "step": 261 + }, + { + "epoch": 0.49, + "learning_rate": 5.4097093657821534e-08, + "logits/chosen": -3.1458728313446045, + "logits/rejected": -3.1630077362060547, + "logps/chosen": -156.20660400390625, + "logps/rejected": -704.0216064453125, + "loss": 0.3017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31738588213920593, + "rewards/margins": 2.4426088333129883, + "rewards/rejected": -2.125222682952881, + "step": 262 + }, + { + "epoch": 0.49, + "learning_rate": 5.379421254625366e-08, + "logits/chosen": -3.0824220180511475, + "logits/rejected": -3.0985107421875, + "logps/chosen": -136.31771850585938, + "logps/rejected": -434.5936279296875, + "loss": 0.3155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21696090698242188, + "rewards/margins": 1.2355735301971436, + "rewards/rejected": -1.0186126232147217, + "step": 263 + }, + { + "epoch": 0.49, + "learning_rate": 5.3491191334413746e-08, + "logits/chosen": -3.112506866455078, + "logits/rejected": -3.0737061500549316, + "logps/chosen": -133.01156616210938, + "logps/rejected": -396.33660888671875, + "loss": 0.3016, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19733314216136932, + "rewards/margins": 1.7202030420303345, + "rewards/rejected": -1.5228699445724487, + "step": 264 + }, + { + "epoch": 0.5, + "learning_rate": 5.3188041211278064e-08, + "logits/chosen": -3.0331995487213135, + "logits/rejected": -3.0386219024658203, + "logps/chosen": -151.9534912109375, + "logps/rejected": -436.09991455078125, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2728218138217926, + "rewards/margins": 1.4045768976211548, + "rewards/rejected": -1.1317551136016846, + "step": 265 + }, + { + "epoch": 0.5, + "learning_rate": 5.288477337058293e-08, + "logits/chosen": -2.976478099822998, + "logits/rejected": -3.066014289855957, + "logps/chosen": -164.824951171875, + "logps/rejected": -372.7054443359375, + "loss": 0.3928, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2628135681152344, + "rewards/margins": 1.4254403114318848, + "rewards/rejected": -1.1626267433166504, + "step": 266 + }, + { + "epoch": 0.5, + "learning_rate": 5.258139901041131e-08, + "logits/chosen": -3.1225452423095703, + "logits/rejected": -3.1018643379211426, + "logps/chosen": -111.48138427734375, + "logps/rejected": -455.86102294921875, + "loss": 0.3229, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31571847200393677, + "rewards/margins": 2.098938465118408, + "rewards/rejected": -1.7832199335098267, + "step": 267 + }, + { + "epoch": 0.5, + "learning_rate": 5.2277929332779426e-08, + "logits/chosen": -3.0763697624206543, + "logits/rejected": -3.0451064109802246, + "logps/chosen": -116.99169158935547, + "logps/rejected": -250.89535522460938, + "loss": 0.3191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2829681634902954, + "rewards/margins": 1.123591661453247, + "rewards/rejected": -0.8406234979629517, + "step": 268 + }, + { + "epoch": 0.5, + "learning_rate": 5.197437554322304e-08, + "logits/chosen": -3.0944433212280273, + "logits/rejected": -3.069451332092285, + "logps/chosen": -118.46003723144531, + "logps/rejected": -322.4015197753906, + "loss": 0.3493, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27131080627441406, + "rewards/margins": 1.2326068878173828, + "rewards/rejected": -0.9612960815429688, + "step": 269 + }, + { + "epoch": 0.51, + "learning_rate": 5.167074885038373e-08, + "logits/chosen": -3.1410322189331055, + "logits/rejected": -3.163437843322754, + "logps/chosen": -130.92222595214844, + "logps/rejected": -456.9173278808594, + "loss": 0.3129, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3266046643257141, + "rewards/margins": 1.8727747201919556, + "rewards/rejected": -1.5461699962615967, + "step": 270 + }, + { + "epoch": 0.51, + "learning_rate": 5.1367060465595004e-08, + "logits/chosen": -3.0930557250976562, + "logits/rejected": -3.0882582664489746, + "logps/chosen": -157.79489135742188, + "logps/rejected": -762.768310546875, + "loss": 0.3025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43711090087890625, + "rewards/margins": 2.6628830432891846, + "rewards/rejected": -2.2257721424102783, + "step": 271 + }, + { + "epoch": 0.51, + "learning_rate": 5.1063321602468335e-08, + "logits/chosen": -3.0951082706451416, + "logits/rejected": -3.0877013206481934, + "logps/chosen": -144.70065307617188, + "logps/rejected": -697.2410888671875, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1748981475830078, + "rewards/margins": 2.471846342086792, + "rewards/rejected": -2.296948194503784, + "step": 272 + }, + { + "epoch": 0.51, + "learning_rate": 5.0759543476479084e-08, + "logits/chosen": -3.148818254470825, + "logits/rejected": -3.122547149658203, + "logps/chosen": -151.7786102294922, + "logps/rejected": -462.8675842285156, + "loss": 0.2889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29236945509910583, + "rewards/margins": 1.6972461938858032, + "rewards/rejected": -1.404876708984375, + "step": 273 + }, + { + "epoch": 0.51, + "learning_rate": 5.04557373045524e-08, + "logits/chosen": -3.1038732528686523, + "logits/rejected": -3.1001548767089844, + "logps/chosen": -179.53855895996094, + "logps/rejected": -293.78460693359375, + "loss": 0.3123, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4334823489189148, + "rewards/margins": 1.2794969081878662, + "rewards/rejected": -0.8460144400596619, + "step": 274 + }, + { + "epoch": 0.51, + "learning_rate": 5.0151914304649015e-08, + "logits/chosen": -3.096680164337158, + "logits/rejected": -3.1630146503448486, + "logps/chosen": -108.13325500488281, + "logps/rejected": -568.0257568359375, + "loss": 0.2436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2921428680419922, + "rewards/margins": 2.163182497024536, + "rewards/rejected": -1.871039628982544, + "step": 275 + }, + { + "epoch": 0.52, + "learning_rate": 4.9848085695351007e-08, + "logits/chosen": -3.115494728088379, + "logits/rejected": -3.0757431983947754, + "logps/chosen": -172.82159423828125, + "logps/rejected": -527.684814453125, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3431946039199829, + "rewards/margins": 1.5848815441131592, + "rewards/rejected": -1.2416870594024658, + "step": 276 + }, + { + "epoch": 0.52, + "learning_rate": 4.95442626954476e-08, + "logits/chosen": -3.069180965423584, + "logits/rejected": -3.0552735328674316, + "logps/chosen": -144.98995971679688, + "logps/rejected": -368.0321044921875, + "loss": 0.3409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34591445326805115, + "rewards/margins": 1.4150123596191406, + "rewards/rejected": -1.069097876548767, + "step": 277 + }, + { + "epoch": 0.52, + "learning_rate": 4.924045652352092e-08, + "logits/chosen": -3.1503467559814453, + "logits/rejected": -3.1155872344970703, + "logps/chosen": -172.855224609375, + "logps/rejected": -487.1414794921875, + "loss": 0.2823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4372665286064148, + "rewards/margins": 2.0535354614257812, + "rewards/rejected": -1.6162688732147217, + "step": 278 + }, + { + "epoch": 0.52, + "learning_rate": 4.8936678397531673e-08, + "logits/chosen": -3.1027672290802, + "logits/rejected": -3.1303319931030273, + "logps/chosen": -137.09471130371094, + "logps/rejected": -495.74566650390625, + "loss": 0.3479, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35378000140190125, + "rewards/margins": 2.1402852535247803, + "rewards/rejected": -1.7865052223205566, + "step": 279 + }, + { + "epoch": 0.52, + "learning_rate": 4.8632939534405e-08, + "logits/chosen": -3.1202752590179443, + "logits/rejected": -3.092174768447876, + "logps/chosen": -150.42909240722656, + "logps/rejected": -220.7249298095703, + "loss": 0.3696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2715034484863281, + "rewards/margins": 0.7635795474052429, + "rewards/rejected": -0.4920760989189148, + "step": 280 + }, + { + "epoch": 0.53, + "learning_rate": 4.8329251149616286e-08, + "logits/chosen": -3.148146629333496, + "logits/rejected": -3.134969711303711, + "logps/chosen": -113.51377868652344, + "logps/rejected": -471.7474670410156, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29947471618652344, + "rewards/margins": 1.73821222782135, + "rewards/rejected": -1.4387375116348267, + "step": 281 + }, + { + "epoch": 0.53, + "learning_rate": 4.802562445677696e-08, + "logits/chosen": -3.128572940826416, + "logits/rejected": -3.118596076965332, + "logps/chosen": -117.50666809082031, + "logps/rejected": -509.229248046875, + "loss": 0.2813, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29893073439598083, + "rewards/margins": 1.7493870258331299, + "rewards/rejected": -1.4504562616348267, + "step": 282 + }, + { + "epoch": 0.53, + "learning_rate": 4.772207066722056e-08, + "logits/chosen": -3.1444993019104004, + "logits/rejected": -3.130740165710449, + "logps/chosen": -161.92103576660156, + "logps/rejected": -456.974853515625, + "loss": 0.2699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28662949800491333, + "rewards/margins": 1.9163520336151123, + "rewards/rejected": -1.6297225952148438, + "step": 283 + }, + { + "epoch": 0.53, + "learning_rate": 4.741860098958869e-08, + "logits/chosen": -3.1082632541656494, + "logits/rejected": -3.1507411003112793, + "logps/chosen": -131.65426635742188, + "logps/rejected": -654.4825439453125, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3667144775390625, + "rewards/margins": 2.8708388805389404, + "rewards/rejected": -2.504124402999878, + "step": 284 + }, + { + "epoch": 0.53, + "learning_rate": 4.711522662941707e-08, + "logits/chosen": -3.0644149780273438, + "logits/rejected": -3.073392629623413, + "logps/chosen": -145.86480712890625, + "logps/rejected": -392.6314697265625, + "loss": 0.2877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2715950012207031, + "rewards/margins": 1.5566978454589844, + "rewards/rejected": -1.2851028442382812, + "step": 285 + }, + { + "epoch": 0.54, + "learning_rate": 4.681195878872194e-08, + "logits/chosen": -3.065835475921631, + "logits/rejected": -3.0349960327148438, + "logps/chosen": -120.6701431274414, + "logps/rejected": -421.3600769042969, + "loss": 0.3043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4089409112930298, + "rewards/margins": 1.995359182357788, + "rewards/rejected": -1.5864181518554688, + "step": 286 + }, + { + "epoch": 0.54, + "learning_rate": 4.6508808665586256e-08, + "logits/chosen": -3.0867981910705566, + "logits/rejected": -3.0887699127197266, + "logps/chosen": -111.971923828125, + "logps/rejected": -398.8553771972656, + "loss": 0.2999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3739185333251953, + "rewards/margins": 1.7472660541534424, + "rewards/rejected": -1.373347520828247, + "step": 287 + }, + { + "epoch": 0.54, + "learning_rate": 4.620578745374633e-08, + "logits/chosen": -3.086595058441162, + "logits/rejected": -3.1038453578948975, + "logps/chosen": -182.2504119873047, + "logps/rejected": -548.0050048828125, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31694871187210083, + "rewards/margins": 2.0455727577209473, + "rewards/rejected": -1.7286241054534912, + "step": 288 + }, + { + "epoch": 0.54, + "learning_rate": 4.5902906342178475e-08, + "logits/chosen": -3.077216148376465, + "logits/rejected": -3.099134922027588, + "logps/chosen": -155.15484619140625, + "logps/rejected": -502.4404602050781, + "loss": 0.2799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4478713870048523, + "rewards/margins": 2.4776382446289062, + "rewards/rejected": -2.029766798019409, + "step": 289 + }, + { + "epoch": 0.54, + "learning_rate": 4.560017651468579e-08, + "logits/chosen": -3.2362356185913086, + "logits/rejected": -3.2475762367248535, + "logps/chosen": -214.7156982421875, + "logps/rejected": -633.4053955078125, + "loss": 0.2797, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34450453519821167, + "rewards/margins": 2.6663291454315186, + "rewards/rejected": -2.321824550628662, + "step": 290 + }, + { + "epoch": 0.54, + "learning_rate": 4.529760914948529e-08, + "logits/chosen": -3.1012563705444336, + "logits/rejected": -3.132739305496216, + "logps/chosen": -142.009521484375, + "logps/rejected": -329.57861328125, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2767147123813629, + "rewards/margins": 1.1075642108917236, + "rewards/rejected": -0.8308494687080383, + "step": 291 + }, + { + "epoch": 0.55, + "learning_rate": 4.499521541879508e-08, + "logits/chosen": -3.107529640197754, + "logits/rejected": -3.165656328201294, + "logps/chosen": -150.98843383789062, + "logps/rejected": -528.2400512695312, + "loss": 0.2874, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29737550020217896, + "rewards/margins": 2.043156385421753, + "rewards/rejected": -1.7457809448242188, + "step": 292 + }, + { + "epoch": 0.55, + "learning_rate": 4.4693006488421906e-08, + "logits/chosen": -3.0093164443969727, + "logits/rejected": -3.1025853157043457, + "logps/chosen": -111.97055053710938, + "logps/rejected": -308.5641784667969, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24374008178710938, + "rewards/margins": 1.0318657159805298, + "rewards/rejected": -0.7881256341934204, + "step": 293 + }, + { + "epoch": 0.55, + "learning_rate": 4.4390993517348776e-08, + "logits/chosen": -3.1118624210357666, + "logits/rejected": -3.0860602855682373, + "logps/chosen": -136.75927734375, + "logps/rejected": -557.0413818359375, + "loss": 0.2646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4782215356826782, + "rewards/margins": 2.393364429473877, + "rewards/rejected": -1.9151427745819092, + "step": 294 + }, + { + "epoch": 0.55, + "learning_rate": 4.408918765732295e-08, + "logits/chosen": -3.1303415298461914, + "logits/rejected": -3.143685817718506, + "logps/chosen": -166.6866455078125, + "logps/rejected": -425.84423828125, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2988395690917969, + "rewards/margins": 1.8104865550994873, + "rewards/rejected": -1.5116469860076904, + "step": 295 + }, + { + "epoch": 0.55, + "learning_rate": 4.378760005244417e-08, + "logits/chosen": -3.159069299697876, + "logits/rejected": -3.192971706390381, + "logps/chosen": -241.47640991210938, + "logps/rejected": -629.0490112304688, + "loss": 0.2585, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3668166995048523, + "rewards/margins": 2.3931474685668945, + "rewards/rejected": -2.0263307094573975, + "step": 296 + }, + { + "epoch": 0.56, + "learning_rate": 4.348624183875322e-08, + "logits/chosen": -3.14432692527771, + "logits/rejected": -3.119379997253418, + "logps/chosen": -136.78195190429688, + "logps/rejected": -540.6836547851562, + "loss": 0.2776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43768465518951416, + "rewards/margins": 2.3239059448242188, + "rewards/rejected": -1.8862214088439941, + "step": 297 + }, + { + "epoch": 0.56, + "learning_rate": 4.318512414382058e-08, + "logits/chosen": -3.007611036300659, + "logits/rejected": -3.0200417041778564, + "logps/chosen": -127.52251434326172, + "logps/rejected": -326.4508056640625, + "loss": 0.2668, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33633196353912354, + "rewards/margins": 1.4567451477050781, + "rewards/rejected": -1.1204133033752441, + "step": 298 + }, + { + "epoch": 0.56, + "learning_rate": 4.2884258086335744e-08, + "logits/chosen": -3.1963870525360107, + "logits/rejected": -3.135042190551758, + "logps/chosen": -156.1092529296875, + "logps/rejected": -526.9775390625, + "loss": 0.2695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37228697538375854, + "rewards/margins": 2.2330994606018066, + "rewards/rejected": -1.8608124256134033, + "step": 299 + }, + { + "epoch": 0.56, + "learning_rate": 4.2583654775696476e-08, + "logits/chosen": -3.0719974040985107, + "logits/rejected": -3.076620578765869, + "logps/chosen": -157.7698211669922, + "logps/rejected": -592.0501708984375, + "loss": 0.267, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2879444360733032, + "rewards/margins": 2.1758017539978027, + "rewards/rejected": -1.88785719871521, + "step": 300 + }, + { + "epoch": 0.56, + "learning_rate": 4.228332531159871e-08, + "logits/chosen": -3.141658306121826, + "logits/rejected": -3.127849578857422, + "logps/chosen": -124.81776428222656, + "logps/rejected": -406.537353515625, + "loss": 0.2826, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37793388962745667, + "rewards/margins": 1.7100292444229126, + "rewards/rejected": -1.3320953845977783, + "step": 301 + }, + { + "epoch": 0.57, + "learning_rate": 4.198328078362672e-08, + "logits/chosen": -3.141510009765625, + "logits/rejected": -3.1227149963378906, + "logps/chosen": -157.80087280273438, + "logps/rejected": -300.84515380859375, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3651977777481079, + "rewards/margins": 1.0052826404571533, + "rewards/rejected": -0.6400848627090454, + "step": 302 + }, + { + "epoch": 0.57, + "learning_rate": 4.16835322708435e-08, + "logits/chosen": -3.070797920227051, + "logits/rejected": -3.096574306488037, + "logps/chosen": -167.9966583251953, + "logps/rejected": -466.55157470703125, + "loss": 0.3332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3258773982524872, + "rewards/margins": 1.8174011707305908, + "rewards/rejected": -1.4915237426757812, + "step": 303 + }, + { + "epoch": 0.57, + "learning_rate": 4.1384090841381845e-08, + "logits/chosen": -3.0919976234436035, + "logits/rejected": -3.0964956283569336, + "logps/chosen": -120.54764556884766, + "logps/rejected": -350.594970703125, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3287185728549957, + "rewards/margins": 1.1591179370880127, + "rewards/rejected": -0.8303993940353394, + "step": 304 + }, + { + "epoch": 0.57, + "learning_rate": 4.1084967552035525e-08, + "logits/chosen": -3.0920803546905518, + "logits/rejected": -3.074899196624756, + "logps/chosen": -166.88992309570312, + "logps/rejected": -427.1831359863281, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3733772337436676, + "rewards/margins": 1.8631799221038818, + "rewards/rejected": -1.489802598953247, + "step": 305 + }, + { + "epoch": 0.57, + "learning_rate": 4.078617344785112e-08, + "logits/chosen": -3.1463379859924316, + "logits/rejected": -3.1409435272216797, + "logps/chosen": -113.36784362792969, + "logps/rejected": -291.0270080566406, + "loss": 0.3327, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3236335813999176, + "rewards/margins": 1.1518776416778564, + "rewards/rejected": -0.8282440304756165, + "step": 306 + }, + { + "epoch": 0.57, + "learning_rate": 4.0487719561720095e-08, + "logits/chosen": -3.152998447418213, + "logits/rejected": -3.1165175437927246, + "logps/chosen": -135.85060119628906, + "logps/rejected": -382.01947021484375, + "loss": 0.3042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26115304231643677, + "rewards/margins": 1.7551641464233398, + "rewards/rejected": -1.4940109252929688, + "step": 307 + }, + { + "epoch": 0.58, + "learning_rate": 4.018961691397148e-08, + "logits/chosen": -3.126307725906372, + "logits/rejected": -3.079291343688965, + "logps/chosen": -129.47732543945312, + "logps/rejected": -423.0074462890625, + "loss": 0.3072, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3671848177909851, + "rewards/margins": 1.8369297981262207, + "rewards/rejected": -1.4697449207305908, + "step": 308 + }, + { + "epoch": 0.58, + "learning_rate": 3.989187651196493e-08, + "logits/chosen": -2.9771859645843506, + "logits/rejected": -3.0386135578155518, + "logps/chosen": -267.8958740234375, + "logps/rejected": -448.9704284667969, + "loss": 0.3541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3613433837890625, + "rewards/margins": 1.7063019275665283, + "rewards/rejected": -1.3449585437774658, + "step": 309 + }, + { + "epoch": 0.58, + "learning_rate": 3.9594509349684215e-08, + "logits/chosen": -3.0865063667297363, + "logits/rejected": -3.0711920261383057, + "logps/chosen": -110.83908081054688, + "logps/rejected": -369.58984375, + "loss": 0.2884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.281585693359375, + "rewards/margins": 1.3742554187774658, + "rewards/rejected": -1.0926697254180908, + "step": 310 + }, + { + "epoch": 0.58, + "learning_rate": 3.929752640733141e-08, + "logits/chosen": -3.0519018173217773, + "logits/rejected": -3.068375587463379, + "logps/chosen": -144.81948852539062, + "logps/rejected": -437.0885314941406, + "loss": 0.2728, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27022668719291687, + "rewards/margins": 1.7852840423583984, + "rewards/rejected": -1.5150573253631592, + "step": 311 + }, + { + "epoch": 0.58, + "learning_rate": 3.9000938650921334e-08, + "logits/chosen": -3.1260383129119873, + "logits/rejected": -3.113619804382324, + "logps/chosen": -153.72769165039062, + "logps/rejected": -692.68359375, + "loss": 0.2343, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2876613736152649, + "rewards/margins": 2.7695629596710205, + "rewards/rejected": -2.4819016456604004, + "step": 312 + }, + { + "epoch": 0.59, + "learning_rate": 3.870475703187666e-08, + "logits/chosen": -3.0748982429504395, + "logits/rejected": -3.106316089630127, + "logps/chosen": -133.30209350585938, + "logps/rejected": -433.48455810546875, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37905237078666687, + "rewards/margins": 1.7875652313232422, + "rewards/rejected": -1.4085129499435425, + "step": 313 + }, + { + "epoch": 0.59, + "learning_rate": 3.840899248662358e-08, + "logits/chosen": -3.109528064727783, + "logits/rejected": -3.099381446838379, + "logps/chosen": -145.57244873046875, + "logps/rejected": -456.9603271484375, + "loss": 0.3064, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32478219270706177, + "rewards/margins": 2.031381607055664, + "rewards/rejected": -1.706599473953247, + "step": 314 + }, + { + "epoch": 0.59, + "learning_rate": 3.811365593618794e-08, + "logits/chosen": -3.1012463569641113, + "logits/rejected": -3.067675828933716, + "logps/chosen": -127.20050811767578, + "logps/rejected": -618.2714233398438, + "loss": 0.2628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.41283950209617615, + "rewards/margins": 2.196068525314331, + "rewards/rejected": -1.783229112625122, + "step": 315 + }, + { + "epoch": 0.59, + "learning_rate": 3.781875828579195e-08, + "logits/chosen": -3.0498905181884766, + "logits/rejected": -3.0661139488220215, + "logps/chosen": -257.14111328125, + "logps/rejected": -416.06488037109375, + "loss": 0.3152, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21381568908691406, + "rewards/margins": 1.5357739925384521, + "rewards/rejected": -1.3219581842422485, + "step": 316 + }, + { + "epoch": 0.59, + "learning_rate": 3.752431042445163e-08, + "logits/chosen": -3.112623691558838, + "logits/rejected": -3.110421657562256, + "logps/chosen": -163.0890655517578, + "logps/rejected": -736.715087890625, + "loss": 0.2529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4002388119697571, + "rewards/margins": 2.891132354736328, + "rewards/rejected": -2.490893602371216, + "step": 317 + }, + { + "epoch": 0.6, + "learning_rate": 3.723032322457458e-08, + "logits/chosen": -3.094346523284912, + "logits/rejected": -3.143925666809082, + "logps/chosen": -160.94383239746094, + "logps/rejected": -326.3375244140625, + "loss": 0.351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2834590971469879, + "rewards/margins": 1.6170940399169922, + "rewards/rejected": -1.3336349725723267, + "step": 318 + }, + { + "epoch": 0.6, + "learning_rate": 3.693680754155867e-08, + "logits/chosen": -3.1293728351593018, + "logits/rejected": -3.123725652694702, + "logps/chosen": -133.1419219970703, + "logps/rejected": -359.1884765625, + "loss": 0.3326, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27192991971969604, + "rewards/margins": 1.555908203125, + "rewards/rejected": -1.2839782238006592, + "step": 319 + }, + { + "epoch": 0.6, + "learning_rate": 3.664377421339111e-08, + "logits/chosen": -3.142427444458008, + "logits/rejected": -3.126152515411377, + "logps/chosen": -151.82601928710938, + "logps/rejected": -490.8179931640625, + "loss": 0.3195, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30840301513671875, + "rewards/margins": 1.9179412126541138, + "rewards/rejected": -1.6095383167266846, + "step": 320 + }, + { + "epoch": 0.6, + "learning_rate": 3.635123406024828e-08, + "logits/chosen": -3.085240364074707, + "logits/rejected": -3.126145839691162, + "logps/chosen": -123.25518035888672, + "logps/rejected": -455.4437255859375, + "loss": 0.2985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31051141023635864, + "rewards/margins": 1.7566677331924438, + "rewards/rejected": -1.4461562633514404, + "step": 321 + }, + { + "epoch": 0.6, + "learning_rate": 3.60591978840962e-08, + "logits/chosen": -3.1054625511169434, + "logits/rejected": -3.148219585418701, + "logps/chosen": -166.43142700195312, + "logps/rejected": -589.02001953125, + "loss": 0.2883, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38885003328323364, + "rewards/margins": 2.1642894744873047, + "rewards/rejected": -1.7754395008087158, + "step": 322 + }, + { + "epoch": 0.6, + "learning_rate": 3.576767646829171e-08, + "logits/chosen": -3.1302411556243896, + "logits/rejected": -3.170506715774536, + "logps/chosen": -139.3777618408203, + "logps/rejected": -386.95025634765625, + "loss": 0.3288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31579893827438354, + "rewards/margins": 1.707405924797058, + "rewards/rejected": -1.3916069269180298, + "step": 323 + }, + { + "epoch": 0.61, + "learning_rate": 3.54766805771842e-08, + "logits/chosen": -3.097529411315918, + "logits/rejected": -3.094223976135254, + "logps/chosen": -133.59815979003906, + "logps/rejected": -521.531982421875, + "loss": 0.2774, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35218048095703125, + "rewards/margins": 2.1364150047302246, + "rewards/rejected": -1.784234642982483, + "step": 324 + }, + { + "epoch": 0.61, + "learning_rate": 3.5186220955718305e-08, + "logits/chosen": -3.111475944519043, + "logits/rejected": -3.1503591537475586, + "logps/chosen": -133.32400512695312, + "logps/rejected": -607.0511474609375, + "loss": 0.2663, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39912569522857666, + "rewards/margins": 2.474047899246216, + "rewards/rejected": -2.0749220848083496, + "step": 325 + }, + { + "epoch": 0.61, + "learning_rate": 3.489630832903694e-08, + "logits/chosen": -3.0591862201690674, + "logits/rejected": -3.117448329925537, + "logps/chosen": -94.47006225585938, + "logps/rejected": -354.4953308105469, + "loss": 0.2984, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27860796451568604, + "rewards/margins": 1.3594062328338623, + "rewards/rejected": -1.0807983875274658, + "step": 326 + }, + { + "epoch": 0.61, + "learning_rate": 3.4606953402085456e-08, + "logits/chosen": -3.057518482208252, + "logits/rejected": -3.0894055366516113, + "logps/chosen": -141.89505004882812, + "logps/rejected": -334.34820556640625, + "loss": 0.3133, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23064956068992615, + "rewards/margins": 1.1582603454589844, + "rewards/rejected": -0.9276108145713806, + "step": 327 + }, + { + "epoch": 0.61, + "learning_rate": 3.431816685921625e-08, + "logits/chosen": -3.044571876525879, + "logits/rejected": -3.1159849166870117, + "logps/chosen": -142.7990264892578, + "logps/rejected": -432.683349609375, + "loss": 0.2906, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22432327270507812, + "rewards/margins": 1.6906899213790894, + "rewards/rejected": -1.4663665294647217, + "step": 328 + }, + { + "epoch": 0.62, + "learning_rate": 3.402995936379432e-08, + "logits/chosen": -3.0206727981567383, + "logits/rejected": -3.088066577911377, + "logps/chosen": -189.55052185058594, + "logps/rejected": -542.0484008789062, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4403427243232727, + "rewards/margins": 2.2631821632385254, + "rewards/rejected": -1.8228394985198975, + "step": 329 + }, + { + "epoch": 0.62, + "learning_rate": 3.37423415578035e-08, + "logits/chosen": -3.111461639404297, + "logits/rejected": -3.068295955657959, + "logps/chosen": -138.20352172851562, + "logps/rejected": -446.2125244140625, + "loss": 0.2745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4103015959262848, + "rewards/margins": 1.7792088985443115, + "rewards/rejected": -1.3689072132110596, + "step": 330 + }, + { + "epoch": 0.62, + "learning_rate": 3.3455324061453446e-08, + "logits/chosen": -3.02555513381958, + "logits/rejected": -3.0285282135009766, + "logps/chosen": -130.98594665527344, + "logps/rejected": -405.218994140625, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25306016206741333, + "rewards/margins": 1.4888496398925781, + "rewards/rejected": -1.2357895374298096, + "step": 331 + }, + { + "epoch": 0.62, + "learning_rate": 3.3168917472787607e-08, + "logits/chosen": -3.0018935203552246, + "logits/rejected": -3.034168243408203, + "logps/chosen": -132.255859375, + "logps/rejected": -364.123779296875, + "loss": 0.2957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3596011996269226, + "rewards/margins": 1.5057239532470703, + "rewards/rejected": -1.146122694015503, + "step": 332 + }, + { + "epoch": 0.62, + "learning_rate": 3.288313236729182e-08, + "logits/chosen": -3.1971487998962402, + "logits/rejected": -3.209728240966797, + "logps/chosen": -148.8447265625, + "logps/rejected": -767.186767578125, + "loss": 0.2711, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5525932312011719, + "rewards/margins": 3.5183310508728027, + "rewards/rejected": -2.965737819671631, + "step": 333 + }, + { + "epoch": 0.63, + "learning_rate": 3.2597979297503774e-08, + "logits/chosen": -3.1143887042999268, + "logits/rejected": -3.1440987586975098, + "logps/chosen": -127.67144775390625, + "logps/rejected": -357.3248596191406, + "loss": 0.2486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.304190456867218, + "rewards/margins": 1.579735279083252, + "rewards/rejected": -1.2755447626113892, + "step": 334 + }, + { + "epoch": 0.63, + "learning_rate": 3.231346879262349e-08, + "logits/chosen": -3.1071534156799316, + "logits/rejected": -3.0805914402008057, + "logps/chosen": -164.93333435058594, + "logps/rejected": -426.99139404296875, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3278030455112457, + "rewards/margins": 1.5310349464416504, + "rewards/rejected": -1.2032318115234375, + "step": 335 + }, + { + "epoch": 0.63, + "learning_rate": 3.202961135812437e-08, + "logits/chosen": -3.1000747680664062, + "logits/rejected": -3.132654905319214, + "logps/chosen": -107.73587799072266, + "logps/rejected": -326.4158020019531, + "loss": 0.2897, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22457389533519745, + "rewards/margins": 1.2480754852294922, + "rewards/rejected": -1.0235016345977783, + "step": 336 + }, + { + "epoch": 0.63, + "learning_rate": 3.17464174753654e-08, + "logits/chosen": -3.1293587684631348, + "logits/rejected": -3.108884334564209, + "logps/chosen": -143.72288513183594, + "logps/rejected": -324.8512268066406, + "loss": 0.351, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3112194240093231, + "rewards/margins": 1.4337079524993896, + "rewards/rejected": -1.1224884986877441, + "step": 337 + }, + { + "epoch": 0.63, + "learning_rate": 3.146389760120416e-08, + "logits/chosen": -3.069016456604004, + "logits/rejected": -3.015789031982422, + "logps/chosen": -136.106201171875, + "logps/rejected": -433.40643310546875, + "loss": 0.2808, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2833496034145355, + "rewards/margins": 1.6642532348632812, + "rewards/rejected": -1.380903720855713, + "step": 338 + }, + { + "epoch": 0.63, + "learning_rate": 3.118206216761053e-08, + "logits/chosen": -3.11421275138855, + "logits/rejected": -3.112640619277954, + "logps/chosen": -141.84564208984375, + "logps/rejected": -325.4703369140625, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3765144348144531, + "rewards/margins": 1.346956729888916, + "rewards/rejected": -0.9704421758651733, + "step": 339 + }, + { + "epoch": 0.64, + "learning_rate": 3.090092158128172e-08, + "logits/chosen": -3.1309289932250977, + "logits/rejected": -3.190002918243408, + "logps/chosen": -138.05380249023438, + "logps/rejected": -534.9027099609375, + "loss": 0.2944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3893478512763977, + "rewards/margins": 2.1573503017425537, + "rewards/rejected": -1.7680023908615112, + "step": 340 + }, + { + "epoch": 0.64, + "learning_rate": 3.062048622325779e-08, + "logits/chosen": -3.102811098098755, + "logits/rejected": -3.0490663051605225, + "logps/chosen": -134.02145385742188, + "logps/rejected": -180.5104217529297, + "loss": 0.38, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3366047143936157, + "rewards/margins": 0.796799898147583, + "rewards/rejected": -0.4601951837539673, + "step": 341 + }, + { + "epoch": 0.64, + "learning_rate": 3.034076644853853e-08, + "logits/chosen": -3.085814952850342, + "logits/rejected": -3.1143224239349365, + "logps/chosen": -107.2327880859375, + "logps/rejected": -529.5917358398438, + "loss": 0.2405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38645631074905396, + "rewards/margins": 2.2832765579223633, + "rewards/rejected": -1.896820068359375, + "step": 342 + }, + { + "epoch": 0.64, + "learning_rate": 3.006177258570095e-08, + "logits/chosen": -3.15238618850708, + "logits/rejected": -3.1481218338012695, + "logps/chosen": -127.70743560791016, + "logps/rejected": -432.03558349609375, + "loss": 0.2631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3999183773994446, + "rewards/margins": 1.7266883850097656, + "rewards/rejected": -1.3267700672149658, + "step": 343 + }, + { + "epoch": 0.64, + "learning_rate": 2.9783514936517962e-08, + "logits/chosen": -3.0572397708892822, + "logits/rejected": -3.0116705894470215, + "logps/chosen": -104.40251922607422, + "logps/rejected": -357.87738037109375, + "loss": 0.319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31711769104003906, + "rewards/margins": 1.5457004308700562, + "rewards/rejected": -1.228582739830017, + "step": 344 + }, + { + "epoch": 0.65, + "learning_rate": 2.9506003775578036e-08, + "logits/chosen": -3.1686019897460938, + "logits/rejected": -3.1188430786132812, + "logps/chosen": -146.61312866210938, + "logps/rejected": -462.2884826660156, + "loss": 0.307, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20851249992847443, + "rewards/margins": 2.1203885078430176, + "rewards/rejected": -1.9118759632110596, + "step": 345 + }, + { + "epoch": 0.65, + "learning_rate": 2.9229249349905684e-08, + "logits/chosen": -3.0801050662994385, + "logits/rejected": -3.08949613571167, + "logps/chosen": -101.32269287109375, + "logps/rejected": -412.45526123046875, + "loss": 0.3284, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3059654235839844, + "rewards/margins": 1.436055064201355, + "rewards/rejected": -1.1300896406173706, + "step": 346 + }, + { + "epoch": 0.65, + "learning_rate": 2.895326187858326e-08, + "logits/chosen": -3.0746002197265625, + "logits/rejected": -3.1126604080200195, + "logps/chosen": -104.44927978515625, + "logps/rejected": -506.0067443847656, + "loss": 0.3109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25881537795066833, + "rewards/margins": 2.203798294067383, + "rewards/rejected": -1.944982886314392, + "step": 347 + }, + { + "epoch": 0.65, + "learning_rate": 2.8678051552373482e-08, + "logits/chosen": -3.067943811416626, + "logits/rejected": -2.3507907390594482, + "logps/chosen": -147.61257934570312, + "logps/rejected": -3601.191162109375, + "loss": 0.2896, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46302032470703125, + "rewards/margins": 3.7257614135742188, + "rewards/rejected": -3.2627413272857666, + "step": 348 + }, + { + "epoch": 0.65, + "learning_rate": 2.8403628533343204e-08, + "logits/chosen": -3.1058406829833984, + "logits/rejected": -3.086815118789673, + "logps/chosen": -148.5101776123047, + "logps/rejected": -393.81439208984375, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3654533624649048, + "rewards/margins": 1.5870094299316406, + "rewards/rejected": -1.2215560674667358, + "step": 349 + }, + { + "epoch": 0.66, + "learning_rate": 2.813000295448818e-08, + "logits/chosen": -3.1138906478881836, + "logits/rejected": -3.084352731704712, + "logps/chosen": -114.08784484863281, + "logps/rejected": -339.5931396484375, + "loss": 0.3524, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36037978529930115, + "rewards/margins": 1.5681970119476318, + "rewards/rejected": -1.2078170776367188, + "step": 350 + }, + { + "epoch": 0.66, + "learning_rate": 2.7857184919358935e-08, + "logits/chosen": -3.098766326904297, + "logits/rejected": -3.0938944816589355, + "logps/chosen": -116.51506042480469, + "logps/rejected": -433.6838073730469, + "loss": 0.2723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32613909244537354, + "rewards/margins": 1.7953011989593506, + "rewards/rejected": -1.469162106513977, + "step": 351 + }, + { + "epoch": 0.66, + "learning_rate": 2.7585184501687574e-08, + "logits/chosen": -3.071211814880371, + "logits/rejected": -3.0773282051086426, + "logps/chosen": -131.0990753173828, + "logps/rejected": -592.967041015625, + "loss": 0.2621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3612903654575348, + "rewards/margins": 2.150831937789917, + "rewards/rejected": -1.7895416021347046, + "step": 352 + }, + { + "epoch": 0.66, + "learning_rate": 2.7314011745016007e-08, + "logits/chosen": -3.096407651901245, + "logits/rejected": -3.1043004989624023, + "logps/chosen": -166.22317504882812, + "logps/rejected": -489.3037109375, + "loss": 0.3283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3774646818637848, + "rewards/margins": 2.2831180095672607, + "rewards/rejected": -1.905653476715088, + "step": 353 + }, + { + "epoch": 0.66, + "learning_rate": 2.7043676662324876e-08, + "logits/chosen": -3.1355104446411133, + "logits/rejected": -3.136726140975952, + "logps/chosen": -161.3994140625, + "logps/rejected": -533.9793701171875, + "loss": 0.2793, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5485397577285767, + "rewards/margins": 2.2653017044067383, + "rewards/rejected": -1.716761827468872, + "step": 354 + }, + { + "epoch": 0.66, + "learning_rate": 2.6774189235664024e-08, + "logits/chosen": -3.1176629066467285, + "logits/rejected": -3.165982246398926, + "logps/chosen": -105.63034057617188, + "logps/rejected": -394.940673828125, + "loss": 0.3009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19185028970241547, + "rewards/margins": 1.5832566022872925, + "rewards/rejected": -1.3914062976837158, + "step": 355 + }, + { + "epoch": 0.67, + "learning_rate": 2.6505559415783808e-08, + "logits/chosen": -3.044860363006592, + "logits/rejected": -3.0619583129882812, + "logps/chosen": -113.00593566894531, + "logps/rejected": -330.182373046875, + "loss": 0.305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3593631982803345, + "rewards/margins": 1.4460132122039795, + "rewards/rejected": -1.0866501331329346, + "step": 356 + }, + { + "epoch": 0.67, + "learning_rate": 2.623779712176763e-08, + "logits/chosen": -3.1908016204833984, + "logits/rejected": -3.1175894737243652, + "logps/chosen": -119.73859405517578, + "logps/rejected": -330.26678466796875, + "loss": 0.3036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3159317076206207, + "rewards/margins": 1.3042106628417969, + "rewards/rejected": -0.9882789850234985, + "step": 357 + }, + { + "epoch": 0.67, + "learning_rate": 2.597091224066581e-08, + "logits/chosen": -3.106127977371216, + "logits/rejected": -3.151705265045166, + "logps/chosen": -161.91488647460938, + "logps/rejected": -712.906494140625, + "loss": 0.263, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40092697739601135, + "rewards/margins": 3.129105567932129, + "rewards/rejected": -2.7281785011291504, + "step": 358 + }, + { + "epoch": 0.67, + "learning_rate": 2.570491462713037e-08, + "logits/chosen": -3.1398086547851562, + "logits/rejected": -3.186769962310791, + "logps/chosen": -129.4798583984375, + "logps/rejected": -624.8782958984375, + "loss": 0.2843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28961944580078125, + "rewards/margins": 2.6277496814727783, + "rewards/rejected": -2.338130235671997, + "step": 359 + }, + { + "epoch": 0.67, + "learning_rate": 2.5439814103051284e-08, + "logits/chosen": -3.1648426055908203, + "logits/rejected": -3.208171844482422, + "logps/chosen": -146.4864501953125, + "logps/rejected": -672.4420166015625, + "loss": 0.3122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3838653564453125, + "rewards/margins": 2.7441725730895996, + "rewards/rejected": -2.360307455062866, + "step": 360 + }, + { + "epoch": 0.68, + "learning_rate": 2.5175620457193668e-08, + "logits/chosen": -3.150193691253662, + "logits/rejected": -3.1575424671173096, + "logps/chosen": -143.0070037841797, + "logps/rejected": -563.7274169921875, + "loss": 0.2712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4008724093437195, + "rewards/margins": 2.5044827461242676, + "rewards/rejected": -2.1036102771759033, + "step": 361 + }, + { + "epoch": 0.68, + "learning_rate": 2.4912343444836444e-08, + "logits/chosen": -3.09834623336792, + "logits/rejected": -3.1568188667297363, + "logps/chosen": -144.7668914794922, + "logps/rejected": -530.997314453125, + "loss": 0.2298, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4400436282157898, + "rewards/margins": 2.211930751800537, + "rewards/rejected": -1.7718873023986816, + "step": 362 + }, + { + "epoch": 0.68, + "learning_rate": 2.4649992787412098e-08, + "logits/chosen": -3.1167473793029785, + "logits/rejected": -3.161900043487549, + "logps/chosen": -111.04676818847656, + "logps/rejected": -374.1767578125, + "loss": 0.3169, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2769203186035156, + "rewards/margins": 1.542393445968628, + "rewards/rejected": -1.2654732465744019, + "step": 363 + }, + { + "epoch": 0.68, + "learning_rate": 2.4388578172147673e-08, + "logits/chosen": -3.1623148918151855, + "logits/rejected": -3.1564393043518066, + "logps/chosen": -155.0773468017578, + "logps/rejected": -505.82110595703125, + "loss": 0.2754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42314374446868896, + "rewards/margins": 2.14144229888916, + "rewards/rejected": -1.7182984352111816, + "step": 364 + }, + { + "epoch": 0.68, + "learning_rate": 2.412810925170715e-08, + "logits/chosen": -3.096832275390625, + "logits/rejected": -3.1173343658447266, + "logps/chosen": -108.1762924194336, + "logps/rejected": -393.6318359375, + "loss": 0.2632, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26522523164749146, + "rewards/margins": 1.5809326171875, + "rewards/rejected": -1.3157074451446533, + "step": 365 + }, + { + "epoch": 0.69, + "learning_rate": 2.3868595643834994e-08, + "logits/chosen": -3.1708765029907227, + "logits/rejected": -3.2060980796813965, + "logps/chosen": -133.814697265625, + "logps/rejected": -458.2724304199219, + "loss": 0.2553, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28676605224609375, + "rewards/margins": 1.9162216186523438, + "rewards/rejected": -1.62945556640625, + "step": 366 + }, + { + "epoch": 0.69, + "learning_rate": 2.3610046931000937e-08, + "logits/chosen": -3.129655361175537, + "logits/rejected": -3.1614084243774414, + "logps/chosen": -146.2301483154297, + "logps/rejected": -637.7322998046875, + "loss": 0.2669, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.279440313577652, + "rewards/margins": 2.79994535446167, + "rewards/rejected": -2.520504951477051, + "step": 367 + }, + { + "epoch": 0.69, + "learning_rate": 2.335247266004629e-08, + "logits/chosen": -3.065152883529663, + "logits/rejected": -3.153393268585205, + "logps/chosen": -173.62066650390625, + "logps/rejected": -629.9893798828125, + "loss": 0.2927, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.36286085844039917, + "rewards/margins": 2.415560245513916, + "rewards/rejected": -2.052699327468872, + "step": 368 + }, + { + "epoch": 0.69, + "learning_rate": 2.3095882341831368e-08, + "logits/chosen": -3.176990032196045, + "logits/rejected": -3.1137919425964355, + "logps/chosen": -108.6143569946289, + "logps/rejected": -909.0311279296875, + "loss": 0.2423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4092109799385071, + "rewards/margins": 3.5182595252990723, + "rewards/rejected": -3.10904860496521, + "step": 369 + }, + { + "epoch": 0.69, + "learning_rate": 2.2840285450884227e-08, + "logits/chosen": -3.0224106311798096, + "logits/rejected": -3.119359016418457, + "logps/chosen": -161.9175567626953, + "logps/rejected": -457.40826416015625, + "loss": 0.2876, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.398917019367218, + "rewards/margins": 1.9013782739639282, + "rewards/rejected": -1.5024611949920654, + "step": 370 + }, + { + "epoch": 0.69, + "learning_rate": 2.258569142505098e-08, + "logits/chosen": -3.112186908721924, + "logits/rejected": -3.0932083129882812, + "logps/chosen": -116.48884582519531, + "logps/rejected": -387.03228759765625, + "loss": 0.2631, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27780836820602417, + "rewards/margins": 1.777605414390564, + "rewards/rejected": -1.4997971057891846, + "step": 371 + }, + { + "epoch": 0.7, + "learning_rate": 2.2332109665147127e-08, + "logits/chosen": -3.149240016937256, + "logits/rejected": -3.15667724609375, + "logps/chosen": -109.81011199951172, + "logps/rejected": -404.15631103515625, + "loss": 0.2742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32662659883499146, + "rewards/margins": 2.052114963531494, + "rewards/rejected": -1.725488305091858, + "step": 372 + }, + { + "epoch": 0.7, + "learning_rate": 2.2079549534610604e-08, + "logits/chosen": -3.13974666595459, + "logits/rejected": -3.1294403076171875, + "logps/chosen": -159.88824462890625, + "logps/rejected": -358.5498962402344, + "loss": 0.2601, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.484719455242157, + "rewards/margins": 1.7948100566864014, + "rewards/rejected": -1.3100906610488892, + "step": 373 + }, + { + "epoch": 0.7, + "learning_rate": 2.18280203591559e-08, + "logits/chosen": -3.091153621673584, + "logits/rejected": -3.0826334953308105, + "logps/chosen": -170.12342834472656, + "logps/rejected": -482.33221435546875, + "loss": 0.2544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4922653138637543, + "rewards/margins": 2.3567521572113037, + "rewards/rejected": -1.8644866943359375, + "step": 374 + }, + { + "epoch": 0.7, + "learning_rate": 2.157753142642978e-08, + "logits/chosen": -3.090737819671631, + "logits/rejected": -3.080275058746338, + "logps/chosen": -143.02926635742188, + "logps/rejected": -578.539306640625, + "loss": 0.2756, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3680889308452606, + "rewards/margins": 2.244288206100464, + "rewards/rejected": -1.8761993646621704, + "step": 375 + }, + { + "epoch": 0.7, + "learning_rate": 2.1328091985668368e-08, + "logits/chosen": -3.045722007751465, + "logits/rejected": -3.0831446647644043, + "logps/chosen": -87.09675598144531, + "logps/rejected": -358.1884460449219, + "loss": 0.332, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29774782061576843, + "rewards/margins": 1.5583558082580566, + "rewards/rejected": -1.2606079578399658, + "step": 376 + }, + { + "epoch": 0.71, + "learning_rate": 2.1079711247355503e-08, + "logits/chosen": -3.1860451698303223, + "logits/rejected": -3.2153635025024414, + "logps/chosen": -131.82073974609375, + "logps/rejected": -551.62451171875, + "loss": 0.2967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2849006652832031, + "rewards/margins": 2.6610755920410156, + "rewards/rejected": -2.3761749267578125, + "step": 377 + }, + { + "epoch": 0.71, + "learning_rate": 2.08323983828828e-08, + "logits/chosen": -3.127406597137451, + "logits/rejected": -3.13299560546875, + "logps/chosen": -132.6533966064453, + "logps/rejected": -280.98675537109375, + "loss": 0.3153, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35607606172561646, + "rewards/margins": 1.2759689092636108, + "rewards/rejected": -0.9198929071426392, + "step": 378 + }, + { + "epoch": 0.71, + "learning_rate": 2.0586162524210892e-08, + "logits/chosen": -3.170927047729492, + "logits/rejected": -3.1267738342285156, + "logps/chosen": -197.99082946777344, + "logps/rejected": -700.997802734375, + "loss": 0.2732, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.47253191471099854, + "rewards/margins": 2.9898629188537598, + "rewards/rejected": -2.5173308849334717, + "step": 379 + }, + { + "epoch": 0.71, + "learning_rate": 2.0341012763532238e-08, + "logits/chosen": -3.1011343002319336, + "logits/rejected": -3.1029090881347656, + "logps/chosen": -227.5536346435547, + "logps/rejected": -449.95245361328125, + "loss": 0.3233, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31461334228515625, + "rewards/margins": 1.553704857826233, + "rewards/rejected": -1.2390915155410767, + "step": 380 + }, + { + "epoch": 0.71, + "learning_rate": 2.009695815293548e-08, + "logits/chosen": -3.1453046798706055, + "logits/rejected": -3.1707072257995605, + "logps/chosen": -193.2315216064453, + "logps/rejected": -489.81884765625, + "loss": 0.3013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1881641447544098, + "rewards/margins": 2.1617488861083984, + "rewards/rejected": -1.973584771156311, + "step": 381 + }, + { + "epoch": 0.72, + "learning_rate": 1.9854007704071063e-08, + "logits/chosen": -3.0450124740600586, + "logits/rejected": -3.05539870262146, + "logps/chosen": -119.88188171386719, + "logps/rejected": -385.13140869140625, + "loss": 0.3186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2997550964355469, + "rewards/margins": 1.4744468927383423, + "rewards/rejected": -1.1746917963027954, + "step": 382 + }, + { + "epoch": 0.72, + "learning_rate": 1.9612170387818626e-08, + "logits/chosen": -3.084226131439209, + "logits/rejected": -3.1168246269226074, + "logps/chosen": -144.79074096679688, + "logps/rejected": -562.914306640625, + "loss": 0.2675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44296950101852417, + "rewards/margins": 2.2614877223968506, + "rewards/rejected": -1.8185181617736816, + "step": 383 + }, + { + "epoch": 0.72, + "learning_rate": 1.937145513395567e-08, + "logits/chosen": -3.1258037090301514, + "logits/rejected": -3.120716094970703, + "logps/chosen": -150.01612854003906, + "logps/rejected": -471.2449951171875, + "loss": 0.2822, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3854484558105469, + "rewards/margins": 2.3308799266815186, + "rewards/rejected": -1.9454315900802612, + "step": 384 + }, + { + "epoch": 0.72, + "learning_rate": 1.9131870830827818e-08, + "logits/chosen": -3.104055404663086, + "logits/rejected": -3.1400156021118164, + "logps/chosen": -172.00579833984375, + "logps/rejected": -513.1275634765625, + "loss": 0.2584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43507957458496094, + "rewards/margins": 2.176668882369995, + "rewards/rejected": -1.7415893077850342, + "step": 385 + }, + { + "epoch": 0.72, + "learning_rate": 1.8893426325020683e-08, + "logits/chosen": -3.089111804962158, + "logits/rejected": -3.072500705718994, + "logps/chosen": -158.8029022216797, + "logps/rejected": -426.216552734375, + "loss": 0.3248, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2857986390590668, + "rewards/margins": 1.8915762901306152, + "rewards/rejected": -1.6057777404785156, + "step": 386 + }, + { + "epoch": 0.72, + "learning_rate": 1.8656130421033122e-08, + "logits/chosen": -3.0841689109802246, + "logits/rejected": -3.088803291320801, + "logps/chosen": -176.09494018554688, + "logps/rejected": -558.6500244140625, + "loss": 0.291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3209190368652344, + "rewards/margins": 2.2195000648498535, + "rewards/rejected": -1.8985810279846191, + "step": 387 + }, + { + "epoch": 0.73, + "learning_rate": 1.8419991880952242e-08, + "logits/chosen": -3.1341958045959473, + "logits/rejected": -3.1672422885894775, + "logps/chosen": -161.1513214111328, + "logps/rejected": -593.7029418945312, + "loss": 0.2483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5254581570625305, + "rewards/margins": 2.8782505989074707, + "rewards/rejected": -2.352792501449585, + "step": 388 + }, + { + "epoch": 0.73, + "learning_rate": 1.818501942412975e-08, + "logits/chosen": -3.0791845321655273, + "logits/rejected": -3.123415470123291, + "logps/chosen": -138.32659912109375, + "logps/rejected": -491.5925598144531, + "loss": 0.2779, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3796825408935547, + "rewards/margins": 1.9583507776260376, + "rewards/rejected": -1.5786683559417725, + "step": 389 + }, + { + "epoch": 0.73, + "learning_rate": 1.7951221726860043e-08, + "logits/chosen": -3.07523250579834, + "logits/rejected": -3.0676212310791016, + "logps/chosen": -173.1561279296875, + "logps/rejected": -324.2310485839844, + "loss": 0.292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32999879121780396, + "rewards/margins": 1.4316681623458862, + "rewards/rejected": -1.1016693115234375, + "step": 390 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718607422059877e-08, + "logits/chosen": -3.129413604736328, + "logits/rejected": -3.1135425567626953, + "logps/chosen": -185.44192504882812, + "logps/rejected": -447.8582458496094, + "loss": 0.2593, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42859041690826416, + "rewards/margins": 1.9368149042129517, + "rewards/rejected": -1.5082244873046875, + "step": 391 + }, + { + "epoch": 0.73, + "learning_rate": 1.7487185098949564e-08, + "logits/chosen": -3.0912985801696777, + "logits/rejected": -3.127685546875, + "logps/chosen": -145.39093017578125, + "logps/rejected": -504.2864990234375, + "loss": 0.268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3501113951206207, + "rewards/margins": 2.1256699562072754, + "rewards/rejected": -1.7755584716796875, + "step": 392 + }, + { + "epoch": 0.74, + "learning_rate": 1.725696330273575e-08, + "logits/chosen": -3.1014673709869385, + "logits/rejected": -3.1411170959472656, + "logps/chosen": -151.068603515625, + "logps/rejected": -484.4219970703125, + "loss": 0.2815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3986312747001648, + "rewards/margins": 1.990953803062439, + "rewards/rejected": -1.592322587966919, + "step": 393 + }, + { + "epoch": 0.74, + "learning_rate": 1.7027950534296028e-08, + "logits/chosen": -3.111743450164795, + "logits/rejected": -3.1094326972961426, + "logps/chosen": -162.5471649169922, + "logps/rejected": -526.38623046875, + "loss": 0.262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2832931578159332, + "rewards/margins": 2.4461870193481445, + "rewards/rejected": -2.162893772125244, + "step": 394 + }, + { + "epoch": 0.74, + "learning_rate": 1.6800155249864894e-08, + "logits/chosen": -3.1444058418273926, + "logits/rejected": -3.115656852722168, + "logps/chosen": -131.77996826171875, + "logps/rejected": -432.8836669921875, + "loss": 0.2541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2882343530654907, + "rewards/margins": 1.7721011638641357, + "rewards/rejected": -1.4838669300079346, + "step": 395 + }, + { + "epoch": 0.74, + "learning_rate": 1.6573585860721645e-08, + "logits/chosen": -3.137995958328247, + "logits/rejected": -3.178687572479248, + "logps/chosen": -161.15487670898438, + "logps/rejected": -677.5031127929688, + "loss": 0.2253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29814377427101135, + "rewards/margins": 2.7254388332366943, + "rewards/rejected": -2.427294969558716, + "step": 396 + }, + { + "epoch": 0.74, + "learning_rate": 1.63482507328797e-08, + "logits/chosen": -3.0616345405578613, + "logits/rejected": -3.09065318107605, + "logps/chosen": -105.16846466064453, + "logps/rejected": -375.74407958984375, + "loss": 0.2992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.321158230304718, + "rewards/margins": 1.7177753448486328, + "rewards/rejected": -1.3966171741485596, + "step": 397 + }, + { + "epoch": 0.75, + "learning_rate": 1.6124158186777677e-08, + "logits/chosen": -3.132322311401367, + "logits/rejected": -3.1201303005218506, + "logps/chosen": -154.7256622314453, + "logps/rejected": -305.17120361328125, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2463279664516449, + "rewards/margins": 1.483716607093811, + "rewards/rejected": -1.2373886108398438, + "step": 398 + }, + { + "epoch": 0.75, + "learning_rate": 1.5901316496972262e-08, + "logits/chosen": -3.1128597259521484, + "logits/rejected": -3.1011788845062256, + "logps/chosen": -133.37852478027344, + "logps/rejected": -581.83154296875, + "loss": 0.2483, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4287639856338501, + "rewards/margins": 2.638979911804199, + "rewards/rejected": -2.2102158069610596, + "step": 399 + }, + { + "epoch": 0.75, + "learning_rate": 1.5679733891832553e-08, + "logits/chosen": -3.101767063140869, + "logits/rejected": -3.0556249618530273, + "logps/chosen": -175.93466186523438, + "logps/rejected": -675.9395751953125, + "loss": 0.2802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32830125093460083, + "rewards/margins": 2.5458688735961914, + "rewards/rejected": -2.2175674438476562, + "step": 400 + }, + { + "epoch": 0.75, + "learning_rate": 1.545941855323634e-08, + "logits/chosen": -2.96448016166687, + "logits/rejected": -2.9490766525268555, + "logps/chosen": -208.43829345703125, + "logps/rejected": -432.34503173828125, + "loss": 0.347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3735359311103821, + "rewards/margins": 1.6279990673065186, + "rewards/rejected": -1.2544631958007812, + "step": 401 + }, + { + "epoch": 0.75, + "learning_rate": 1.5240378616267884e-08, + "logits/chosen": -3.060194492340088, + "logits/rejected": -3.0142030715942383, + "logps/chosen": -204.72525024414062, + "logps/rejected": -785.8313598632812, + "loss": 0.2726, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48442116379737854, + "rewards/margins": 3.4462811946868896, + "rewards/rejected": -2.961859941482544, + "step": 402 + }, + { + "epoch": 0.75, + "learning_rate": 1.5022622168917646e-08, + "logits/chosen": -3.099644660949707, + "logits/rejected": -3.1159873008728027, + "logps/chosen": -156.32122802734375, + "logps/rejected": -309.004150390625, + "loss": 0.2911, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31258317828178406, + "rewards/margins": 1.4436378479003906, + "rewards/rejected": -1.1310547590255737, + "step": 403 + }, + { + "epoch": 0.76, + "learning_rate": 1.4806157251783514e-08, + "logits/chosen": -3.0268750190734863, + "logits/rejected": -3.076225757598877, + "logps/chosen": -160.97222900390625, + "logps/rejected": -327.20501708984375, + "loss": 0.3433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3719367980957031, + "rewards/margins": 1.287819743156433, + "rewards/rejected": -0.91588294506073, + "step": 404 + }, + { + "epoch": 0.76, + "learning_rate": 1.4590991857774038e-08, + "logits/chosen": -3.1092538833618164, + "logits/rejected": -3.0875244140625, + "logps/chosen": -143.99676513671875, + "logps/rejected": -446.46099853515625, + "loss": 0.2882, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34781038761138916, + "rewards/margins": 1.9585494995117188, + "rewards/rejected": -1.6107392311096191, + "step": 405 + }, + { + "epoch": 0.76, + "learning_rate": 1.4377133931813168e-08, + "logits/chosen": -3.0645105838775635, + "logits/rejected": -3.061046600341797, + "logps/chosen": -127.20413970947266, + "logps/rejected": -256.3220520019531, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2767307460308075, + "rewards/margins": 1.0427769422531128, + "rewards/rejected": -0.7660461664199829, + "step": 406 + }, + { + "epoch": 0.76, + "learning_rate": 1.4164591370547002e-08, + "logits/chosen": -3.115461826324463, + "logits/rejected": -3.102051019668579, + "logps/chosen": -129.83114624023438, + "logps/rejected": -494.4033203125, + "loss": 0.3095, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30895692110061646, + "rewards/margins": 2.0295684337615967, + "rewards/rejected": -1.720611572265625, + "step": 407 + }, + { + "epoch": 0.76, + "learning_rate": 1.3953372022052106e-08, + "logits/chosen": -3.0600790977478027, + "logits/rejected": -3.066476345062256, + "logps/chosen": -129.2195587158203, + "logps/rejected": -499.9012451171875, + "loss": 0.2482, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25078850984573364, + "rewards/margins": 2.495140552520752, + "rewards/rejected": -2.244352102279663, + "step": 408 + }, + { + "epoch": 0.77, + "learning_rate": 1.374348368554581e-08, + "logits/chosen": -3.133774757385254, + "logits/rejected": -3.1092023849487305, + "logps/chosen": -160.5269775390625, + "logps/rejected": -539.1087646484375, + "loss": 0.2835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.48429450392723083, + "rewards/margins": 2.3834476470947266, + "rewards/rejected": -1.8991531133651733, + "step": 409 + }, + { + "epoch": 0.77, + "learning_rate": 1.3534934111098179e-08, + "logits/chosen": -3.119905948638916, + "logits/rejected": -3.111537456512451, + "logps/chosen": -120.8127670288086, + "logps/rejected": -550.32470703125, + "loss": 0.2391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2673172056674957, + "rewards/margins": 2.99072265625, + "rewards/rejected": -2.723405599594116, + "step": 410 + }, + { + "epoch": 0.77, + "learning_rate": 1.3327730999345815e-08, + "logits/chosen": -3.15250825881958, + "logits/rejected": -3.1543619632720947, + "logps/chosen": -117.92450714111328, + "logps/rejected": -558.5802612304688, + "loss": 0.217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40770190954208374, + "rewards/margins": 2.669361114501953, + "rewards/rejected": -2.2616593837738037, + "step": 411 + }, + { + "epoch": 0.77, + "learning_rate": 1.3121882001207613e-08, + "logits/chosen": -3.135230541229248, + "logits/rejected": -3.163388252258301, + "logps/chosen": -169.5257568359375, + "logps/rejected": -548.897216796875, + "loss": 0.2802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2936142086982727, + "rewards/margins": 2.368891716003418, + "rewards/rejected": -2.075277805328369, + "step": 412 + }, + { + "epoch": 0.77, + "learning_rate": 1.291739471760212e-08, + "logits/chosen": -3.134462356567383, + "logits/rejected": -3.133734941482544, + "logps/chosen": -142.4628143310547, + "logps/rejected": -389.01910400390625, + "loss": 0.286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21936264634132385, + "rewards/margins": 1.7429298162460327, + "rewards/rejected": -1.5235671997070312, + "step": 413 + }, + { + "epoch": 0.78, + "learning_rate": 1.2714276699166992e-08, + "logits/chosen": -3.127727746963501, + "logits/rejected": -3.1042380332946777, + "logps/chosen": -167.04928588867188, + "logps/rejected": -375.4304504394531, + "loss": 0.2952, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22949372231960297, + "rewards/margins": 1.3323471546173096, + "rewards/rejected": -1.1028534173965454, + "step": 414 + }, + { + "epoch": 0.78, + "learning_rate": 1.2512535445980138e-08, + "logits/chosen": -3.130845069885254, + "logits/rejected": -3.063973903656006, + "logps/chosen": -143.46661376953125, + "logps/rejected": -724.3336181640625, + "loss": 0.2611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5963420867919922, + "rewards/margins": 3.035099506378174, + "rewards/rejected": -2.4387574195861816, + "step": 415 + }, + { + "epoch": 0.78, + "learning_rate": 1.2312178407282747e-08, + "logits/chosen": -3.0549402236938477, + "logits/rejected": -3.1008522510528564, + "logps/chosen": -123.35259246826172, + "logps/rejected": -542.2001953125, + "loss": 0.2643, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3478256165981293, + "rewards/margins": 2.488908290863037, + "rewards/rejected": -2.141082763671875, + "step": 416 + }, + { + "epoch": 0.78, + "learning_rate": 1.211321298120429e-08, + "logits/chosen": -3.169262409210205, + "logits/rejected": -3.2087273597717285, + "logps/chosen": -180.3726348876953, + "logps/rejected": -607.6980590820312, + "loss": 0.296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45715028047561646, + "rewards/margins": 2.826059103012085, + "rewards/rejected": -2.368908643722534, + "step": 417 + }, + { + "epoch": 0.78, + "learning_rate": 1.1915646514489291e-08, + "logits/chosen": -3.0800178050994873, + "logits/rejected": -3.060999631881714, + "logps/chosen": -128.43524169921875, + "logps/rejected": -371.96435546875, + "loss": 0.2946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.260488897562027, + "rewards/margins": 1.6474289894104004, + "rewards/rejected": -1.3869401216506958, + "step": 418 + }, + { + "epoch": 0.78, + "learning_rate": 1.1719486302226117e-08, + "logits/chosen": -3.0278377532958984, + "logits/rejected": -3.118227481842041, + "logps/chosen": -135.2738800048828, + "logps/rejected": -425.89093017578125, + "loss": 0.3212, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26085013151168823, + "rewards/margins": 1.6827418804168701, + "rewards/rejected": -1.4218918085098267, + "step": 419 + }, + { + "epoch": 0.79, + "learning_rate": 1.1524739587577559e-08, + "logits/chosen": -3.094470977783203, + "logits/rejected": -3.118704080581665, + "logps/chosen": -158.29949951171875, + "logps/rejected": -688.97314453125, + "loss": 0.2391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3689979612827301, + "rewards/margins": 2.90731143951416, + "rewards/rejected": -2.538313388824463, + "step": 420 + }, + { + "epoch": 0.79, + "learning_rate": 1.133141356151336e-08, + "logits/chosen": -3.09328293800354, + "logits/rejected": -3.1102099418640137, + "logps/chosen": -159.16781616210938, + "logps/rejected": -346.32489013671875, + "loss": 0.2705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30635833740234375, + "rewards/margins": 1.5064926147460938, + "rewards/rejected": -1.20013427734375, + "step": 421 + }, + { + "epoch": 0.79, + "learning_rate": 1.1139515362544754e-08, + "logits/chosen": -3.1001129150390625, + "logits/rejected": -3.0454304218292236, + "logps/chosen": -149.13031005859375, + "logps/rejected": -825.7736206054688, + "loss": 0.2878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39240762591362, + "rewards/margins": 3.5392627716064453, + "rewards/rejected": -3.146855354309082, + "step": 422 + }, + { + "epoch": 0.79, + "learning_rate": 1.0949052076460851e-08, + "logits/chosen": -3.0591812133789062, + "logits/rejected": -3.0752780437469482, + "logps/chosen": -115.32752990722656, + "logps/rejected": -414.1817932128906, + "loss": 0.2839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32970428466796875, + "rewards/margins": 1.8244538307189941, + "rewards/rejected": -1.4947494268417358, + "step": 423 + }, + { + "epoch": 0.79, + "learning_rate": 1.0760030736066949e-08, + "logits/chosen": -3.1311845779418945, + "logits/rejected": -3.1938352584838867, + "logps/chosen": -137.91749572753906, + "logps/rejected": -435.6331787109375, + "loss": 0.2522, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2329082489013672, + "rewards/margins": 1.80327570438385, + "rewards/rejected": -1.570367455482483, + "step": 424 + }, + { + "epoch": 0.8, + "learning_rate": 1.0572458320924942e-08, + "logits/chosen": -3.1402692794799805, + "logits/rejected": -3.1203136444091797, + "logps/chosen": -114.66494750976562, + "logps/rejected": -252.4225616455078, + "loss": 0.3065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3824596405029297, + "rewards/margins": 1.2819912433624268, + "rewards/rejected": -0.8995316028594971, + "step": 425 + }, + { + "epoch": 0.8, + "learning_rate": 1.03863417570955e-08, + "logits/chosen": -3.16017746925354, + "logits/rejected": -3.134626865386963, + "logps/chosen": -145.03939819335938, + "logps/rejected": -225.09756469726562, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3733528256416321, + "rewards/margins": 0.9259971380233765, + "rewards/rejected": -0.5526443719863892, + "step": 426 + }, + { + "epoch": 0.8, + "learning_rate": 1.0201687916882418e-08, + "logits/chosen": -3.0925841331481934, + "logits/rejected": -3.058358907699585, + "logps/chosen": -109.2313232421875, + "logps/rejected": -274.86480712890625, + "loss": 0.3162, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23372498154640198, + "rewards/margins": 1.1913001537322998, + "rewards/rejected": -0.957575261592865, + "step": 427 + }, + { + "epoch": 0.8, + "learning_rate": 1.0018503618578817e-08, + "logits/chosen": -3.0982718467712402, + "logits/rejected": -3.036649227142334, + "logps/chosen": -183.34259033203125, + "logps/rejected": -419.0614013671875, + "loss": 0.3001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1879783570766449, + "rewards/margins": 1.774468183517456, + "rewards/rejected": -1.5864897966384888, + "step": 428 + }, + { + "epoch": 0.8, + "learning_rate": 9.836795626215355e-09, + "logits/chosen": -3.08979868888855, + "logits/rejected": -3.04292631149292, + "logps/chosen": -162.8323974609375, + "logps/rejected": -667.4627685546875, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5333927273750305, + "rewards/margins": 3.477539539337158, + "rewards/rejected": -2.9441466331481934, + "step": 429 + }, + { + "epoch": 0.81, + "learning_rate": 9.65657064931048e-09, + "logits/chosen": -3.1446187496185303, + "logits/rejected": -3.1873531341552734, + "logps/chosen": -139.30848693847656, + "logps/rejected": -568.612548828125, + "loss": 0.2842, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3776687979698181, + "rewards/margins": 2.9225525856018066, + "rewards/rejected": -2.5448837280273438, + "step": 430 + }, + { + "epoch": 0.81, + "learning_rate": 9.477835342622758e-09, + "logits/chosen": -3.0879576206207275, + "logits/rejected": -3.1419291496276855, + "logps/chosen": -154.5089874267578, + "logps/rejected": -527.1871337890625, + "loss": 0.2597, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31038588285446167, + "rewards/margins": 2.227252960205078, + "rewards/rejected": -1.9168671369552612, + "step": 431 + }, + { + "epoch": 0.81, + "learning_rate": 9.300596305905011e-09, + "logits/chosen": -3.157177448272705, + "logits/rejected": -3.1481285095214844, + "logps/chosen": -184.71954345703125, + "logps/rejected": -562.8074951171875, + "loss": 0.2595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5269126892089844, + "rewards/margins": 2.7474069595336914, + "rewards/rejected": -2.220494270324707, + "step": 432 + }, + { + "epoch": 0.81, + "learning_rate": 9.124860083660768e-09, + "logits/chosen": -3.130270004272461, + "logits/rejected": -3.1719231605529785, + "logps/chosen": -123.39787292480469, + "logps/rejected": -354.9037170410156, + "loss": 0.3121, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45340806245803833, + "rewards/margins": 1.4754081964492798, + "rewards/rejected": -1.0220000743865967, + "step": 433 + }, + { + "epoch": 0.81, + "learning_rate": 8.950633164902466e-09, + "logits/chosen": -3.1560416221618652, + "logits/rejected": -3.1553187370300293, + "logps/chosen": -134.28189086914062, + "logps/rejected": -482.23858642578125, + "loss": 0.2536, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33192598819732666, + "rewards/margins": 2.388181209564209, + "rewards/rejected": -2.056255340576172, + "step": 434 + }, + { + "epoch": 0.81, + "learning_rate": 8.777921982911996e-09, + "logits/chosen": -3.0755538940429688, + "logits/rejected": -3.0695972442626953, + "logps/chosen": -138.96063232421875, + "logps/rejected": -439.66839599609375, + "loss": 0.2636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27072906494140625, + "rewards/margins": 1.7161484956741333, + "rewards/rejected": -1.4454193115234375, + "step": 435 + }, + { + "epoch": 0.82, + "learning_rate": 8.606732915003001e-09, + "logits/chosen": -3.0955448150634766, + "logits/rejected": -3.0906124114990234, + "logps/chosen": -173.51661682128906, + "logps/rejected": -462.40570068359375, + "loss": 0.2831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4484962522983551, + "rewards/margins": 2.0951623916625977, + "rewards/rejected": -1.6466660499572754, + "step": 436 + }, + { + "epoch": 0.82, + "learning_rate": 8.437072282285535e-09, + "logits/chosen": -3.1533901691436768, + "logits/rejected": -3.1912806034088135, + "logps/chosen": -167.63986206054688, + "logps/rejected": -419.552490234375, + "loss": 0.3251, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3365989923477173, + "rewards/margins": 2.0925590991973877, + "rewards/rejected": -1.7559601068496704, + "step": 437 + }, + { + "epoch": 0.82, + "learning_rate": 8.268946349432582e-09, + "logits/chosen": -3.082280158996582, + "logits/rejected": -3.1507749557495117, + "logps/chosen": -142.44839477539062, + "logps/rejected": -687.8110961914062, + "loss": 0.2243, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46116793155670166, + "rewards/margins": 3.0278122425079346, + "rewards/rejected": -2.5666441917419434, + "step": 438 + }, + { + "epoch": 0.82, + "learning_rate": 8.102361324448714e-09, + "logits/chosen": -3.1723642349243164, + "logits/rejected": -3.1410746574401855, + "logps/chosen": -157.81143188476562, + "logps/rejected": -426.5469055175781, + "loss": 0.2811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.45098191499710083, + "rewards/margins": 2.397696018218994, + "rewards/rejected": -1.9467140436172485, + "step": 439 + }, + { + "epoch": 0.82, + "learning_rate": 7.937323358440933e-09, + "logits/chosen": -3.1070098876953125, + "logits/rejected": -3.141753673553467, + "logps/chosen": -145.10574340820312, + "logps/rejected": -542.43212890625, + "loss": 0.2559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5410572290420532, + "rewards/margins": 2.5951099395751953, + "rewards/rejected": -2.0540528297424316, + "step": 440 + }, + { + "epoch": 0.83, + "learning_rate": 7.773838545391515e-09, + "logits/chosen": -3.1049418449401855, + "logits/rejected": -3.101527214050293, + "logps/chosen": -135.31240844726562, + "logps/rejected": -288.223876953125, + "loss": 0.3311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2256793975830078, + "rewards/margins": 1.2914142608642578, + "rewards/rejected": -1.06573486328125, + "step": 441 + }, + { + "epoch": 0.83, + "learning_rate": 7.61191292193294e-09, + "logits/chosen": -3.1184802055358887, + "logits/rejected": -3.1022825241088867, + "logps/chosen": -125.0112075805664, + "logps/rejected": -300.46795654296875, + "loss": 0.2929, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20155830681324005, + "rewards/margins": 1.300699234008789, + "rewards/rejected": -1.0991408824920654, + "step": 442 + }, + { + "epoch": 0.83, + "learning_rate": 7.451552467125072e-09, + "logits/chosen": -3.1214985847473145, + "logits/rejected": -3.1034750938415527, + "logps/chosen": -139.18093872070312, + "logps/rejected": -328.84893798828125, + "loss": 0.2625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32540473341941833, + "rewards/margins": 1.4933216571807861, + "rewards/rejected": -1.1679168939590454, + "step": 443 + }, + { + "epoch": 0.83, + "learning_rate": 7.292763102234328e-09, + "logits/chosen": -3.077510118484497, + "logits/rejected": -3.111664295196533, + "logps/chosen": -137.3868865966797, + "logps/rejected": -657.831787109375, + "loss": 0.24, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5233039855957031, + "rewards/margins": 3.4308784008026123, + "rewards/rejected": -2.907574415206909, + "step": 444 + }, + { + "epoch": 0.83, + "learning_rate": 7.135550690515052e-09, + "logits/chosen": -2.959463596343994, + "logits/rejected": -2.9921529293060303, + "logps/chosen": -155.0432891845703, + "logps/rejected": -960.6242065429688, + "loss": 0.2892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38153570890426636, + "rewards/margins": 3.929504871368408, + "rewards/rejected": -3.547969102859497, + "step": 445 + }, + { + "epoch": 0.84, + "learning_rate": 6.9799210369930414e-09, + "logits/chosen": -3.0794944763183594, + "logits/rejected": -3.0606441497802734, + "logps/chosen": -110.42960357666016, + "logps/rejected": -480.8779296875, + "loss": 0.255, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24868011474609375, + "rewards/margins": 2.0067977905273438, + "rewards/rejected": -1.75811767578125, + "step": 446 + }, + { + "epoch": 0.84, + "learning_rate": 6.825879888251135e-09, + "logits/chosen": -3.0791285037994385, + "logits/rejected": -3.116945266723633, + "logps/chosen": -149.608642578125, + "logps/rejected": -450.65887451171875, + "loss": 0.2723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43800967931747437, + "rewards/margins": 1.9721717834472656, + "rewards/rejected": -1.534162163734436, + "step": 447 + }, + { + "epoch": 0.84, + "learning_rate": 6.673432932217116e-09, + "logits/chosen": -3.1149160861968994, + "logits/rejected": -3.143096923828125, + "logps/chosen": -143.42568969726562, + "logps/rejected": -678.44482421875, + "loss": 0.2637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4196929931640625, + "rewards/margins": 2.923999071121216, + "rewards/rejected": -2.5043060779571533, + "step": 448 + }, + { + "epoch": 0.84, + "learning_rate": 6.522585797953578e-09, + "logits/chosen": -3.1064109802246094, + "logits/rejected": -3.1357226371765137, + "logps/chosen": -117.61446380615234, + "logps/rejected": -419.19110107421875, + "loss": 0.2926, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25325584411621094, + "rewards/margins": 1.5328471660614014, + "rewards/rejected": -1.2795913219451904, + "step": 449 + }, + { + "epoch": 0.84, + "learning_rate": 6.373344055450164e-09, + "logits/chosen": -3.0765981674194336, + "logits/rejected": -3.067274570465088, + "logps/chosen": -138.38339233398438, + "logps/rejected": -428.91925048828125, + "loss": 0.3294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43143922090530396, + "rewards/margins": 1.8073654174804688, + "rewards/rejected": -1.3759262561798096, + "step": 450 + }, + { + "epoch": 0.84, + "learning_rate": 6.225713215417866e-09, + "logits/chosen": -3.1316280364990234, + "logits/rejected": -3.108652114868164, + "logps/chosen": -154.61325073242188, + "logps/rejected": -594.160888671875, + "loss": 0.2424, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4088565707206726, + "rewards/margins": 2.8566715717315674, + "rewards/rejected": -2.44781494140625, + "step": 451 + }, + { + "epoch": 0.85, + "learning_rate": 6.079698729085497e-09, + "logits/chosen": -3.1319966316223145, + "logits/rejected": -3.12296724319458, + "logps/chosen": -133.76902770996094, + "logps/rejected": -250.69113159179688, + "loss": 0.2863, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3072223663330078, + "rewards/margins": 1.2111736536026, + "rewards/rejected": -0.9039512872695923, + "step": 452 + }, + { + "epoch": 0.85, + "learning_rate": 5.935305987998496e-09, + "logits/chosen": -3.1588878631591797, + "logits/rejected": -3.1346421241760254, + "logps/chosen": -151.62118530273438, + "logps/rejected": -497.78045654296875, + "loss": 0.3192, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4457565248012543, + "rewards/margins": 2.3190324306488037, + "rewards/rejected": -1.8732757568359375, + "step": 453 + }, + { + "epoch": 0.85, + "learning_rate": 5.79254032381975e-09, + "logits/chosen": -3.0688657760620117, + "logits/rejected": -3.136579751968384, + "logps/chosen": -177.0197296142578, + "logps/rejected": -884.093505859375, + "loss": 0.2398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4371635615825653, + "rewards/margins": 3.889838218688965, + "rewards/rejected": -3.4526748657226562, + "step": 454 + }, + { + "epoch": 0.85, + "learning_rate": 5.651407008132808e-09, + "logits/chosen": -3.1762263774871826, + "logits/rejected": -3.1239261627197266, + "logps/chosen": -138.9414520263672, + "logps/rejected": -405.1002197265625, + "loss": 0.2573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3747551143169403, + "rewards/margins": 1.6734092235565186, + "rewards/rejected": -1.2986541986465454, + "step": 455 + }, + { + "epoch": 0.85, + "learning_rate": 5.511911252247192e-09, + "logits/chosen": -3.1191043853759766, + "logits/rejected": -3.0446665287017822, + "logps/chosen": -128.13987731933594, + "logps/rejected": -1068.1785888671875, + "loss": 0.2681, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4931800961494446, + "rewards/margins": 4.3569207191467285, + "rewards/rejected": -3.8637404441833496, + "step": 456 + }, + { + "epoch": 0.86, + "learning_rate": 5.374058207005944e-09, + "logits/chosen": -3.1290512084960938, + "logits/rejected": -3.1004252433776855, + "logps/chosen": -125.70376586914062, + "logps/rejected": -756.0145263671875, + "loss": 0.2805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33200186491012573, + "rewards/margins": 2.974490165710449, + "rewards/rejected": -2.6424882411956787, + "step": 457 + }, + { + "epoch": 0.86, + "learning_rate": 5.237852962595468e-09, + "logits/chosen": -3.1155264377593994, + "logits/rejected": -3.1980178356170654, + "logps/chosen": -133.26437377929688, + "logps/rejected": -838.55224609375, + "loss": 0.2606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3492107391357422, + "rewards/margins": 3.373415470123291, + "rewards/rejected": -3.024204969406128, + "step": 458 + }, + { + "epoch": 0.86, + "learning_rate": 5.103300548357592e-09, + "logits/chosen": -3.1327362060546875, + "logits/rejected": -3.157057285308838, + "logps/chosen": -163.49951171875, + "logps/rejected": -358.55126953125, + "loss": 0.3039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3593559265136719, + "rewards/margins": 1.6192588806152344, + "rewards/rejected": -1.2599029541015625, + "step": 459 + }, + { + "epoch": 0.86, + "learning_rate": 4.970405932603805e-09, + "logits/chosen": -3.140934467315674, + "logits/rejected": -3.074716329574585, + "logps/chosen": -128.4469757080078, + "logps/rejected": -421.326904296875, + "loss": 0.2891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3037528991699219, + "rewards/margins": 1.6654915809631348, + "rewards/rejected": -1.3617385625839233, + "step": 460 + }, + { + "epoch": 0.86, + "learning_rate": 4.839174022431858e-09, + "logits/chosen": -3.0373640060424805, + "logits/rejected": -3.0719799995422363, + "logps/chosen": -151.07542419433594, + "logps/rejected": -312.0150146484375, + "loss": 0.2851, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3487064242362976, + "rewards/margins": 1.4057056903839111, + "rewards/rejected": -1.0569992065429688, + "step": 461 + }, + { + "epoch": 0.87, + "learning_rate": 4.709609663544533e-09, + "logits/chosen": -3.1144204139709473, + "logits/rejected": -3.1213645935058594, + "logps/chosen": -143.61647033691406, + "logps/rejected": -716.537353515625, + "loss": 0.2205, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3388725221157074, + "rewards/margins": 3.5269980430603027, + "rewards/rejected": -3.1881256103515625, + "step": 462 + }, + { + "epoch": 0.87, + "learning_rate": 4.5817176400707426e-09, + "logits/chosen": -3.1284165382385254, + "logits/rejected": -3.1181325912475586, + "logps/chosen": -157.63832092285156, + "logps/rejected": -508.97320556640625, + "loss": 0.2836, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3826374113559723, + "rewards/margins": 2.2411465644836426, + "rewards/rejected": -1.8585091829299927, + "step": 463 + }, + { + "epoch": 0.87, + "learning_rate": 4.4555026743888725e-09, + "logits/chosen": -3.1309754848480225, + "logits/rejected": -3.119389772415161, + "logps/chosen": -143.92086791992188, + "logps/rejected": -311.11651611328125, + "loss": 0.3004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33531874418258667, + "rewards/margins": 1.2581183910369873, + "rewards/rejected": -0.9227997064590454, + "step": 464 + }, + { + "epoch": 0.87, + "learning_rate": 4.330969426952374e-09, + "logits/chosen": -3.1588454246520996, + "logits/rejected": -3.135711193084717, + "logps/chosen": -148.6053009033203, + "logps/rejected": -517.3611450195312, + "loss": 0.289, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3567871153354645, + "rewards/margins": 2.571211099624634, + "rewards/rejected": -2.2144241333007812, + "step": 465 + }, + { + "epoch": 0.87, + "learning_rate": 4.208122496117744e-09, + "logits/chosen": -3.1226956844329834, + "logits/rejected": -3.132716655731201, + "logps/chosen": -142.30831909179688, + "logps/rejected": -509.98455810546875, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42056161165237427, + "rewards/margins": 2.0945024490356445, + "rewards/rejected": -1.6739410161972046, + "step": 466 + }, + { + "epoch": 0.87, + "learning_rate": 4.086966417974669e-09, + "logits/chosen": -3.1108579635620117, + "logits/rejected": -3.0662975311279297, + "logps/chosen": -126.1821060180664, + "logps/rejected": -394.52191162109375, + "loss": 0.3468, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35279580950737, + "rewards/margins": 1.7700024843215942, + "rewards/rejected": -1.4172066450119019, + "step": 467 + }, + { + "epoch": 0.88, + "learning_rate": 3.967505666178556e-09, + "logits/chosen": -3.099673271179199, + "logits/rejected": -3.114797592163086, + "logps/chosen": -126.34507751464844, + "logps/rejected": -374.4612121582031, + "loss": 0.2846, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39189720153808594, + "rewards/margins": 1.811279058456421, + "rewards/rejected": -1.4193817377090454, + "step": 468 + }, + { + "epoch": 0.88, + "learning_rate": 3.84974465178538e-09, + "logits/chosen": -3.0726571083068848, + "logits/rejected": -3.110989570617676, + "logps/chosen": -131.27859497070312, + "logps/rejected": -393.45343017578125, + "loss": 0.3234, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35091936588287354, + "rewards/margins": 1.752488136291504, + "rewards/rejected": -1.4015686511993408, + "step": 469 + }, + { + "epoch": 0.88, + "learning_rate": 3.733687723088725e-09, + "logits/chosen": -3.069206953048706, + "logits/rejected": -3.0820248126983643, + "logps/chosen": -164.37484741210938, + "logps/rejected": -654.739013671875, + "loss": 0.2498, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4017952084541321, + "rewards/margins": 2.6417579650878906, + "rewards/rejected": -2.2399628162384033, + "step": 470 + }, + { + "epoch": 0.88, + "learning_rate": 3.6193391654593064e-09, + "logits/chosen": -3.097442865371704, + "logits/rejected": -3.0472307205200195, + "logps/chosen": -133.84457397460938, + "logps/rejected": -375.3958740234375, + "loss": 0.3047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33319932222366333, + "rewards/margins": 1.5960122346878052, + "rewards/rejected": -1.262812852859497, + "step": 471 + }, + { + "epoch": 0.88, + "learning_rate": 3.5067032011866784e-09, + "logits/chosen": -3.0732686519622803, + "logits/rejected": -3.0506114959716797, + "logps/chosen": -107.39482879638672, + "logps/rejected": -373.68182373046875, + "loss": 0.3023, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3167051374912262, + "rewards/margins": 1.7931135892868042, + "rewards/rejected": -1.4764084815979004, + "step": 472 + }, + { + "epoch": 0.89, + "learning_rate": 3.3957839893233533e-09, + "logits/chosen": -3.02797269821167, + "logits/rejected": -2.9451699256896973, + "logps/chosen": -142.579833984375, + "logps/rejected": -378.647705078125, + "loss": 0.3144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37645378708839417, + "rewards/margins": 1.4263172149658203, + "rewards/rejected": -1.0498634576797485, + "step": 473 + }, + { + "epoch": 0.89, + "learning_rate": 3.286585625531241e-09, + "logits/chosen": -3.1171064376831055, + "logits/rejected": -3.099402904510498, + "logps/chosen": -172.95989990234375, + "logps/rejected": -635.477783203125, + "loss": 0.2276, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5065963864326477, + "rewards/margins": 2.7007951736450195, + "rewards/rejected": -2.1941986083984375, + "step": 474 + }, + { + "epoch": 0.89, + "learning_rate": 3.1791121419303792e-09, + "logits/chosen": -3.1010661125183105, + "logits/rejected": -3.0572190284729004, + "logps/chosen": -157.51858520507812, + "logps/rejected": -406.266845703125, + "loss": 0.2936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3732589781284332, + "rewards/margins": 1.6604187488555908, + "rewards/rejected": -1.2871596813201904, + "step": 475 + }, + { + "epoch": 0.89, + "learning_rate": 3.0733675069500862e-09, + "logits/chosen": -3.1469783782958984, + "logits/rejected": -3.1351771354675293, + "logps/chosen": -169.85311889648438, + "logps/rejected": -564.5670166015625, + "loss": 0.2598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30928725004196167, + "rewards/margins": 2.2952322959899902, + "rewards/rejected": -1.985945224761963, + "step": 476 + }, + { + "epoch": 0.89, + "learning_rate": 2.969355625182418e-09, + "logits/chosen": -3.0974292755126953, + "logits/rejected": -3.0490450859069824, + "logps/chosen": -125.62431335449219, + "logps/rejected": -452.2491455078125, + "loss": 0.2659, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3432239592075348, + "rewards/margins": 1.861113429069519, + "rewards/rejected": -1.5178894996643066, + "step": 477 + }, + { + "epoch": 0.9, + "learning_rate": 2.8670803372379536e-09, + "logits/chosen": -3.129063606262207, + "logits/rejected": -3.1314432621002197, + "logps/chosen": -127.91188049316406, + "logps/rejected": -281.50213623046875, + "loss": 0.3069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31530115008354187, + "rewards/margins": 1.37070894241333, + "rewards/rejected": -1.0554077625274658, + "step": 478 + }, + { + "epoch": 0.9, + "learning_rate": 2.7665454196040663e-09, + "logits/chosen": -3.0975823402404785, + "logits/rejected": -3.099858045578003, + "logps/chosen": -115.74735260009766, + "logps/rejected": -294.91217041015625, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25211870670318604, + "rewards/margins": 1.1051247119903564, + "rewards/rejected": -0.8530060052871704, + "step": 479 + }, + { + "epoch": 0.9, + "learning_rate": 2.6677545845053717e-09, + "logits/chosen": -3.1369452476501465, + "logits/rejected": -3.1425552368164062, + "logps/chosen": -176.83901977539062, + "logps/rejected": -376.59259033203125, + "loss": 0.3003, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39908331632614136, + "rewards/margins": 1.6797428131103516, + "rewards/rejected": -1.280659556388855, + "step": 480 + }, + { + "epoch": 0.9, + "learning_rate": 2.5707114797667463e-09, + "logits/chosen": -3.0796446800231934, + "logits/rejected": -3.0827364921569824, + "logps/chosen": -164.70065307617188, + "logps/rejected": -498.2000427246094, + "loss": 0.2697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4730224609375, + "rewards/margins": 2.2779664993286133, + "rewards/rejected": -1.8049437999725342, + "step": 481 + }, + { + "epoch": 0.9, + "learning_rate": 2.475419688678598e-09, + "logits/chosen": -3.139355182647705, + "logits/rejected": -3.1069726943969727, + "logps/chosen": -164.11572265625, + "logps/rejected": -488.1797180175781, + "loss": 0.2744, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29559326171875, + "rewards/margins": 2.259268283843994, + "rewards/rejected": -1.9636750221252441, + "step": 482 + }, + { + "epoch": 0.9, + "learning_rate": 2.3818827298645206e-09, + "logits/chosen": -3.093550205230713, + "logits/rejected": -3.0638651847839355, + "logps/chosen": -149.80844116210938, + "logps/rejected": -386.90576171875, + "loss": 0.2552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4225776791572571, + "rewards/margins": 1.7454445362091064, + "rewards/rejected": -1.3228669166564941, + "step": 483 + }, + { + "epoch": 0.91, + "learning_rate": 2.290104057151432e-09, + "logits/chosen": -3.089463710784912, + "logits/rejected": -3.074765682220459, + "logps/chosen": -150.90399169921875, + "logps/rejected": -410.7755126953125, + "loss": 0.3146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38391876220703125, + "rewards/margins": 1.6609268188476562, + "rewards/rejected": -1.277008056640625, + "step": 484 + }, + { + "epoch": 0.91, + "learning_rate": 2.2000870594419907e-09, + "logits/chosen": -3.1467108726501465, + "logits/rejected": -3.13568115234375, + "logps/chosen": -113.24161529541016, + "logps/rejected": -426.83819580078125, + "loss": 0.3018, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.46495094895362854, + "rewards/margins": 2.10524320602417, + "rewards/rejected": -1.6402924060821533, + "step": 485 + }, + { + "epoch": 0.91, + "learning_rate": 2.111835060589495e-09, + "logits/chosen": -3.121462345123291, + "logits/rejected": -3.072352886199951, + "logps/chosen": -131.73324584960938, + "logps/rejected": -487.51507568359375, + "loss": 0.2903, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37649843096733093, + "rewards/margins": 2.0510621070861816, + "rewards/rejected": -1.6745636463165283, + "step": 486 + }, + { + "epoch": 0.91, + "learning_rate": 2.025351319275137e-09, + "logits/chosen": -3.1306378841400146, + "logits/rejected": -3.109468936920166, + "logps/chosen": -163.19393920898438, + "logps/rejected": -520.50048828125, + "loss": 0.3076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6060867309570312, + "rewards/margins": 2.1749634742736816, + "rewards/rejected": -1.5688766241073608, + "step": 487 + }, + { + "epoch": 0.91, + "learning_rate": 1.9406390288876584e-09, + "logits/chosen": -3.1014325618743896, + "logits/rejected": -3.0696630477905273, + "logps/chosen": -138.25535583496094, + "logps/rejected": -322.90203857421875, + "loss": 0.2961, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3002704679965973, + "rewards/margins": 1.3209278583526611, + "rewards/rejected": -1.0206574201583862, + "step": 488 + }, + { + "epoch": 0.92, + "learning_rate": 1.8577013174054855e-09, + "logits/chosen": -3.107661724090576, + "logits/rejected": -3.0887861251831055, + "logps/chosen": -112.41644287109375, + "logps/rejected": -334.48651123046875, + "loss": 0.352, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2912319302558899, + "rewards/margins": 1.4561290740966797, + "rewards/rejected": -1.1648972034454346, + "step": 489 + }, + { + "epoch": 0.92, + "learning_rate": 1.776541247281177e-09, + "logits/chosen": -3.034113883972168, + "logits/rejected": -3.091085910797119, + "logps/chosen": -126.45149230957031, + "logps/rejected": -409.754638671875, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25024643540382385, + "rewards/margins": 2.1393179893493652, + "rewards/rejected": -1.8890717029571533, + "step": 490 + }, + { + "epoch": 0.92, + "learning_rate": 1.6971618153283629e-09, + "logits/chosen": -3.0433506965637207, + "logits/rejected": -3.1423635482788086, + "logps/chosen": -134.6065216064453, + "logps/rejected": -498.131591796875, + "loss": 0.3035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3760029077529907, + "rewards/margins": 2.6057910919189453, + "rewards/rejected": -2.229788303375244, + "step": 491 + }, + { + "epoch": 0.92, + "learning_rate": 1.6195659526111182e-09, + "logits/chosen": -3.114948272705078, + "logits/rejected": -3.0833163261413574, + "logps/chosen": -191.2910919189453, + "logps/rejected": -475.4429016113281, + "loss": 0.2812, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34178924560546875, + "rewards/margins": 2.1714677810668945, + "rewards/rejected": -1.8296782970428467, + "step": 492 + }, + { + "epoch": 0.92, + "learning_rate": 1.5437565243356654e-09, + "logits/chosen": -3.041456460952759, + "logits/rejected": -3.1071925163269043, + "logps/chosen": -112.03218078613281, + "logps/rejected": -617.1761474609375, + "loss": 0.2641, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4319114685058594, + "rewards/margins": 3.5651772022247314, + "rewards/rejected": -3.133265733718872, + "step": 493 + }, + { + "epoch": 0.93, + "learning_rate": 1.4697363297446474e-09, + "logits/chosen": -3.12358021736145, + "logits/rejected": -3.135344982147217, + "logps/chosen": -116.02423095703125, + "logps/rejected": -477.21307373046875, + "loss": 0.3025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2517356872558594, + "rewards/margins": 1.677619218826294, + "rewards/rejected": -1.4258835315704346, + "step": 494 + }, + { + "epoch": 0.93, + "learning_rate": 1.3975081020137392e-09, + "logits/chosen": -3.0883307456970215, + "logits/rejected": -3.0309174060821533, + "logps/chosen": -171.83099365234375, + "logps/rejected": -664.2872314453125, + "loss": 0.2526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4186813235282898, + "rewards/margins": 2.75565505027771, + "rewards/rejected": -2.3369736671447754, + "step": 495 + }, + { + "epoch": 0.93, + "learning_rate": 1.3270745081506995e-09, + "logits/chosen": -3.1516971588134766, + "logits/rejected": -3.109900712966919, + "logps/chosen": -146.49600219726562, + "logps/rejected": -479.8341369628906, + "loss": 0.2824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.336251437664032, + "rewards/margins": 2.0497097969055176, + "rewards/rejected": -1.7134582996368408, + "step": 496 + }, + { + "epoch": 0.93, + "learning_rate": 1.2584381488969454e-09, + "logits/chosen": -3.179841995239258, + "logits/rejected": -3.1700148582458496, + "logps/chosen": -147.16456604003906, + "logps/rejected": -576.5809326171875, + "loss": 0.2433, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4721420407295227, + "rewards/margins": 2.932512044906616, + "rewards/rejected": -2.4603700637817383, + "step": 497 + }, + { + "epoch": 0.93, + "learning_rate": 1.191601558631461e-09, + "logits/chosen": -3.168591022491455, + "logits/rejected": -3.1373133659362793, + "logps/chosen": -119.38355255126953, + "logps/rejected": -338.9580383300781, + "loss": 0.2578, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4695472717285156, + "rewards/margins": 1.4998176097869873, + "rewards/rejected": -1.0302703380584717, + "step": 498 + }, + { + "epoch": 0.93, + "learning_rate": 1.126567205277279e-09, + "logits/chosen": -3.1459245681762695, + "logits/rejected": -3.1116538047790527, + "logps/chosen": -133.7545623779297, + "logps/rejected": -816.600341796875, + "loss": 0.2814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4344402551651001, + "rewards/margins": 3.742943286895752, + "rewards/rejected": -3.3085029125213623, + "step": 499 + }, + { + "epoch": 0.94, + "learning_rate": 1.0633374902103088e-09, + "logits/chosen": -3.024901866912842, + "logits/rejected": -3.002150535583496, + "logps/chosen": -137.79928588867188, + "logps/rejected": -353.03216552734375, + "loss": 0.2923, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3413265347480774, + "rewards/margins": 1.5207531452178955, + "rewards/rejected": -1.1794265508651733, + "step": 500 + }, + { + "epoch": 0.94, + "learning_rate": 1.0019147481706625e-09, + "logits/chosen": -3.1625537872314453, + "logits/rejected": -3.159015655517578, + "logps/chosen": -132.420654296875, + "logps/rejected": -480.3035888671875, + "loss": 0.2833, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3402961790561676, + "rewards/margins": 2.241326093673706, + "rewards/rejected": -1.9010299444198608, + "step": 501 + }, + { + "epoch": 0.94, + "learning_rate": 9.423012471764913e-10, + "logits/chosen": -3.16459584236145, + "logits/rejected": -3.1732215881347656, + "logps/chosen": -114.95317840576172, + "logps/rejected": -360.8703918457031, + "loss": 0.2838, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26132431626319885, + "rewards/margins": 1.889155626296997, + "rewards/rejected": -1.627831220626831, + "step": 502 + }, + { + "epoch": 0.94, + "learning_rate": 8.844991884401853e-10, + "logits/chosen": -3.100750684738159, + "logits/rejected": -3.1659507751464844, + "logps/chosen": -103.72982788085938, + "logps/rejected": -590.0950927734375, + "loss": 0.3176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39298173785209656, + "rewards/margins": 2.813382625579834, + "rewards/rejected": -2.420401096343994, + "step": 503 + }, + { + "epoch": 0.94, + "learning_rate": 8.285107062871333e-10, + "logits/chosen": -3.089031934738159, + "logits/rejected": -3.1220390796661377, + "logps/chosen": -120.72425842285156, + "logps/rejected": -316.2760314941406, + "loss": 0.3028, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4103664457798004, + "rewards/margins": 1.4273312091827393, + "rewards/rejected": -1.0169646739959717, + "step": 504 + }, + { + "epoch": 0.95, + "learning_rate": 7.743378680769019e-10, + "logits/chosen": -3.134594440460205, + "logits/rejected": -3.081752300262451, + "logps/chosen": -139.97647094726562, + "logps/rejected": -394.58489990234375, + "loss": 0.3038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2139129638671875, + "rewards/margins": 1.409022569656372, + "rewards/rejected": -1.1951096057891846, + "step": 505 + }, + { + "epoch": 0.95, + "learning_rate": 7.219826741268808e-10, + "logits/chosen": -3.1143152713775635, + "logits/rejected": -3.1132054328918457, + "logps/chosen": -148.7552490234375, + "logps/rejected": -295.70562744140625, + "loss": 0.3325, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1640804409980774, + "rewards/margins": 1.1422115564346313, + "rewards/rejected": -0.9781311750411987, + "step": 506 + }, + { + "epoch": 0.95, + "learning_rate": 6.714470576384579e-10, + "logits/chosen": -3.1071839332580566, + "logits/rejected": -3.206240653991699, + "logps/chosen": -146.13177490234375, + "logps/rejected": -659.148681640625, + "loss": 0.2673, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43094825744628906, + "rewards/margins": 3.374443769454956, + "rewards/rejected": -2.943495273590088, + "step": 507 + }, + { + "epoch": 0.95, + "learning_rate": 6.22732884625593e-10, + "logits/chosen": -3.0678305625915527, + "logits/rejected": -3.103898048400879, + "logps/chosen": -125.8221664428711, + "logps/rejected": -488.14544677734375, + "loss": 0.2607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.42054903507232666, + "rewards/margins": 1.9974868297576904, + "rewards/rejected": -1.5769379138946533, + "step": 508 + }, + { + "epoch": 0.95, + "learning_rate": 5.758419538459458e-10, + "logits/chosen": -3.141282081604004, + "logits/rejected": -3.137490749359131, + "logps/chosen": -158.83578491210938, + "logps/rejected": -626.0155029296875, + "loss": 0.2688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40873032808303833, + "rewards/margins": 2.6971397399902344, + "rewards/rejected": -2.288409471511841, + "step": 509 + }, + { + "epoch": 0.96, + "learning_rate": 5.307759967344672e-10, + "logits/chosen": -3.081697463989258, + "logits/rejected": -3.0960707664489746, + "logps/chosen": -156.73416137695312, + "logps/rejected": -340.5374755859375, + "loss": 0.3071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1962730437517166, + "rewards/margins": 1.200829267501831, + "rewards/rejected": -1.0045562982559204, + "step": 510 + }, + { + "epoch": 0.96, + "learning_rate": 4.875366773394229e-10, + "logits/chosen": -3.187812328338623, + "logits/rejected": -3.154256820678711, + "logps/chosen": -151.19715881347656, + "logps/rejected": -367.9975891113281, + "loss": 0.2648, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3235412836074829, + "rewards/margins": 1.6943787336349487, + "rewards/rejected": -1.3708374500274658, + "step": 511 + }, + { + "epoch": 0.96, + "learning_rate": 4.4612559226099857e-10, + "logits/chosen": -3.103592872619629, + "logits/rejected": -3.1437816619873047, + "logps/chosen": -147.15377807617188, + "logps/rejected": -442.363037109375, + "loss": 0.2508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2963802218437195, + "rewards/margins": 2.0894787311553955, + "rewards/rejected": -1.7930984497070312, + "step": 512 + }, + { + "epoch": 0.96, + "learning_rate": 4.0654427059229055e-10, + "logits/chosen": -3.1472010612487793, + "logits/rejected": -3.142284631729126, + "logps/chosen": -159.43988037109375, + "logps/rejected": -507.58270263671875, + "loss": 0.2628, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.314199835062027, + "rewards/margins": 2.0181822776794434, + "rewards/rejected": -1.7039825916290283, + "step": 513 + }, + { + "epoch": 0.96, + "learning_rate": 3.6879417386291856e-10, + "logits/chosen": -3.145777702331543, + "logits/rejected": -3.129086494445801, + "logps/chosen": -144.0118865966797, + "logps/rejected": -392.7803039550781, + "loss": 0.2207, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4321540594100952, + "rewards/margins": 1.7530494928359985, + "rewards/rejected": -1.3208954334259033, + "step": 514 + }, + { + "epoch": 0.96, + "learning_rate": 3.328766959849738e-10, + "logits/chosen": -3.187284469604492, + "logits/rejected": -3.1989660263061523, + "logps/chosen": -171.71075439453125, + "logps/rejected": -412.2127685546875, + "loss": 0.2795, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.261050820350647, + "rewards/margins": 1.8962528705596924, + "rewards/rejected": -1.6352020502090454, + "step": 515 + }, + { + "epoch": 0.97, + "learning_rate": 2.987931632016272e-10, + "logits/chosen": -3.173471689224243, + "logits/rejected": -3.159818649291992, + "logps/chosen": -133.20126342773438, + "logps/rejected": -509.71331787109375, + "loss": 0.2629, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.310995876789093, + "rewards/margins": 2.4898440837860107, + "rewards/rejected": -2.1788482666015625, + "step": 516 + }, + { + "epoch": 0.97, + "learning_rate": 2.6654483403810157e-10, + "logits/chosen": -3.0831875801086426, + "logits/rejected": -3.052992343902588, + "logps/chosen": -129.87286376953125, + "logps/rejected": -377.1026916503906, + "loss": 0.3339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37996408343315125, + "rewards/margins": 1.5682346820831299, + "rewards/rejected": -1.1882705688476562, + "step": 517 + }, + { + "epoch": 0.97, + "learning_rate": 2.361328992552314e-10, + "logits/chosen": -3.1319122314453125, + "logits/rejected": -3.1034421920776367, + "logps/chosen": -165.09945678710938, + "logps/rejected": -369.2383728027344, + "loss": 0.2821, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5056533813476562, + "rewards/margins": 1.8514435291290283, + "rewards/rejected": -1.345790147781372, + "step": 518 + }, + { + "epoch": 0.97, + "learning_rate": 2.0755848180547542e-10, + "logits/chosen": -3.1329352855682373, + "logits/rejected": -3.1118388175964355, + "logps/chosen": -220.42922973632812, + "logps/rejected": -545.186767578125, + "loss": 0.3124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07294158637523651, + "rewards/margins": 1.916509985923767, + "rewards/rejected": -1.8435684442520142, + "step": 519 + }, + { + "epoch": 0.97, + "learning_rate": 1.8082263679148335e-10, + "logits/chosen": -3.1528844833374023, + "logits/rejected": -3.1884334087371826, + "logps/chosen": -157.53839111328125, + "logps/rejected": -378.8784484863281, + "loss": 0.324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37177276611328125, + "rewards/margins": 2.067762851715088, + "rewards/rejected": -1.695989966392517, + "step": 520 + }, + { + "epoch": 0.98, + "learning_rate": 1.5592635142709366e-10, + "logits/chosen": -3.030707836151123, + "logits/rejected": -3.0510759353637695, + "logps/chosen": -149.46432495117188, + "logps/rejected": -299.5859375, + "loss": 0.2924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34186631441116333, + "rewards/margins": 1.2820320129394531, + "rewards/rejected": -0.9401657581329346, + "step": 521 + }, + { + "epoch": 0.98, + "learning_rate": 1.328705450009071e-10, + "logits/chosen": -3.0440118312835693, + "logits/rejected": -3.088975429534912, + "logps/chosen": -136.11245727539062, + "logps/rejected": -497.63201904296875, + "loss": 0.2703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5023102164268494, + "rewards/margins": 2.4653244018554688, + "rewards/rejected": -1.9630141258239746, + "step": 522 + }, + { + "epoch": 0.98, + "learning_rate": 1.116560688423418e-10, + "logits/chosen": -3.1267457008361816, + "logits/rejected": -3.1437196731567383, + "logps/chosen": -171.23056030273438, + "logps/rejected": -742.5333251953125, + "loss": 0.2391, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40755385160446167, + "rewards/margins": 3.0200891494750977, + "rewards/rejected": -2.612535238265991, + "step": 523 + }, + { + "epoch": 0.98, + "learning_rate": 9.228370629019711e-11, + "logits/chosen": -3.1899290084838867, + "logits/rejected": -3.179593086242676, + "logps/chosen": -186.31494140625, + "logps/rejected": -569.405029296875, + "loss": 0.2265, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5843048095703125, + "rewards/margins": 2.793818712234497, + "rewards/rejected": -2.2095139026641846, + "step": 524 + }, + { + "epoch": 0.98, + "learning_rate": 7.475417266371576e-11, + "logits/chosen": -3.146986484527588, + "logits/rejected": -3.1185457706451416, + "logps/chosen": -128.7005157470703, + "logps/rejected": -380.9029541015625, + "loss": 0.2701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.44458311796188354, + "rewards/margins": 1.773095726966858, + "rewards/rejected": -1.3285126686096191, + "step": 525 + }, + { + "epoch": 0.99, + "learning_rate": 5.906811523618271e-11, + "logits/chosen": -3.0545926094055176, + "logits/rejected": -3.087705612182617, + "logps/chosen": -150.2297821044922, + "logps/rejected": -552.5162963867188, + "loss": 0.2176, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.519792914390564, + "rewards/margins": 3.017890214920044, + "rewards/rejected": -2.4980974197387695, + "step": 526 + }, + { + "epoch": 0.99, + "learning_rate": 4.52261132110332e-11, + "logits/chosen": -3.080054998397827, + "logits/rejected": -3.0949764251708984, + "logps/chosen": -146.21841430664062, + "logps/rejected": -453.5968933105469, + "loss": 0.2666, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2537589967250824, + "rewards/margins": 1.8828787803649902, + "rewards/rejected": -1.629119873046875, + "step": 527 + }, + { + "epoch": 0.99, + "learning_rate": 3.322867770044202e-11, + "logits/chosen": -3.1179938316345215, + "logits/rejected": -3.137730598449707, + "logps/chosen": -146.3706512451172, + "logps/rejected": -682.6677856445312, + "loss": 0.2538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.43275681138038635, + "rewards/margins": 3.1742515563964844, + "rewards/rejected": -2.74149489402771, + "step": 528 + }, + { + "epoch": 0.99, + "learning_rate": 2.3076251706477536e-11, + "logits/chosen": -3.1275134086608887, + "logits/rejected": -3.088906764984131, + "logps/chosen": -167.51454162597656, + "logps/rejected": -738.6668701171875, + "loss": 0.2819, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2622409760951996, + "rewards/margins": 2.651665210723877, + "rewards/rejected": -2.3894240856170654, + "step": 529 + }, + { + "epoch": 0.99, + "learning_rate": 1.476921010471477e-11, + "logits/chosen": -3.078446865081787, + "logits/rejected": -3.111029624938965, + "logps/chosen": -113.65213012695312, + "logps/rejected": -405.52020263671875, + "loss": 0.2849, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2983596920967102, + "rewards/margins": 1.7068328857421875, + "rewards/rejected": -1.408473253250122, + "step": 530 + }, + { + "epoch": 0.99, + "learning_rate": 8.307859630429792e-12, + "logits/chosen": -3.083954095840454, + "logits/rejected": -3.074540615081787, + "logps/chosen": -101.43350219726562, + "logps/rejected": -352.3771667480469, + "loss": 0.2606, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37070274353027344, + "rewards/margins": 1.3200191259384155, + "rewards/rejected": -0.9493163824081421, + "step": 531 + }, + { + "epoch": 1.0, + "learning_rate": 3.692438867225478e-12, + "logits/chosen": -3.089052438735962, + "logits/rejected": -3.1082043647766113, + "logps/chosen": -134.64016723632812, + "logps/rejected": -923.4788818359375, + "loss": 0.2313, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5148289203643799, + "rewards/margins": 3.869446277618408, + "rewards/rejected": -3.3546175956726074, + "step": 532 + }, + { + "epoch": 1.0, + "learning_rate": 9.231182382773983e-13, + "logits/chosen": -3.0191590785980225, + "logits/rejected": -3.03761625289917, + "logps/chosen": -186.1719207763672, + "logps/rejected": -550.8084106445312, + "loss": 0.2941, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4966144561767578, + "rewards/margins": 2.4296085834503174, + "rewards/rejected": -1.9329941272735596, + "step": 533 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -3.072124481201172, + "logits/rejected": -3.0544466972351074, + "logps/chosen": -144.7108917236328, + "logps/rejected": -619.5296630859375, + "loss": 0.2561, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29144442081451416, + "rewards/margins": 2.8432388305664062, + "rewards/rejected": -2.5517945289611816, + "step": 534 + }, + { + "epoch": 1.0, + "step": 534, + "total_flos": 0.0, + "train_loss": 0.3880178291587794, + "train_runtime": 1092.4036, + "train_samples_per_second": 3.91, + "train_steps_per_second": 0.489 + } + ], + "logging_steps": 1.0, + "max_steps": 534, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/README.md b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95a6e735ab17970ac51fee8f6b2c7f264e8f70e6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/README.md @@ -0,0 +1,202 @@ +--- +base_model: liuhaotian/llava-v1.6-mistral-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/adapter_config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cdf979dd6dd08f319aa2fedb1d81c462e5a89645 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "o_proj", + "k_proj", + "gate_proj", + "up_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/adapter_model.safetensors b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82abfb1743ef4cb00d3f679ccabe43e5884f91a6 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284c88afec91b595b2ccb0125c35ec11936e1bb5743595e1822860163174f44e +size 708925520 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/config.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ea14a76ff4cee69b8db81d08f95108817f81b5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaMistralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/non_lora_trainables.bin b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/non_lora_trainables.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ae47bce15d1d27e2a1892d51ad129f29f2d2cb9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/non_lora_trainables.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fb82c3660319e6d0b239950b20c28181e97f1ade117dc0660b40e2ad94a89b +size 912 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/trainer_state.json b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..80939c9faf84987a2e3bec3b2229f892cb2ad741 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1-judge_new_lora/trainer_state.json @@ -0,0 +1,5406 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 384, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 8.333333333333332e-09, + "logits/chosen": -3.0307211875915527, + "logits/rejected": -3.079486846923828, + "logps/chosen": -117.82061004638672, + "logps/rejected": -268.4422607421875, + "loss": 0.8499, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666664e-08, + "logits/chosen": -3.032890796661377, + "logits/rejected": -3.0348920822143555, + "logps/chosen": -153.7798614501953, + "logps/rejected": -356.5205078125, + "loss": 0.8718, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.04731598123908043, + "rewards/margins": -0.04731598123908043, + "rewards/rejected": 0.0, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-08, + "logits/chosen": -3.039437770843506, + "logits/rejected": -3.0532898902893066, + "logps/chosen": -156.29916381835938, + "logps/rejected": -501.42156982421875, + "loss": 0.8643, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0035282140597701073, + "rewards/margins": -0.0917331725358963, + "rewards/rejected": 0.09526138007640839, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 3.333333333333333e-08, + "logits/chosen": -3.115422010421753, + "logits/rejected": -3.0844807624816895, + "logps/chosen": -173.97581481933594, + "logps/rejected": -348.89453125, + "loss": 0.8811, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.011089324951171875, + "rewards/margins": -0.047400668263435364, + "rewards/rejected": 0.05848999321460724, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-08, + "logits/chosen": -3.104954242706299, + "logits/rejected": -3.0922954082489014, + "logps/chosen": -133.37814331054688, + "logps/rejected": -277.6751708984375, + "loss": 0.8628, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02024536207318306, + "rewards/margins": -0.05682678148150444, + "rewards/rejected": 0.03658141940832138, + "step": 5 + }, + { + "epoch": 0.02, + "learning_rate": 5e-08, + "logits/chosen": -3.0719985961914062, + "logits/rejected": -3.0922281742095947, + "logps/chosen": -149.9077606201172, + "logps/rejected": -1046.501708984375, + "loss": 0.865, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.043318942189216614, + "rewards/margins": -0.038553617894649506, + "rewards/rejected": 0.08187256008386612, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333333e-08, + "logits/chosen": -3.1053850650787354, + "logits/rejected": -3.129606246948242, + "logps/chosen": -172.1470184326172, + "logps/rejected": -494.1867980957031, + "loss": 0.8415, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.01180420070886612, + "rewards/margins": 0.04008789360523224, + "rewards/rejected": -0.05189209058880806, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666665e-08, + "logits/chosen": -3.016517400741577, + "logits/rejected": -3.017077922821045, + "logps/chosen": -171.33059692382812, + "logps/rejected": -455.6844482421875, + "loss": 0.8505, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02915649488568306, + "rewards/margins": -0.013015749864280224, + "rewards/rejected": 0.04217224195599556, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 7.5e-08, + "logits/chosen": -3.0334219932556152, + "logits/rejected": -3.0112240314483643, + "logps/chosen": -161.5889892578125, + "logps/rejected": -377.88885498046875, + "loss": 0.8674, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0023033141624182463, + "rewards/margins": 0.0013206489384174347, + "rewards/rejected": -0.00362396240234375, + "step": 9 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-08, + "logits/chosen": -3.125739097595215, + "logits/rejected": -3.125778913497925, + "logps/chosen": -127.92085266113281, + "logps/rejected": -358.69696044921875, + "loss": 0.8315, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0011234285775572062, + "rewards/margins": -0.015882492065429688, + "rewards/rejected": 0.014759061858057976, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666665e-08, + "logits/chosen": -3.0588126182556152, + "logits/rejected": -3.081554412841797, + "logps/chosen": -136.18914794921875, + "logps/rejected": -480.212646484375, + "loss": 0.8574, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0030193328857421875, + "rewards/margins": -0.03638725355267525, + "rewards/rejected": 0.03336792066693306, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 1e-07, + "logits/chosen": -3.109437942504883, + "logits/rejected": -3.096813917160034, + "logps/chosen": -164.38626098632812, + "logps/rejected": -548.5574951171875, + "loss": 0.8721, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.00432968232780695, + "rewards/margins": 0.048378754407167435, + "rewards/rejected": -0.05270843580365181, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 9.999821700020548e-08, + "logits/chosen": -3.0480148792266846, + "logits/rejected": -3.072915554046631, + "logps/chosen": -154.36654663085938, + "logps/rejected": -246.27928161621094, + "loss": 0.8451, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0019016265869140625, + "rewards/margins": -0.036587905138731, + "rewards/rejected": 0.03468628227710724, + "step": 13 + }, + { + "epoch": 0.04, + "learning_rate": 9.999286812798549e-08, + "logits/chosen": -3.0555713176727295, + "logits/rejected": -3.0670523643493652, + "logps/chosen": -121.01158905029297, + "logps/rejected": -563.6170654296875, + "loss": 0.8392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028258897364139557, + "rewards/margins": 0.09066124260425568, + "rewards/rejected": -0.06240234524011612, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 9.998395376482151e-08, + "logits/chosen": -3.0960075855255127, + "logits/rejected": -3.0433192253112793, + "logps/chosen": -164.980224609375, + "logps/rejected": -421.9732360839844, + "loss": 0.8477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01464080810546875, + "rewards/margins": 0.06188201904296875, + "rewards/rejected": -0.0472412109375, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 9.997147454648589e-08, + "logits/chosen": -3.0965030193328857, + "logits/rejected": -2.9544897079467773, + "logps/chosen": -154.86343383789062, + "logps/rejected": -1022.7692260742188, + "loss": 0.8264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007194137666374445, + "rewards/margins": 0.16180458664894104, + "rewards/rejected": -0.16899871826171875, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 9.995543136299635e-08, + "logits/chosen": -3.0376551151275635, + "logits/rejected": -2.9783806800842285, + "logps/chosen": -157.32798767089844, + "logps/rejected": -334.8014221191406, + "loss": 0.8592, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.02289428748190403, + "rewards/margins": 0.0074790953658521175, + "rewards/rejected": 0.015415191650390625, + "step": 17 + }, + { + "epoch": 0.05, + "learning_rate": 9.993582535855262e-08, + "logits/chosen": -2.9836764335632324, + "logits/rejected": -3.0253584384918213, + "logps/chosen": -148.21580505371094, + "logps/rejected": -850.8123779296875, + "loss": 0.8222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.014519501477479935, + "rewards/margins": 0.11354293674230576, + "rewards/rejected": -0.09902343899011612, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 9.991265793145478e-08, + "logits/chosen": -3.124847412109375, + "logits/rejected": -3.1592774391174316, + "logps/chosen": -108.57760620117188, + "logps/rejected": -357.8117980957031, + "loss": 0.7856, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012224960140883923, + "rewards/margins": 0.04878196865320206, + "rewards/rejected": -0.03655700758099556, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 9.988593073400353e-08, + "logits/chosen": -3.1143364906311035, + "logits/rejected": -3.0975587368011475, + "logps/chosen": -130.48472595214844, + "logps/rejected": -224.56846618652344, + "loss": 0.7985, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.008734130300581455, + "rewards/margins": 0.008308409713208675, + "rewards/rejected": 0.00042572012171149254, + "step": 20 + }, + { + "epoch": 0.05, + "learning_rate": 9.985564567238236e-08, + "logits/chosen": -3.088385581970215, + "logits/rejected": -3.113447427749634, + "logps/chosen": -115.84940338134766, + "logps/rejected": -399.8799743652344, + "loss": 0.8091, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005365753546357155, + "rewards/margins": 0.05701598897576332, + "rewards/rejected": -0.051650237292051315, + "step": 21 + }, + { + "epoch": 0.06, + "learning_rate": 9.982180490652164e-08, + "logits/chosen": -3.128260612487793, + "logits/rejected": -3.063107490539551, + "logps/chosen": -96.20269012451172, + "logps/rejected": -270.8910217285156, + "loss": 0.8009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02546844631433487, + "rewards/margins": 0.05889739841222763, + "rewards/rejected": -0.03342895582318306, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 9.978441084994451e-08, + "logits/chosen": -3.0731868743896484, + "logits/rejected": -3.08613920211792, + "logps/chosen": -170.9404296875, + "logps/rejected": -395.1220703125, + "loss": 0.8253, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.009806442074477673, + "rewards/margins": 0.0309421569108963, + "rewards/rejected": -0.04074859619140625, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 9.974346616959474e-08, + "logits/chosen": -3.091060161590576, + "logits/rejected": -3.1305809020996094, + "logps/chosen": -168.5071258544922, + "logps/rejected": -601.2286987304688, + "loss": 0.7924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03056793287396431, + "rewards/margins": 0.252227783203125, + "rewards/rejected": -0.221659854054451, + "step": 24 + }, + { + "epoch": 0.07, + "learning_rate": 9.969897378564666e-08, + "logits/chosen": -3.0166072845458984, + "logits/rejected": -2.9743990898132324, + "logps/chosen": -133.541015625, + "logps/rejected": -400.1943359375, + "loss": 0.7941, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.00258483923971653, + "rewards/margins": 0.05059662088751793, + "rewards/rejected": -0.04801177978515625, + "step": 25 + }, + { + "epoch": 0.07, + "learning_rate": 9.965093687129667e-08, + "logits/chosen": -3.083164930343628, + "logits/rejected": -3.058166980743408, + "logps/chosen": -140.86941528320312, + "logps/rejected": -460.68670654296875, + "loss": 0.8053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033519745338708162, + "rewards/margins": 0.05784187093377113, + "rewards/rejected": -0.06119384616613388, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 9.959935885253715e-08, + "logits/chosen": -3.0929617881774902, + "logits/rejected": -3.0725157260894775, + "logps/chosen": -158.955322265625, + "logps/rejected": -513.2744750976562, + "loss": 0.7777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005496978759765625, + "rewards/margins": 0.14572982490062714, + "rewards/rejected": -0.1402328461408615, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 9.954424340791195e-08, + "logits/chosen": -3.063067674636841, + "logits/rejected": -3.0788683891296387, + "logps/chosen": -139.289794921875, + "logps/rejected": -652.360107421875, + "loss": 0.7909, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.005984497256577015, + "rewards/margins": 0.1822357177734375, + "rewards/rejected": -0.17625121772289276, + "step": 28 + }, + { + "epoch": 0.08, + "learning_rate": 9.948559446825411e-08, + "logits/chosen": -3.064708709716797, + "logits/rejected": -3.050351142883301, + "logps/chosen": -174.5298614501953, + "logps/rejected": -311.0984802246094, + "loss": 0.7712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.007610702887177467, + "rewards/margins": 0.07946434617042542, + "rewards/rejected": -0.0718536376953125, + "step": 29 + }, + { + "epoch": 0.08, + "learning_rate": 9.942341621640557e-08, + "logits/chosen": -3.035243511199951, + "logits/rejected": -3.110595226287842, + "logps/chosen": -104.3738021850586, + "logps/rejected": -338.25457763671875, + "loss": 0.7621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008847045712172985, + "rewards/margins": 0.08385621011257172, + "rewards/rejected": -0.09270325303077698, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 9.935771308691871e-08, + "logits/chosen": -3.0256645679473877, + "logits/rejected": -3.0206105709075928, + "logps/chosen": -120.55731201171875, + "logps/rejected": -355.2936706542969, + "loss": 0.7465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02988738939166069, + "rewards/margins": 0.16568145155906677, + "rewards/rejected": -0.13579407334327698, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 9.928848976574017e-08, + "logits/chosen": -3.0911827087402344, + "logits/rejected": -3.0772881507873535, + "logps/chosen": -154.494384765625, + "logps/rejected": -426.6494445800781, + "loss": 0.7407, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05678558349609375, + "rewards/margins": 0.15048065781593323, + "rewards/rejected": -0.09369507431983948, + "step": 32 + }, + { + "epoch": 0.09, + "learning_rate": 9.92157511898767e-08, + "logits/chosen": -3.110487699508667, + "logits/rejected": -3.079414129257202, + "logps/chosen": -130.54550170898438, + "logps/rejected": -357.9940185546875, + "loss": 0.7443, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04718208312988281, + "rewards/margins": 0.1623668670654297, + "rewards/rejected": -0.11518478393554688, + "step": 33 + }, + { + "epoch": 0.09, + "learning_rate": 9.91395025470429e-08, + "logits/chosen": -3.047300100326538, + "logits/rejected": -3.0848774909973145, + "logps/chosen": -113.19815063476562, + "logps/rejected": -551.868408203125, + "loss": 0.729, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.019368361681699753, + "rewards/margins": 0.2006549835205078, + "rewards/rejected": -0.18128661811351776, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 9.905974927529134e-08, + "logits/chosen": -3.0408053398132324, + "logits/rejected": -3.044644355773926, + "logps/chosen": -133.75390625, + "logps/rejected": -360.02001953125, + "loss": 0.7387, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00974121131002903, + "rewards/margins": 0.09431152045726776, + "rewards/rejected": -0.08457031846046448, + "step": 35 + }, + { + "epoch": 0.09, + "learning_rate": 9.897649706262472e-08, + "logits/chosen": -3.0656991004943848, + "logits/rejected": -3.1121299266815186, + "logps/chosen": -156.877197265625, + "logps/rejected": -526.0008544921875, + "loss": 0.703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05649872124195099, + "rewards/margins": 0.3229354918003082, + "rewards/rejected": -0.26643675565719604, + "step": 36 + }, + { + "epoch": 0.1, + "learning_rate": 9.888975184659016e-08, + "logits/chosen": -3.0608201026916504, + "logits/rejected": -3.064298152923584, + "logps/chosen": -140.88299560546875, + "logps/rejected": -462.60955810546875, + "loss": 0.7222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027727127075195312, + "rewards/margins": 0.2095561921596527, + "rewards/rejected": -0.1818290799856186, + "step": 37 + }, + { + "epoch": 0.1, + "learning_rate": 9.879951981385577e-08, + "logits/chosen": -3.12353253364563, + "logits/rejected": -3.092846393585205, + "logps/chosen": -173.59617614746094, + "logps/rejected": -417.7674255371094, + "loss": 0.7695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02161560207605362, + "rewards/margins": 0.15302734076976776, + "rewards/rejected": -0.13141174614429474, + "step": 38 + }, + { + "epoch": 0.1, + "learning_rate": 9.870580739976935e-08, + "logits/chosen": -3.0732216835021973, + "logits/rejected": -3.0793399810791016, + "logps/chosen": -138.778564453125, + "logps/rejected": -320.620361328125, + "loss": 0.7109, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02257843129336834, + "rewards/margins": 0.133769229054451, + "rewards/rejected": -0.1111907958984375, + "step": 39 + }, + { + "epoch": 0.1, + "learning_rate": 9.860862128789953e-08, + "logits/chosen": -3.129629611968994, + "logits/rejected": -3.1317062377929688, + "logps/chosen": -144.60348510742188, + "logps/rejected": -447.40350341796875, + "loss": 0.718, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.050365447998046875, + "rewards/margins": 0.24784623086452484, + "rewards/rejected": -0.19748076796531677, + "step": 40 + }, + { + "epoch": 0.11, + "learning_rate": 9.8507968409559e-08, + "logits/chosen": -3.0606250762939453, + "logits/rejected": -3.135775566101074, + "logps/chosen": -187.62294006347656, + "logps/rejected": -524.2372436523438, + "loss": 0.7008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.052581023424863815, + "rewards/margins": 0.22276687622070312, + "rewards/rejected": -0.170185849070549, + "step": 41 + }, + { + "epoch": 0.11, + "learning_rate": 9.840385594331021e-08, + "logits/chosen": -3.0988473892211914, + "logits/rejected": -3.0915627479553223, + "logps/chosen": -137.639404296875, + "logps/rejected": -467.61285400390625, + "loss": 0.691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06322822719812393, + "rewards/margins": 0.22872963547706604, + "rewards/rejected": -0.1655014157295227, + "step": 42 + }, + { + "epoch": 0.11, + "learning_rate": 9.82962913144534e-08, + "logits/chosen": -3.052077293395996, + "logits/rejected": -3.0944175720214844, + "logps/chosen": -133.0662078857422, + "logps/rejected": -376.21063232421875, + "loss": 0.7323, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03619346767663956, + "rewards/margins": 0.16581079363822937, + "rewards/rejected": -0.12961731851100922, + "step": 43 + }, + { + "epoch": 0.11, + "learning_rate": 9.818528219449705e-08, + "logits/chosen": -3.082125663757324, + "logits/rejected": -3.1191728115081787, + "logps/chosen": -136.15444946289062, + "logps/rejected": -404.40283203125, + "loss": 0.7463, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017996978014707565, + "rewards/margins": 0.09587325155735016, + "rewards/rejected": -0.07787628471851349, + "step": 44 + }, + { + "epoch": 0.12, + "learning_rate": 9.807083650061063e-08, + "logits/chosen": -3.0146260261535645, + "logits/rejected": -3.079491138458252, + "logps/chosen": -134.63063049316406, + "logps/rejected": -449.11090087890625, + "loss": 0.7017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.062363434582948685, + "rewards/margins": 0.3515937924385071, + "rewards/rejected": -0.2892303466796875, + "step": 45 + }, + { + "epoch": 0.12, + "learning_rate": 9.79529623950601e-08, + "logits/chosen": -3.051004648208618, + "logits/rejected": -3.0592997074127197, + "logps/chosen": -125.97396087646484, + "logps/rejected": -251.21022033691406, + "loss": 0.7341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.022904586046934128, + "rewards/margins": 0.1687847226858139, + "rewards/rejected": -0.14588013291358948, + "step": 46 + }, + { + "epoch": 0.12, + "learning_rate": 9.783166828462573e-08, + "logits/chosen": -3.080752372741699, + "logits/rejected": -3.0763068199157715, + "logps/chosen": -126.89360046386719, + "logps/rejected": -432.9432067871094, + "loss": 0.7066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0362396240234375, + "rewards/margins": 0.35206910967826843, + "rewards/rejected": -0.31582948565483093, + "step": 47 + }, + { + "epoch": 0.12, + "learning_rate": 9.770696282000244e-08, + "logits/chosen": -3.099541664123535, + "logits/rejected": -3.113893985748291, + "logps/chosen": -116.53134155273438, + "logps/rejected": -398.0201416015625, + "loss": 0.6757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0734890028834343, + "rewards/margins": 0.33077967166900635, + "rewards/rejected": -0.25729066133499146, + "step": 48 + }, + { + "epoch": 0.13, + "learning_rate": 9.757885489518296e-08, + "logits/chosen": -3.142254114151001, + "logits/rejected": -3.1298084259033203, + "logps/chosen": -163.1243896484375, + "logps/rejected": -469.64166259765625, + "loss": 0.6568, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00826721265912056, + "rewards/margins": 0.25718995928764343, + "rewards/rejected": -0.24892273545265198, + "step": 49 + }, + { + "epoch": 0.13, + "learning_rate": 9.744735364682344e-08, + "logits/chosen": -3.077094078063965, + "logits/rejected": -3.0158615112304688, + "logps/chosen": -126.41539764404297, + "logps/rejected": -734.1640625, + "loss": 0.6533, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0509490966796875, + "rewards/margins": 0.38267672061920166, + "rewards/rejected": -0.33172762393951416, + "step": 50 + }, + { + "epoch": 0.13, + "learning_rate": 9.731246845359184e-08, + "logits/chosen": -3.075345993041992, + "logits/rejected": -3.132819175720215, + "logps/chosen": -145.1927490234375, + "logps/rejected": -579.7572631835938, + "loss": 0.6817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.054694369435310364, + "rewards/margins": 0.3316383361816406, + "rewards/rejected": -0.27694398164749146, + "step": 51 + }, + { + "epoch": 0.14, + "learning_rate": 9.717420893549901e-08, + "logits/chosen": -3.0543928146362305, + "logits/rejected": -3.038001537322998, + "logps/chosen": -138.69903564453125, + "logps/rejected": -240.2859649658203, + "loss": 0.6807, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030818939208984375, + "rewards/margins": 0.14298096299171448, + "rewards/rejected": -0.1121620163321495, + "step": 52 + }, + { + "epoch": 0.14, + "learning_rate": 9.703258495321265e-08, + "logits/chosen": -3.0872020721435547, + "logits/rejected": -3.124840497970581, + "logps/chosen": -157.5909423828125, + "logps/rejected": -446.0245361328125, + "loss": 0.7015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07020988315343857, + "rewards/margins": 0.318490207195282, + "rewards/rejected": -0.2482803463935852, + "step": 53 + }, + { + "epoch": 0.14, + "learning_rate": 9.688760660735402e-08, + "logits/chosen": -3.0525829792022705, + "logits/rejected": -3.034884452819824, + "logps/chosen": -161.3529052734375, + "logps/rejected": -456.83935546875, + "loss": 0.6736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0756988525390625, + "rewards/margins": 0.3157547116279602, + "rewards/rejected": -0.2400558590888977, + "step": 54 + }, + { + "epoch": 0.14, + "learning_rate": 9.673928423777756e-08, + "logits/chosen": -3.0561890602111816, + "logits/rejected": -3.0936636924743652, + "logps/chosen": -131.31207275390625, + "logps/rejected": -326.34259033203125, + "loss": 0.6216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06268005073070526, + "rewards/margins": 0.25916749238967896, + "rewards/rejected": -0.1964874267578125, + "step": 55 + }, + { + "epoch": 0.15, + "learning_rate": 9.658762842283341e-08, + "logits/chosen": -3.1395034790039062, + "logits/rejected": -3.0616848468780518, + "logps/chosen": -144.80642700195312, + "logps/rejected": -404.3010559082031, + "loss": 0.6824, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06909103691577911, + "rewards/margins": 0.3904060423374176, + "rewards/rejected": -0.3213150203227997, + "step": 56 + }, + { + "epoch": 0.15, + "learning_rate": 9.643264997861311e-08, + "logits/chosen": -3.0999338626861572, + "logits/rejected": -3.097315788269043, + "logps/chosen": -138.83663940429688, + "logps/rejected": -341.8115539550781, + "loss": 0.6566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030616380274295807, + "rewards/margins": 0.2393321990966797, + "rewards/rejected": -0.20871582627296448, + "step": 57 + }, + { + "epoch": 0.15, + "learning_rate": 9.627435995817797e-08, + "logits/chosen": -3.105870246887207, + "logits/rejected": -3.100125789642334, + "logps/chosen": -146.20361328125, + "logps/rejected": -291.14190673828125, + "loss": 0.6811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03461112827062607, + "rewards/margins": 0.1516590118408203, + "rewards/rejected": -0.11704788357019424, + "step": 58 + }, + { + "epoch": 0.15, + "learning_rate": 9.611276965077097e-08, + "logits/chosen": -3.1170737743377686, + "logits/rejected": -3.078359365463257, + "logps/chosen": -155.81353759765625, + "logps/rejected": -447.19140625, + "loss": 0.6618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07251396030187607, + "rewards/margins": 0.38380545377731323, + "rewards/rejected": -0.31129151582717896, + "step": 59 + }, + { + "epoch": 0.16, + "learning_rate": 9.594789058101152e-08, + "logits/chosen": -3.0612587928771973, + "logits/rejected": -3.0942158699035645, + "logps/chosen": -95.36859130859375, + "logps/rejected": -383.41009521484375, + "loss": 0.6715, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08878135681152344, + "rewards/margins": 0.45123863220214844, + "rewards/rejected": -0.362457275390625, + "step": 60 + }, + { + "epoch": 0.16, + "learning_rate": 9.577973450807351e-08, + "logits/chosen": -3.097565174102783, + "logits/rejected": -3.056882381439209, + "logps/chosen": -122.61428833007812, + "logps/rejected": -241.2613525390625, + "loss": 0.6588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05034294351935387, + "rewards/margins": 0.15828514099121094, + "rewards/rejected": -0.10794220119714737, + "step": 61 + }, + { + "epoch": 0.16, + "learning_rate": 9.560831342484666e-08, + "logits/chosen": -3.077652931213379, + "logits/rejected": -3.0547194480895996, + "logps/chosen": -129.46328735351562, + "logps/rejected": -436.571533203125, + "loss": 0.6392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08942680060863495, + "rewards/margins": 0.5197811126708984, + "rewards/rejected": -0.4303542971611023, + "step": 62 + }, + { + "epoch": 0.16, + "learning_rate": 9.543363955708124e-08, + "logits/chosen": -3.0368919372558594, + "logits/rejected": -2.950756072998047, + "logps/chosen": -123.05414581298828, + "logps/rejected": -410.7945251464844, + "loss": 0.6361, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.052327729761600494, + "rewards/margins": 0.4158470332622528, + "rewards/rejected": -0.3635193109512329, + "step": 63 + }, + { + "epoch": 0.17, + "learning_rate": 9.525572536251606e-08, + "logits/chosen": -3.0845046043395996, + "logits/rejected": -3.059340000152588, + "logps/chosen": -139.08029174804688, + "logps/rejected": -359.78558349609375, + "loss": 0.6347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05359039455652237, + "rewards/margins": 0.3303421139717102, + "rewards/rejected": -0.27675172686576843, + "step": 64 + }, + { + "epoch": 0.17, + "learning_rate": 9.507458352999e-08, + "logits/chosen": -3.0462825298309326, + "logits/rejected": -3.0787110328674316, + "logps/chosen": -131.77865600585938, + "logps/rejected": -339.93902587890625, + "loss": 0.6258, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05319366604089737, + "rewards/margins": 0.2294357270002365, + "rewards/rejected": -0.17624206840991974, + "step": 65 + }, + { + "epoch": 0.17, + "learning_rate": 9.489022697853709e-08, + "logits/chosen": -3.017125129699707, + "logits/rejected": -3.0866847038269043, + "logps/chosen": -125.03013610839844, + "logps/rejected": -625.90087890625, + "loss": 0.6328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07961845397949219, + "rewards/margins": 0.509965181350708, + "rewards/rejected": -0.43034666776657104, + "step": 66 + }, + { + "epoch": 0.17, + "learning_rate": 9.470266885646503e-08, + "logits/chosen": -3.0929908752441406, + "logits/rejected": -3.1097140312194824, + "logps/chosen": -135.7242431640625, + "logps/rejected": -393.15716552734375, + "loss": 0.6228, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1434623748064041, + "rewards/margins": 0.4979194700717926, + "rewards/rejected": -0.3544571101665497, + "step": 67 + }, + { + "epoch": 0.18, + "learning_rate": 9.451192254041758e-08, + "logits/chosen": -3.014169692993164, + "logits/rejected": -3.046382427215576, + "logps/chosen": -124.5097427368164, + "logps/rejected": -482.1575622558594, + "loss": 0.6191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0772632583975792, + "rewards/margins": 0.5526202917098999, + "rewards/rejected": -0.4753570556640625, + "step": 68 + }, + { + "epoch": 0.18, + "learning_rate": 9.431800163442041e-08, + "logits/chosen": -3.094478130340576, + "logits/rejected": -3.103801727294922, + "logps/chosen": -130.55062866210938, + "logps/rejected": -391.23785400390625, + "loss": 0.6029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.038587190210819244, + "rewards/margins": 0.4001571834087372, + "rewards/rejected": -0.36156997084617615, + "step": 69 + }, + { + "epoch": 0.18, + "learning_rate": 9.412091996891095e-08, + "logits/chosen": -3.04434871673584, + "logits/rejected": -3.0291128158569336, + "logps/chosen": -132.2559814453125, + "logps/rejected": -519.736572265625, + "loss": 0.6004, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07587585598230362, + "rewards/margins": 0.5874344110488892, + "rewards/rejected": -0.5115585327148438, + "step": 70 + }, + { + "epoch": 0.18, + "learning_rate": 9.392069159975198e-08, + "logits/chosen": -3.1411893367767334, + "logits/rejected": -3.0590035915374756, + "logps/chosen": -157.3871307373047, + "logps/rejected": -436.4142150878906, + "loss": 0.5871, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07330437004566193, + "rewards/margins": 0.36560553312301636, + "rewards/rejected": -0.2923011779785156, + "step": 71 + }, + { + "epoch": 0.19, + "learning_rate": 9.37173308072291e-08, + "logits/chosen": -2.9899685382843018, + "logits/rejected": -3.033508539199829, + "logps/chosen": -171.3026580810547, + "logps/rejected": -576.732177734375, + "loss": 0.5646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08404427021741867, + "rewards/margins": 0.504245400428772, + "rewards/rejected": -0.4202011227607727, + "step": 72 + }, + { + "epoch": 0.19, + "learning_rate": 9.35108520950324e-08, + "logits/chosen": -3.0442543029785156, + "logits/rejected": -2.990016460418701, + "logps/chosen": -108.2281494140625, + "logps/rejected": -371.3621826171875, + "loss": 0.6131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0823211669921875, + "rewards/margins": 0.367379754781723, + "rewards/rejected": -0.2850585877895355, + "step": 73 + }, + { + "epoch": 0.19, + "learning_rate": 9.330127018922194e-08, + "logits/chosen": -3.069821357727051, + "logits/rejected": -3.083265542984009, + "logps/chosen": -159.2045135498047, + "logps/rejected": -500.7535400390625, + "loss": 0.5699, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0553436279296875, + "rewards/margins": 0.5946701169013977, + "rewards/rejected": -0.5393264889717102, + "step": 74 + }, + { + "epoch": 0.2, + "learning_rate": 9.308860003717748e-08, + "logits/chosen": -3.077775001525879, + "logits/rejected": -3.117739677429199, + "logps/chosen": -176.3304901123047, + "logps/rejected": -530.09521484375, + "loss": 0.5942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.030247116461396217, + "rewards/margins": 0.5923015475273132, + "rewards/rejected": -0.562054455280304, + "step": 75 + }, + { + "epoch": 0.2, + "learning_rate": 9.287285680653254e-08, + "logits/chosen": -3.0788753032684326, + "logits/rejected": -3.1052894592285156, + "logps/chosen": -127.11264038085938, + "logps/rejected": -437.7774658203125, + "loss": 0.562, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10236930847167969, + "rewards/margins": 0.5262508392333984, + "rewards/rejected": -0.42388156056404114, + "step": 76 + }, + { + "epoch": 0.2, + "learning_rate": 9.265405588409256e-08, + "logits/chosen": -3.077526092529297, + "logits/rejected": -3.0907516479492188, + "logps/chosen": -107.19694519042969, + "logps/rejected": -410.6039123535156, + "loss": 0.5727, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13322296738624573, + "rewards/margins": 0.5902794003486633, + "rewards/rejected": -0.4570564329624176, + "step": 77 + }, + { + "epoch": 0.2, + "learning_rate": 9.243221287473755e-08, + "logits/chosen": -3.0861191749572754, + "logits/rejected": -3.0687789916992188, + "logps/chosen": -134.363037109375, + "logps/rejected": -317.60772705078125, + "loss": 0.6356, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13323326408863068, + "rewards/margins": 0.429581880569458, + "rewards/rejected": -0.29634857177734375, + "step": 78 + }, + { + "epoch": 0.21, + "learning_rate": 9.220734360030906e-08, + "logits/chosen": -3.1204090118408203, + "logits/rejected": -3.1223556995391846, + "logps/chosen": -133.4827423095703, + "logps/rejected": -521.9791870117188, + "loss": 0.5814, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06677131354808807, + "rewards/margins": 0.5327320098876953, + "rewards/rejected": -0.46596068143844604, + "step": 79 + }, + { + "epoch": 0.21, + "learning_rate": 9.197946409848193e-08, + "logits/chosen": -3.047451972961426, + "logits/rejected": -3.1015357971191406, + "logps/chosen": -127.21318054199219, + "logps/rejected": -407.56072998046875, + "loss": 0.5884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07233314961194992, + "rewards/margins": 0.4177204370498657, + "rewards/rejected": -0.3453872799873352, + "step": 80 + }, + { + "epoch": 0.21, + "learning_rate": 9.174859062162037e-08, + "logits/chosen": -3.1356382369995117, + "logits/rejected": -3.138155937194824, + "logps/chosen": -120.20954132080078, + "logps/rejected": -351.27215576171875, + "loss": 0.5584, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09806785732507706, + "rewards/margins": 0.40987667441368103, + "rewards/rejected": -0.3118087947368622, + "step": 81 + }, + { + "epoch": 0.21, + "learning_rate": 9.151473963561882e-08, + "logits/chosen": -3.1201958656311035, + "logits/rejected": -3.136928081512451, + "logps/chosen": -116.27622985839844, + "logps/rejected": -544.8558959960938, + "loss": 0.5595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09387169033288956, + "rewards/margins": 0.7035847306251526, + "rewards/rejected": -0.6097130179405212, + "step": 82 + }, + { + "epoch": 0.22, + "learning_rate": 9.127792781872768e-08, + "logits/chosen": -3.0552587509155273, + "logits/rejected": -3.1091809272766113, + "logps/chosen": -184.06842041015625, + "logps/rejected": -511.405029296875, + "loss": 0.5857, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12380675971508026, + "rewards/margins": 0.45294344425201416, + "rewards/rejected": -0.3291366696357727, + "step": 83 + }, + { + "epoch": 0.22, + "learning_rate": 9.103817206036382e-08, + "logits/chosen": -3.120823860168457, + "logits/rejected": -3.095310688018799, + "logps/chosen": -131.00457763671875, + "logps/rejected": -524.494140625, + "loss": 0.5288, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1514739990234375, + "rewards/margins": 0.8164764642715454, + "rewards/rejected": -0.6650024652481079, + "step": 84 + }, + { + "epoch": 0.22, + "learning_rate": 9.079548945990592e-08, + "logits/chosen": -3.0484440326690674, + "logits/rejected": -3.0286378860473633, + "logps/chosen": -127.40220642089844, + "logps/rejected": -451.73223876953125, + "loss": 0.5811, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06544075161218643, + "rewards/margins": 0.5599842071533203, + "rewards/rejected": -0.4945434629917145, + "step": 85 + }, + { + "epoch": 0.22, + "learning_rate": 9.054989732547505e-08, + "logits/chosen": -3.0554211139678955, + "logits/rejected": -3.1054043769836426, + "logps/chosen": -114.87007141113281, + "logps/rejected": -290.08221435546875, + "loss": 0.58, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08735084533691406, + "rewards/margins": 0.2839786410331726, + "rewards/rejected": -0.19662781059741974, + "step": 86 + }, + { + "epoch": 0.23, + "learning_rate": 9.030141317270026e-08, + "logits/chosen": -3.0753350257873535, + "logits/rejected": -3.109714984893799, + "logps/chosen": -104.64696502685547, + "logps/rejected": -580.9428100585938, + "loss": 0.5492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1442081481218338, + "rewards/margins": 0.8396358489990234, + "rewards/rejected": -0.6954277157783508, + "step": 87 + }, + { + "epoch": 0.23, + "learning_rate": 9.005005472346922e-08, + "logits/chosen": -3.088611125946045, + "logits/rejected": -3.1046383380889893, + "logps/chosen": -164.68972778320312, + "logps/rejected": -379.5693054199219, + "loss": 0.5866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11806831508874893, + "rewards/margins": 0.5777943134307861, + "rewards/rejected": -0.459725946187973, + "step": 88 + }, + { + "epoch": 0.23, + "learning_rate": 8.979583990466453e-08, + "logits/chosen": -2.99778413772583, + "logits/rejected": -3.0379319190979004, + "logps/chosen": -124.2940444946289, + "logps/rejected": -596.187744140625, + "loss": 0.4866, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05086822807788849, + "rewards/margins": 0.7394806146621704, + "rewards/rejected": -0.6886124014854431, + "step": 89 + }, + { + "epoch": 0.23, + "learning_rate": 8.953878684688491e-08, + "logits/chosen": -3.0817694664001465, + "logits/rejected": -3.064870834350586, + "logps/chosen": -159.00543212890625, + "logps/rejected": -547.6121826171875, + "loss": 0.5425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08817443996667862, + "rewards/margins": 0.634625256061554, + "rewards/rejected": -0.5464507937431335, + "step": 90 + }, + { + "epoch": 0.24, + "learning_rate": 8.927891388315241e-08, + "logits/chosen": -2.989931106567383, + "logits/rejected": -2.995635986328125, + "logps/chosen": -135.1650390625, + "logps/rejected": -364.3675537109375, + "loss": 0.5889, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0971141830086708, + "rewards/margins": 0.43945273756980896, + "rewards/rejected": -0.34233856201171875, + "step": 91 + }, + { + "epoch": 0.24, + "learning_rate": 8.901623954760459e-08, + "logits/chosen": -3.070195436477661, + "logits/rejected": -3.0989131927490234, + "logps/chosen": -132.0677032470703, + "logps/rejected": -605.962890625, + "loss": 0.534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15522156655788422, + "rewards/margins": 0.9624221920967102, + "rewards/rejected": -0.8072006106376648, + "step": 92 + }, + { + "epoch": 0.24, + "learning_rate": 8.875078257417293e-08, + "logits/chosen": -3.015636920928955, + "logits/rejected": -3.0269956588745117, + "logps/chosen": -137.16949462890625, + "logps/rejected": -472.3475646972656, + "loss": 0.5392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10784912109375, + "rewards/margins": 0.6643036603927612, + "rewards/rejected": -0.5564544796943665, + "step": 93 + }, + { + "epoch": 0.24, + "learning_rate": 8.848256189524661e-08, + "logits/chosen": -3.1104114055633545, + "logits/rejected": -3.119488477706909, + "logps/chosen": -155.89979553222656, + "logps/rejected": -410.5753479003906, + "loss": 0.5262, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10979004204273224, + "rewards/margins": 0.5837089419364929, + "rewards/rejected": -0.4739189147949219, + "step": 94 + }, + { + "epoch": 0.25, + "learning_rate": 8.821159664032223e-08, + "logits/chosen": -3.1444764137268066, + "logits/rejected": -3.143578052520752, + "logps/chosen": -142.32901000976562, + "logps/rejected": -452.0612487792969, + "loss": 0.5319, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09103585034608841, + "rewards/margins": 0.709262490272522, + "rewards/rejected": -0.6182266473770142, + "step": 95 + }, + { + "epoch": 0.25, + "learning_rate": 8.793790613463953e-08, + "logits/chosen": -3.0895726680755615, + "logits/rejected": -3.075556755065918, + "logps/chosen": -129.18812561035156, + "logps/rejected": -801.845703125, + "loss": 0.4843, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07591895759105682, + "rewards/margins": 1.152750015258789, + "rewards/rejected": -1.0768311023712158, + "step": 96 + }, + { + "epoch": 0.25, + "learning_rate": 8.766150989780316e-08, + "logits/chosen": -3.1160526275634766, + "logits/rejected": -3.09315824508667, + "logps/chosen": -133.9324951171875, + "logps/rejected": -438.6438903808594, + "loss": 0.5215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09528198838233948, + "rewards/margins": 0.607269287109375, + "rewards/rejected": -0.5119873285293579, + "step": 97 + }, + { + "epoch": 0.26, + "learning_rate": 8.738242764239046e-08, + "logits/chosen": -3.0495352745056152, + "logits/rejected": -3.0439505577087402, + "logps/chosen": -134.5717010498047, + "logps/rejected": -461.920654296875, + "loss": 0.5291, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12519608438014984, + "rewards/margins": 0.6772361993789673, + "rewards/rejected": -0.5520401000976562, + "step": 98 + }, + { + "epoch": 0.26, + "learning_rate": 8.710067927254554e-08, + "logits/chosen": -3.1008071899414062, + "logits/rejected": -3.1193995475769043, + "logps/chosen": -129.51092529296875, + "logps/rejected": -511.3609313964844, + "loss": 0.5279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14045867323875427, + "rewards/margins": 0.9810425043106079, + "rewards/rejected": -0.8405838012695312, + "step": 99 + }, + { + "epoch": 0.26, + "learning_rate": 8.681628488255985e-08, + "logits/chosen": -3.0766711235046387, + "logits/rejected": -3.059945583343506, + "logps/chosen": -144.72056579589844, + "logps/rejected": -312.9176330566406, + "loss": 0.5279, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02620086818933487, + "rewards/margins": 0.53033447265625, + "rewards/rejected": -0.5041335821151733, + "step": 100 + }, + { + "epoch": 0.26, + "learning_rate": 8.652926475543898e-08, + "logits/chosen": -3.105048179626465, + "logits/rejected": -3.045379400253296, + "logps/chosen": -152.85479736328125, + "logps/rejected": -449.44049072265625, + "loss": 0.4885, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08530044555664062, + "rewards/margins": 0.606501042842865, + "rewards/rejected": -0.5212005972862244, + "step": 101 + }, + { + "epoch": 0.27, + "learning_rate": 8.6239639361456e-08, + "logits/chosen": -3.1072139739990234, + "logits/rejected": -3.0962910652160645, + "logps/chosen": -153.70169067382812, + "logps/rejected": -526.6728515625, + "loss": 0.4865, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13981246948242188, + "rewards/margins": 0.798088788986206, + "rewards/rejected": -0.658276379108429, + "step": 102 + }, + { + "epoch": 0.27, + "learning_rate": 8.594742935669164e-08, + "logits/chosen": -3.085860013961792, + "logits/rejected": -3.1064438819885254, + "logps/chosen": -169.9365234375, + "logps/rejected": -499.26031494140625, + "loss": 0.5529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06364021450281143, + "rewards/margins": 0.689719021320343, + "rewards/rejected": -0.6260787844657898, + "step": 103 + }, + { + "epoch": 0.27, + "learning_rate": 8.5652655581561e-08, + "logits/chosen": -3.1012449264526367, + "logits/rejected": -3.0859756469726562, + "logps/chosen": -160.6378173828125, + "logps/rejected": -488.68231201171875, + "loss": 0.4881, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13811111450195312, + "rewards/margins": 0.7191093564033508, + "rewards/rejected": -0.5809982419013977, + "step": 104 + }, + { + "epoch": 0.27, + "learning_rate": 8.535533905932736e-08, + "logits/chosen": -3.12321138381958, + "logits/rejected": -3.137329339981079, + "logps/chosen": -143.26687622070312, + "logps/rejected": -481.572265625, + "loss": 0.5019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1137973815202713, + "rewards/margins": 0.8209674954414368, + "rewards/rejected": -0.7071701288223267, + "step": 105 + }, + { + "epoch": 0.28, + "learning_rate": 8.505550099460263e-08, + "logits/chosen": -3.0717949867248535, + "logits/rejected": -3.0872530937194824, + "logps/chosen": -139.972900390625, + "logps/rejected": -367.69720458984375, + "loss": 0.5607, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06824417412281036, + "rewards/margins": 0.42268601059913635, + "rewards/rejected": -0.3544418215751648, + "step": 106 + }, + { + "epoch": 0.28, + "learning_rate": 8.475316277183508e-08, + "logits/chosen": -3.0739946365356445, + "logits/rejected": -3.055532455444336, + "logps/chosen": -140.32418823242188, + "logps/rejected": -515.7249755859375, + "loss": 0.4496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18369561433792114, + "rewards/margins": 0.7906902432441711, + "rewards/rejected": -0.60699462890625, + "step": 107 + }, + { + "epoch": 0.28, + "learning_rate": 8.444834595378432e-08, + "logits/chosen": -3.110487461090088, + "logits/rejected": -3.0980570316314697, + "logps/chosen": -156.11631774902344, + "logps/rejected": -546.1497802734375, + "loss": 0.4944, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24157103896141052, + "rewards/margins": 1.1911711692810059, + "rewards/rejected": -0.9496002197265625, + "step": 108 + }, + { + "epoch": 0.28, + "learning_rate": 8.414107227998328e-08, + "logits/chosen": -3.073934555053711, + "logits/rejected": -2.9963951110839844, + "logps/chosen": -148.50506591796875, + "logps/rejected": -422.1906433105469, + "loss": 0.5285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15232887864112854, + "rewards/margins": 0.5609409213066101, + "rewards/rejected": -0.40861204266548157, + "step": 109 + }, + { + "epoch": 0.29, + "learning_rate": 8.383136366518788e-08, + "logits/chosen": -3.0408082008361816, + "logits/rejected": -3.070180892944336, + "logps/chosen": -143.8345489501953, + "logps/rejected": -416.3847351074219, + "loss": 0.4801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11856498569250107, + "rewards/margins": 0.6799266934394836, + "rewards/rejected": -0.5613616704940796, + "step": 110 + }, + { + "epoch": 0.29, + "learning_rate": 8.351924219781392e-08, + "logits/chosen": -3.0806734561920166, + "logits/rejected": -3.104444980621338, + "logps/chosen": -140.15240478515625, + "logps/rejected": -414.15850830078125, + "loss": 0.507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06656761467456818, + "rewards/margins": 0.7442821264266968, + "rewards/rejected": -0.6777145266532898, + "step": 111 + }, + { + "epoch": 0.29, + "learning_rate": 8.320473013836195e-08, + "logits/chosen": -3.1769516468048096, + "logits/rejected": -3.1194047927856445, + "logps/chosen": -123.44123840332031, + "logps/rejected": -316.17108154296875, + "loss": 0.4924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1587810516357422, + "rewards/margins": 0.6660366058349609, + "rewards/rejected": -0.5072555541992188, + "step": 112 + }, + { + "epoch": 0.29, + "learning_rate": 8.288784991782945e-08, + "logits/chosen": -3.1095380783081055, + "logits/rejected": -3.0872340202331543, + "logps/chosen": -115.55393981933594, + "logps/rejected": -279.1080322265625, + "loss": 0.5113, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12760695815086365, + "rewards/margins": 0.49671629071235657, + "rewards/rejected": -0.3691093325614929, + "step": 113 + }, + { + "epoch": 0.3, + "learning_rate": 8.256862413611112e-08, + "logits/chosen": -3.069408416748047, + "logits/rejected": -3.1137943267822266, + "logps/chosen": -126.4194107055664, + "logps/rejected": -450.0791015625, + "loss": 0.4784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18316611647605896, + "rewards/margins": 0.846218466758728, + "rewards/rejected": -0.6630523800849915, + "step": 114 + }, + { + "epoch": 0.3, + "learning_rate": 8.22470755603871e-08, + "logits/chosen": -3.0987234115600586, + "logits/rejected": -3.1070446968078613, + "logps/chosen": -166.15826416015625, + "logps/rejected": -491.5101318359375, + "loss": 0.4745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16505204141139984, + "rewards/margins": 0.8798866868019104, + "rewards/rejected": -0.7148346304893494, + "step": 115 + }, + { + "epoch": 0.3, + "learning_rate": 8.192322712349918e-08, + "logits/chosen": -3.1067237854003906, + "logits/rejected": -3.1172144412994385, + "logps/chosen": -170.88323974609375, + "logps/rejected": -500.4214782714844, + "loss": 0.509, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08391723781824112, + "rewards/margins": 0.6314147710800171, + "rewards/rejected": -0.547497570514679, + "step": 116 + }, + { + "epoch": 0.3, + "learning_rate": 8.159710192231519e-08, + "logits/chosen": -3.152069091796875, + "logits/rejected": -3.104389190673828, + "logps/chosen": -129.3458709716797, + "logps/rejected": -340.47747802734375, + "loss": 0.5097, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0842922255396843, + "rewards/margins": 0.5303600430488586, + "rewards/rejected": -0.44606781005859375, + "step": 117 + }, + { + "epoch": 0.31, + "learning_rate": 8.126872321608184e-08, + "logits/chosen": -3.084352970123291, + "logits/rejected": -3.1063146591186523, + "logps/chosen": -115.57638549804688, + "logps/rejected": -367.7901611328125, + "loss": 0.4884, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12939758598804474, + "rewards/margins": 0.6558945178985596, + "rewards/rejected": -0.5264968872070312, + "step": 118 + }, + { + "epoch": 0.31, + "learning_rate": 8.093811442476572e-08, + "logits/chosen": -3.1137027740478516, + "logits/rejected": -3.1057538986206055, + "logps/chosen": -120.27814483642578, + "logps/rejected": -443.34893798828125, + "loss": 0.5249, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1916656494140625, + "rewards/margins": 0.9199768304824829, + "rewards/rejected": -0.7283111810684204, + "step": 119 + }, + { + "epoch": 0.31, + "learning_rate": 8.060529912738314e-08, + "logits/chosen": -3.008230209350586, + "logits/rejected": -3.037778377532959, + "logps/chosen": -136.24945068359375, + "logps/rejected": -398.6874084472656, + "loss": 0.455, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12051353603601456, + "rewards/margins": 0.7022350430488586, + "rewards/rejected": -0.5817214846611023, + "step": 120 + }, + { + "epoch": 0.32, + "learning_rate": 8.027030106031835e-08, + "logits/chosen": -3.0901260375976562, + "logits/rejected": -3.054316997528076, + "logps/chosen": -136.0763702392578, + "logps/rejected": -444.01190185546875, + "loss": 0.4803, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20876960456371307, + "rewards/margins": 0.9557605981826782, + "rewards/rejected": -0.746990978717804, + "step": 121 + }, + { + "epoch": 0.32, + "learning_rate": 7.993314411563074e-08, + "logits/chosen": -3.0519981384277344, + "logits/rejected": -3.0455362796783447, + "logps/chosen": -180.72691345214844, + "logps/rejected": -345.02520751953125, + "loss": 0.5031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09623298794031143, + "rewards/margins": 0.5094196200370789, + "rewards/rejected": -0.4131866693496704, + "step": 122 + }, + { + "epoch": 0.32, + "learning_rate": 7.959385233935086e-08, + "logits/chosen": -3.040330410003662, + "logits/rejected": -3.0450448989868164, + "logps/chosen": -106.28794860839844, + "logps/rejected": -601.3099975585938, + "loss": 0.4737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10963860154151917, + "rewards/margins": 1.229373574256897, + "rewards/rejected": -1.1197350025177002, + "step": 123 + }, + { + "epoch": 0.32, + "learning_rate": 7.925244992976537e-08, + "logits/chosen": -3.1247572898864746, + "logits/rejected": -3.071843147277832, + "logps/chosen": -142.97048950195312, + "logps/rejected": -393.67034912109375, + "loss": 0.5035, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1268947720527649, + "rewards/margins": 0.7534008026123047, + "rewards/rejected": -0.6265060305595398, + "step": 124 + }, + { + "epoch": 0.33, + "learning_rate": 7.890896123569135e-08, + "logits/chosen": -3.108030319213867, + "logits/rejected": -3.0857863426208496, + "logps/chosen": -146.473388671875, + "logps/rejected": -434.29913330078125, + "loss": 0.4787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14734727144241333, + "rewards/margins": 0.7177665829658508, + "rewards/rejected": -0.5704193115234375, + "step": 125 + }, + { + "epoch": 0.33, + "learning_rate": 7.856341075473961e-08, + "logits/chosen": -3.098536491394043, + "logits/rejected": -3.115039348602295, + "logps/chosen": -130.59547424316406, + "logps/rejected": -510.59033203125, + "loss": 0.4402, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09817161411046982, + "rewards/margins": 0.9691509008407593, + "rewards/rejected": -0.8709793090820312, + "step": 126 + }, + { + "epoch": 0.33, + "learning_rate": 7.821582313156763e-08, + "logits/chosen": -3.125305652618408, + "logits/rejected": -3.1248674392700195, + "logps/chosen": -133.17697143554688, + "logps/rejected": -382.2192077636719, + "loss": 0.4355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16860467195510864, + "rewards/margins": 0.8567227125167847, + "rewards/rejected": -0.688118040561676, + "step": 127 + }, + { + "epoch": 0.33, + "learning_rate": 7.786622315612181e-08, + "logits/chosen": -3.099607467651367, + "logits/rejected": -3.094407081604004, + "logps/chosen": -141.4564666748047, + "logps/rejected": -489.988037109375, + "loss": 0.4891, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13299790024757385, + "rewards/margins": 1.1324135065078735, + "rewards/rejected": -0.9994155764579773, + "step": 128 + }, + { + "epoch": 0.34, + "learning_rate": 7.751463576186956e-08, + "logits/chosen": -3.1352057456970215, + "logits/rejected": -3.1217899322509766, + "logps/chosen": -161.12014770507812, + "logps/rejected": -345.6103820800781, + "loss": 0.4977, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14476700127124786, + "rewards/margins": 0.7693535089492798, + "rewards/rejected": -0.6245864629745483, + "step": 129 + }, + { + "epoch": 0.34, + "learning_rate": 7.716108602402093e-08, + "logits/chosen": -3.0581696033477783, + "logits/rejected": -3.098001003265381, + "logps/chosen": -114.05946350097656, + "logps/rejected": -764.55810546875, + "loss": 0.5013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11148224025964737, + "rewards/margins": 1.2800339460372925, + "rewards/rejected": -1.1685516834259033, + "step": 130 + }, + { + "epoch": 0.34, + "learning_rate": 7.680559915774033e-08, + "logits/chosen": -3.085286855697632, + "logits/rejected": -3.1490554809570312, + "logps/chosen": -118.75178527832031, + "logps/rejected": -545.6494140625, + "loss": 0.4208, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1658802032470703, + "rewards/margins": 1.2417560815811157, + "rewards/rejected": -1.0758758783340454, + "step": 131 + }, + { + "epoch": 0.34, + "learning_rate": 7.644820051634812e-08, + "logits/chosen": -2.9970664978027344, + "logits/rejected": -3.08060359954834, + "logps/chosen": -131.451171875, + "logps/rejected": -580.27099609375, + "loss": 0.4421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14000816643238068, + "rewards/margins": 1.019918441772461, + "rewards/rejected": -0.8799102902412415, + "step": 132 + }, + { + "epoch": 0.35, + "learning_rate": 7.608891558951248e-08, + "logits/chosen": -3.1426069736480713, + "logits/rejected": -3.126580238342285, + "logps/chosen": -120.0411148071289, + "logps/rejected": -318.39874267578125, + "loss": 0.4725, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0847984328866005, + "rewards/margins": 0.6587028503417969, + "rewards/rejected": -0.5739044547080994, + "step": 133 + }, + { + "epoch": 0.35, + "learning_rate": 7.572777000143145e-08, + "logits/chosen": -3.0482301712036133, + "logits/rejected": -3.104376792907715, + "logps/chosen": -155.20269775390625, + "logps/rejected": -479.7928161621094, + "loss": 0.4532, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1876606047153473, + "rewards/margins": 1.072988510131836, + "rewards/rejected": -0.885327935218811, + "step": 134 + }, + { + "epoch": 0.35, + "learning_rate": 7.536478950900537e-08, + "logits/chosen": -3.148233652114868, + "logits/rejected": -3.0738954544067383, + "logps/chosen": -136.96656799316406, + "logps/rejected": -377.29486083984375, + "loss": 0.4375, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1400737762451172, + "rewards/margins": 0.7177913784980774, + "rewards/rejected": -0.5777176022529602, + "step": 135 + }, + { + "epoch": 0.35, + "learning_rate": 7.5e-08, + "logits/chosen": -3.10453200340271, + "logits/rejected": -3.0713415145874023, + "logps/chosen": -137.32994079589844, + "logps/rejected": -281.8401184082031, + "loss": 0.4639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11550483107566833, + "rewards/margins": 0.45048946142196655, + "rewards/rejected": -0.33498460054397583, + "step": 136 + }, + { + "epoch": 0.36, + "learning_rate": 7.463342749120013e-08, + "logits/chosen": -3.1459317207336426, + "logits/rejected": -3.1306095123291016, + "logps/chosen": -136.13946533203125, + "logps/rejected": -517.5847778320312, + "loss": 0.4832, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12081985920667648, + "rewards/margins": 1.118133544921875, + "rewards/rejected": -0.9973137378692627, + "step": 137 + }, + { + "epoch": 0.36, + "learning_rate": 7.426509812655406e-08, + "logits/chosen": -3.15494966506958, + "logits/rejected": -3.134921073913574, + "logps/chosen": -189.58892822265625, + "logps/rejected": -603.7501220703125, + "loss": 0.4163, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2330886870622635, + "rewards/margins": 1.2772369384765625, + "rewards/rejected": -1.0441482067108154, + "step": 138 + }, + { + "epoch": 0.36, + "learning_rate": 7.389503817530904e-08, + "logits/chosen": -3.089406967163086, + "logits/rejected": -3.0892255306243896, + "logps/chosen": -161.84805297851562, + "logps/rejected": -383.43756103515625, + "loss": 0.4625, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17034149169921875, + "rewards/margins": 0.6593421697616577, + "rewards/rejected": -0.48900070786476135, + "step": 139 + }, + { + "epoch": 0.36, + "learning_rate": 7.352327403013778e-08, + "logits/chosen": -3.124213695526123, + "logits/rejected": -3.111698627471924, + "logps/chosen": -128.89703369140625, + "logps/rejected": -375.939453125, + "loss": 0.458, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12078094482421875, + "rewards/margins": 0.7898651361465454, + "rewards/rejected": -0.6690841913223267, + "step": 140 + }, + { + "epoch": 0.37, + "learning_rate": 7.314983220525603e-08, + "logits/chosen": -3.167116641998291, + "logits/rejected": -3.132208824157715, + "logps/chosen": -108.63416290283203, + "logps/rejected": -422.79461669921875, + "loss": 0.4507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20730248093605042, + "rewards/margins": 0.8063152432441711, + "rewards/rejected": -0.5990127325057983, + "step": 141 + }, + { + "epoch": 0.37, + "learning_rate": 7.277473933453169e-08, + "logits/chosen": -3.122929811477661, + "logits/rejected": -3.1239676475524902, + "logps/chosen": -181.4097900390625, + "logps/rejected": -474.49176025390625, + "loss": 0.4892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1378776580095291, + "rewards/margins": 0.875643253326416, + "rewards/rejected": -0.7377655506134033, + "step": 142 + }, + { + "epoch": 0.37, + "learning_rate": 7.239802216958522e-08, + "logits/chosen": -3.063105344772339, + "logits/rejected": -3.082021474838257, + "logps/chosen": -136.0540771484375, + "logps/rejected": -359.2922058105469, + "loss": 0.4449, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19395524263381958, + "rewards/margins": 0.7311897277832031, + "rewards/rejected": -0.5372344851493835, + "step": 143 + }, + { + "epoch": 0.38, + "learning_rate": 7.201970757788172e-08, + "logits/chosen": -3.137815475463867, + "logits/rejected": -3.1740150451660156, + "logps/chosen": -106.80043029785156, + "logps/rejected": -589.0427856445312, + "loss": 0.4431, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20917245745658875, + "rewards/margins": 1.2896534204483032, + "rewards/rejected": -1.080480933189392, + "step": 144 + }, + { + "epoch": 0.38, + "learning_rate": 7.163982254081475e-08, + "logits/chosen": -3.088249921798706, + "logits/rejected": -3.071399211883545, + "logps/chosen": -116.11766052246094, + "logps/rejected": -295.71075439453125, + "loss": 0.438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14758644998073578, + "rewards/margins": 0.5543491244316101, + "rewards/rejected": -0.4067627191543579, + "step": 145 + }, + { + "epoch": 0.38, + "learning_rate": 7.125839415178203e-08, + "logits/chosen": -3.113950252532959, + "logits/rejected": -3.144627571105957, + "logps/chosen": -155.93276977539062, + "logps/rejected": -377.1415710449219, + "loss": 0.4107, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22590753436088562, + "rewards/margins": 0.910427451133728, + "rewards/rejected": -0.6845200061798096, + "step": 146 + }, + { + "epoch": 0.38, + "learning_rate": 7.087544961425316e-08, + "logits/chosen": -3.1733882427215576, + "logits/rejected": -3.1672675609588623, + "logps/chosen": -144.11981201171875, + "logps/rejected": -449.4358825683594, + "loss": 0.4186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1575450897216797, + "rewards/margins": 1.0383068323135376, + "rewards/rejected": -0.8807617425918579, + "step": 147 + }, + { + "epoch": 0.39, + "learning_rate": 7.049101623982937e-08, + "logits/chosen": -3.086116313934326, + "logits/rejected": -3.0856242179870605, + "logps/chosen": -160.94992065429688, + "logps/rejected": -404.5404968261719, + "loss": 0.3785, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14573746919631958, + "rewards/margins": 0.792841374874115, + "rewards/rejected": -0.6471039056777954, + "step": 148 + }, + { + "epoch": 0.39, + "learning_rate": 7.010512144629579e-08, + "logits/chosen": -3.08474063873291, + "logits/rejected": -3.0368471145629883, + "logps/chosen": -122.29509735107422, + "logps/rejected": -446.4263916015625, + "loss": 0.4065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1817161589860916, + "rewards/margins": 1.043343424797058, + "rewards/rejected": -0.8616272211074829, + "step": 149 + }, + { + "epoch": 0.39, + "learning_rate": 6.971779275566593e-08, + "logits/chosen": -3.097836494445801, + "logits/rejected": -3.083824634552002, + "logps/chosen": -177.43072509765625, + "logps/rejected": -419.4530944824219, + "loss": 0.4069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13998755812644958, + "rewards/margins": 0.985904335975647, + "rewards/rejected": -0.845916748046875, + "step": 150 + }, + { + "epoch": 0.39, + "learning_rate": 6.932905779221879e-08, + "logits/chosen": -3.104818820953369, + "logits/rejected": -3.1025915145874023, + "logps/chosen": -163.50038146972656, + "logps/rejected": -554.3448486328125, + "loss": 0.4283, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12446366250514984, + "rewards/margins": 1.297234296798706, + "rewards/rejected": -1.1727707386016846, + "step": 151 + }, + { + "epoch": 0.4, + "learning_rate": 6.89389442805288e-08, + "logits/chosen": -3.012932777404785, + "logits/rejected": -3.0649890899658203, + "logps/chosen": -196.53561401367188, + "logps/rejected": -605.2732543945312, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1717933714389801, + "rewards/margins": 1.3436012268066406, + "rewards/rejected": -1.171807885169983, + "step": 152 + }, + { + "epoch": 0.4, + "learning_rate": 6.85474800434884e-08, + "logits/chosen": -3.0989861488342285, + "logits/rejected": -3.087739944458008, + "logps/chosen": -140.4935760498047, + "logps/rejected": -506.5846862792969, + "loss": 0.4002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2495521605014801, + "rewards/margins": 1.1953072547912598, + "rewards/rejected": -0.9457550048828125, + "step": 153 + }, + { + "epoch": 0.4, + "learning_rate": 6.815469300032373e-08, + "logits/chosen": -3.0424811840057373, + "logits/rejected": -3.0499229431152344, + "logps/chosen": -143.24826049804688, + "logps/rejected": -396.2330322265625, + "loss": 0.3914, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1333995759487152, + "rewards/margins": 0.976334810256958, + "rewards/rejected": -0.8429352045059204, + "step": 154 + }, + { + "epoch": 0.4, + "learning_rate": 6.776061116460351e-08, + "logits/chosen": -3.071153163909912, + "logits/rejected": -3.1488404273986816, + "logps/chosen": -156.86737060546875, + "logps/rejected": -476.0387878417969, + "loss": 0.3988, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18211136758327484, + "rewards/margins": 0.9895515441894531, + "rewards/rejected": -0.8074401617050171, + "step": 155 + }, + { + "epoch": 0.41, + "learning_rate": 6.7365262642241e-08, + "logits/chosen": -3.117462635040283, + "logits/rejected": -3.116697311401367, + "logps/chosen": -175.70513916015625, + "logps/rejected": -424.628662109375, + "loss": 0.4296, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22379760444164276, + "rewards/margins": 1.1247620582580566, + "rewards/rejected": -0.9009643793106079, + "step": 156 + }, + { + "epoch": 0.41, + "learning_rate": 6.696867562948962e-08, + "logits/chosen": -3.0367488861083984, + "logits/rejected": -3.035552978515625, + "logps/chosen": -138.85536193847656, + "logps/rejected": -398.6291809082031, + "loss": 0.4214, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19651031494140625, + "rewards/margins": 0.8011413812637329, + "rewards/rejected": -0.6046310663223267, + "step": 157 + }, + { + "epoch": 0.41, + "learning_rate": 6.657087841093179e-08, + "logits/chosen": -3.1527538299560547, + "logits/rejected": -3.175872564315796, + "logps/chosen": -103.6400146484375, + "logps/rejected": -300.9288635253906, + "loss": 0.4771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11606903374195099, + "rewards/margins": 0.6217842102050781, + "rewards/rejected": -0.5057151913642883, + "step": 158 + }, + { + "epoch": 0.41, + "learning_rate": 6.61718993574619e-08, + "logits/chosen": -3.1223597526550293, + "logits/rejected": -3.121274471282959, + "logps/chosen": -129.8858184814453, + "logps/rejected": -294.8136901855469, + "loss": 0.4236, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15761986374855042, + "rewards/margins": 0.6521251201629639, + "rewards/rejected": -0.49450528621673584, + "step": 159 + }, + { + "epoch": 0.42, + "learning_rate": 6.577176692426278e-08, + "logits/chosen": -3.0406970977783203, + "logits/rejected": -3.0494632720947266, + "logps/chosen": -134.8408966064453, + "logps/rejected": -588.223388671875, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20207712054252625, + "rewards/margins": 1.3886387348175049, + "rewards/rejected": -1.1865615844726562, + "step": 160 + }, + { + "epoch": 0.42, + "learning_rate": 6.537050964877625e-08, + "logits/chosen": -3.1417288780212402, + "logits/rejected": -3.088653802871704, + "logps/chosen": -124.66554260253906, + "logps/rejected": -284.7332763671875, + "loss": 0.4128, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14900818467140198, + "rewards/margins": 0.5523651242256165, + "rewards/rejected": -0.4033569395542145, + "step": 161 + }, + { + "epoch": 0.42, + "learning_rate": 6.496815614866791e-08, + "logits/chosen": -3.0453598499298096, + "logits/rejected": -3.014371395111084, + "logps/chosen": -184.0735321044922, + "logps/rejected": -374.47393798828125, + "loss": 0.4071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16644670069217682, + "rewards/margins": 0.7951317429542542, + "rewards/rejected": -0.6286849975585938, + "step": 162 + }, + { + "epoch": 0.42, + "learning_rate": 6.456473511978606e-08, + "logits/chosen": -3.1171507835388184, + "logits/rejected": -3.110814332962036, + "logps/chosen": -164.55392456054688, + "logps/rejected": -415.2434997558594, + "loss": 0.4574, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12064743787050247, + "rewards/margins": 0.7504081726074219, + "rewards/rejected": -0.6297607421875, + "step": 163 + }, + { + "epoch": 0.43, + "learning_rate": 6.416027533411518e-08, + "logits/chosen": -3.137462854385376, + "logits/rejected": -3.1472978591918945, + "logps/chosen": -121.03762817382812, + "logps/rejected": -526.964599609375, + "loss": 0.3937, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20923005044460297, + "rewards/margins": 1.2875869274139404, + "rewards/rejected": -1.0783569812774658, + "step": 164 + }, + { + "epoch": 0.43, + "learning_rate": 6.375480563772389e-08, + "logits/chosen": -3.0367088317871094, + "logits/rejected": -3.02640962600708, + "logps/chosen": -145.80581665039062, + "logps/rejected": -401.19879150390625, + "loss": 0.4068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16951218247413635, + "rewards/margins": 0.9511131644248962, + "rewards/rejected": -0.7816009521484375, + "step": 165 + }, + { + "epoch": 0.43, + "learning_rate": 6.334835494870759e-08, + "logits/chosen": -3.130343437194824, + "logits/rejected": -3.1678314208984375, + "logps/chosen": -155.0041961669922, + "logps/rejected": -382.3353576660156, + "loss": 0.3957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11768035590648651, + "rewards/margins": 0.81097412109375, + "rewards/rejected": -0.6932938098907471, + "step": 166 + }, + { + "epoch": 0.43, + "learning_rate": 6.294095225512604e-08, + "logits/chosen": -3.0633976459503174, + "logits/rejected": -3.089944362640381, + "logps/chosen": -108.66032409667969, + "logps/rejected": -276.0200500488281, + "loss": 0.4125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12874794006347656, + "rewards/margins": 0.7705692052841187, + "rewards/rejected": -0.6418212652206421, + "step": 167 + }, + { + "epoch": 0.44, + "learning_rate": 6.253262661293602e-08, + "logits/chosen": -3.117506980895996, + "logits/rejected": -3.1159558296203613, + "logps/chosen": -120.56807708740234, + "logps/rejected": -273.0645446777344, + "loss": 0.4378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14875030517578125, + "rewards/margins": 0.6718338131904602, + "rewards/rejected": -0.523083508014679, + "step": 168 + }, + { + "epoch": 0.44, + "learning_rate": 6.2123407143919e-08, + "logits/chosen": -3.125715732574463, + "logits/rejected": -3.152085781097412, + "logps/chosen": -133.43783569335938, + "logps/rejected": -438.20416259765625, + "loss": 0.4461, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1899467408657074, + "rewards/margins": 0.8798058032989502, + "rewards/rejected": -0.6898590326309204, + "step": 169 + }, + { + "epoch": 0.44, + "learning_rate": 6.17133230336041e-08, + "logits/chosen": -3.0643310546875, + "logits/rejected": -3.0822548866271973, + "logps/chosen": -110.30667877197266, + "logps/rejected": -317.32373046875, + "loss": 0.4526, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15829619765281677, + "rewards/margins": 0.666857123374939, + "rewards/rejected": -0.5085609555244446, + "step": 170 + }, + { + "epoch": 0.45, + "learning_rate": 6.130240352918674e-08, + "logits/chosen": -3.0720131397247314, + "logits/rejected": -3.0445775985717773, + "logps/chosen": -144.40042114257812, + "logps/rejected": -351.1568908691406, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18562622368335724, + "rewards/margins": 0.9527862668037415, + "rewards/rejected": -0.7671600580215454, + "step": 171 + }, + { + "epoch": 0.45, + "learning_rate": 6.089067793744258e-08, + "logits/chosen": -3.1029701232910156, + "logits/rejected": -3.076557159423828, + "logps/chosen": -147.8153076171875, + "logps/rejected": -346.464599609375, + "loss": 0.422, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1903984099626541, + "rewards/margins": 0.9521812200546265, + "rewards/rejected": -0.7617828249931335, + "step": 172 + }, + { + "epoch": 0.45, + "learning_rate": 6.047817562263742e-08, + "logits/chosen": -3.035245895385742, + "logits/rejected": -3.083024501800537, + "logps/chosen": -147.75872802734375, + "logps/rejected": -404.4599609375, + "loss": 0.409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16893844306468964, + "rewards/margins": 1.0352684259414673, + "rewards/rejected": -0.8663300275802612, + "step": 173 + }, + { + "epoch": 0.45, + "learning_rate": 6.0064926004433e-08, + "logits/chosen": -3.1314408779144287, + "logits/rejected": -3.0853843688964844, + "logps/chosen": -182.3887939453125, + "logps/rejected": -422.9280700683594, + "loss": 0.4124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11360245198011398, + "rewards/margins": 1.0501198768615723, + "rewards/rejected": -0.9365173578262329, + "step": 174 + }, + { + "epoch": 0.46, + "learning_rate": 5.965095855578868e-08, + "logits/chosen": -3.02301287651062, + "logits/rejected": -3.006509780883789, + "logps/chosen": -125.65253448486328, + "logps/rejected": -354.51007080078125, + "loss": 0.4604, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1862175017595291, + "rewards/margins": 0.8896942138671875, + "rewards/rejected": -0.7034767270088196, + "step": 175 + }, + { + "epoch": 0.46, + "learning_rate": 5.923630280085947e-08, + "logits/chosen": -3.060257911682129, + "logits/rejected": -3.085587501525879, + "logps/chosen": -144.0787353515625, + "logps/rejected": -625.9449462890625, + "loss": 0.3809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3091293275356293, + "rewards/margins": 1.5021591186523438, + "rewards/rejected": -1.193029761314392, + "step": 176 + }, + { + "epoch": 0.46, + "learning_rate": 5.882098831289043e-08, + "logits/chosen": -3.0454583168029785, + "logits/rejected": -3.1112914085388184, + "logps/chosen": -136.15420532226562, + "logps/rejected": -407.72039794921875, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16591377556324005, + "rewards/margins": 1.0603870153427124, + "rewards/rejected": -0.8944733142852783, + "step": 177 + }, + { + "epoch": 0.46, + "learning_rate": 5.840504471210741e-08, + "logits/chosen": -3.127790927886963, + "logits/rejected": -3.1121292114257812, + "logps/chosen": -147.95989990234375, + "logps/rejected": -451.1527404785156, + "loss": 0.3837, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1636962890625, + "rewards/margins": 1.25377357006073, + "rewards/rejected": -1.09007728099823, + "step": 178 + }, + { + "epoch": 0.47, + "learning_rate": 5.798850166360461e-08, + "logits/chosen": -3.158249855041504, + "logits/rejected": -3.1448564529418945, + "logps/chosen": -148.22645568847656, + "logps/rejected": -428.92962646484375, + "loss": 0.4196, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2014671415090561, + "rewards/margins": 1.0449424982070923, + "rewards/rejected": -0.843475341796875, + "step": 179 + }, + { + "epoch": 0.47, + "learning_rate": 5.757138887522883e-08, + "logits/chosen": -3.0731863975524902, + "logits/rejected": -3.0915160179138184, + "logps/chosen": -110.15003204345703, + "logps/rejected": -275.59271240234375, + "loss": 0.4039, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16256867349147797, + "rewards/margins": 0.5572800040245056, + "rewards/rejected": -0.39471131563186646, + "step": 180 + }, + { + "epoch": 0.47, + "learning_rate": 5.7153736095460704e-08, + "logits/chosen": -3.0482521057128906, + "logits/rejected": -3.0761237144470215, + "logps/chosen": -129.5599365234375, + "logps/rejected": -446.09368896484375, + "loss": 0.3739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2645835876464844, + "rewards/margins": 1.0576530694961548, + "rewards/rejected": -0.7930694818496704, + "step": 181 + }, + { + "epoch": 0.47, + "learning_rate": 5.6735573111293054e-08, + "logits/chosen": -3.148378849029541, + "logits/rejected": -3.096874237060547, + "logps/chosen": -145.73265075683594, + "logps/rejected": -386.54461669921875, + "loss": 0.4268, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0573577880859375, + "rewards/margins": 0.8126785755157471, + "rewards/rejected": -0.7553207874298096, + "step": 182 + }, + { + "epoch": 0.48, + "learning_rate": 5.6316929746106465e-08, + "logits/chosen": -3.0520408153533936, + "logits/rejected": -3.0940439701080322, + "logps/chosen": -159.68341064453125, + "logps/rejected": -520.6629638671875, + "loss": 0.359, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2254360169172287, + "rewards/margins": 1.3818614482879639, + "rewards/rejected": -1.1564254760742188, + "step": 183 + }, + { + "epoch": 0.48, + "learning_rate": 5.589783585754231e-08, + "logits/chosen": -3.0666818618774414, + "logits/rejected": -3.0239691734313965, + "logps/chosen": -118.12657165527344, + "logps/rejected": -326.94024658203125, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12889710068702698, + "rewards/margins": 0.6962021589279175, + "rewards/rejected": -0.5673050284385681, + "step": 184 + }, + { + "epoch": 0.48, + "learning_rate": 5.547832133537327e-08, + "logits/chosen": -3.118840217590332, + "logits/rejected": -3.077786684036255, + "logps/chosen": -138.48992919921875, + "logps/rejected": -414.1672058105469, + "loss": 0.423, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1471691131591797, + "rewards/margins": 1.1473194360733032, + "rewards/rejected": -1.0001503229141235, + "step": 185 + }, + { + "epoch": 0.48, + "learning_rate": 5.5058416099371605e-08, + "logits/chosen": -3.0682311058044434, + "logits/rejected": -3.094142436981201, + "logps/chosen": -173.35546875, + "logps/rejected": -429.258056640625, + "loss": 0.4115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09741058945655823, + "rewards/margins": 1.0206817388534546, + "rewards/rejected": -0.9232711791992188, + "step": 186 + }, + { + "epoch": 0.49, + "learning_rate": 5.463815009717533e-08, + "logits/chosen": -3.0461556911468506, + "logits/rejected": -3.1972198486328125, + "logps/chosen": -142.40771484375, + "logps/rejected": -520.2941284179688, + "loss": 0.3769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18378257751464844, + "rewards/margins": 1.4763531684875488, + "rewards/rejected": -1.2925705909729004, + "step": 187 + }, + { + "epoch": 0.49, + "learning_rate": 5.421755330215223e-08, + "logits/chosen": -3.1149086952209473, + "logits/rejected": -3.1663126945495605, + "logps/chosen": -126.6010971069336, + "logps/rejected": -469.45562744140625, + "loss": 0.3877, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13974876701831818, + "rewards/margins": 1.3001270294189453, + "rewards/rejected": -1.1603782176971436, + "step": 188 + }, + { + "epoch": 0.49, + "learning_rate": 5.379665571126231e-08, + "logits/chosen": -3.0132200717926025, + "logits/rejected": -3.1179261207580566, + "logps/chosen": -134.69857788085938, + "logps/rejected": -508.8202819824219, + "loss": 0.3692, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19374427199363708, + "rewards/margins": 1.3162144422531128, + "rewards/rejected": -1.1224701404571533, + "step": 189 + }, + { + "epoch": 0.49, + "learning_rate": 5.337548734291826e-08, + "logits/chosen": -3.0505282878875732, + "logits/rejected": -3.0952892303466797, + "logps/chosen": -121.44355010986328, + "logps/rejected": -490.31158447265625, + "loss": 0.386, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23464928567409515, + "rewards/margins": 1.1103054285049438, + "rewards/rejected": -0.8756561279296875, + "step": 190 + }, + { + "epoch": 0.5, + "learning_rate": 5.2954078234844666e-08, + "logits/chosen": -3.0578651428222656, + "logits/rejected": -3.021419048309326, + "logps/chosen": -141.45242309570312, + "logps/rejected": -712.8145141601562, + "loss": 0.3979, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15119019150733948, + "rewards/margins": 1.6046578884124756, + "rewards/rejected": -1.453467607498169, + "step": 191 + }, + { + "epoch": 0.5, + "learning_rate": 5.253245844193563e-08, + "logits/chosen": -3.106482982635498, + "logits/rejected": -3.1082916259765625, + "logps/chosen": -123.6873779296875, + "logps/rejected": -443.552490234375, + "loss": 0.4073, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1747463196516037, + "rewards/margins": 1.1339657306671143, + "rewards/rejected": -0.9592193365097046, + "step": 192 + }, + { + "epoch": 0.5, + "learning_rate": 5.2110658034111346e-08, + "logits/chosen": -3.1027393341064453, + "logits/rejected": -3.1154818534851074, + "logps/chosen": -167.00489807128906, + "logps/rejected": -576.5791625976562, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21813851594924927, + "rewards/margins": 1.2892643213272095, + "rewards/rejected": -1.071125864982605, + "step": 193 + }, + { + "epoch": 0.51, + "learning_rate": 5.168870709417341e-08, + "logits/chosen": -3.113203287124634, + "logits/rejected": -3.0936341285705566, + "logps/chosen": -107.32866668701172, + "logps/rejected": -321.30279541015625, + "loss": 0.4686, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16297760605812073, + "rewards/margins": 0.7678695917129517, + "rewards/rejected": -0.6048919558525085, + "step": 194 + }, + { + "epoch": 0.51, + "learning_rate": 5.126663571565939e-08, + "logits/chosen": -3.12161922454834, + "logits/rejected": -3.089170455932617, + "logps/chosen": -119.39836883544922, + "logps/rejected": -461.099365234375, + "loss": 0.4042, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26379168033599854, + "rewards/margins": 1.3730056285858154, + "rewards/rejected": -1.1092140674591064, + "step": 195 + }, + { + "epoch": 0.51, + "learning_rate": 5.0844474000696546e-08, + "logits/chosen": -3.1148953437805176, + "logits/rejected": -3.159472942352295, + "logps/chosen": -132.51531982421875, + "logps/rejected": -447.8483581542969, + "loss": 0.4033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22345352172851562, + "rewards/margins": 0.9675209522247314, + "rewards/rejected": -0.7440674304962158, + "step": 196 + }, + { + "epoch": 0.51, + "learning_rate": 5.042225205785492e-08, + "logits/chosen": -3.106973171234131, + "logits/rejected": -3.1136832237243652, + "logps/chosen": -142.7781982421875, + "logps/rejected": -359.5777893066406, + "loss": 0.3901, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18831978738307953, + "rewards/margins": 0.9333775043487549, + "rewards/rejected": -0.7450577020645142, + "step": 197 + }, + { + "epoch": 0.52, + "learning_rate": 5e-08, + "logits/chosen": -3.12178373336792, + "logits/rejected": -3.1105871200561523, + "logps/chosen": -172.18666076660156, + "logps/rejected": -547.9198608398438, + "loss": 0.38, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12358932197093964, + "rewards/margins": 1.4309685230255127, + "rewards/rejected": -1.307379126548767, + "step": 198 + }, + { + "epoch": 0.52, + "learning_rate": 4.957774794214508e-08, + "logits/chosen": -3.000467538833618, + "logits/rejected": -3.049604892730713, + "logps/chosen": -122.33845520019531, + "logps/rejected": -304.47918701171875, + "loss": 0.3754, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24221229553222656, + "rewards/margins": 0.8004497289657593, + "rewards/rejected": -0.5582374930381775, + "step": 199 + }, + { + "epoch": 0.52, + "learning_rate": 4.915552599930345e-08, + "logits/chosen": -3.157376289367676, + "logits/rejected": -3.1292710304260254, + "logps/chosen": -122.93233489990234, + "logps/rejected": -489.05792236328125, + "loss": 0.4124, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.194041445851326, + "rewards/margins": 1.4583847522735596, + "rewards/rejected": -1.26434326171875, + "step": 200 + }, + { + "epoch": 0.52, + "learning_rate": 4.873336428434061e-08, + "logits/chosen": -3.131438970565796, + "logits/rejected": -3.0890159606933594, + "logps/chosen": -136.0583038330078, + "logps/rejected": -313.7895812988281, + "loss": 0.4346, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10743026435375214, + "rewards/margins": 0.7407188415527344, + "rewards/rejected": -0.6332886219024658, + "step": 201 + }, + { + "epoch": 0.53, + "learning_rate": 4.831129290582659e-08, + "logits/chosen": -3.0684313774108887, + "logits/rejected": -3.079777240753174, + "logps/chosen": -155.2260284423828, + "logps/rejected": -622.0230712890625, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23938828706741333, + "rewards/margins": 1.5188010931015015, + "rewards/rejected": -1.279412865638733, + "step": 202 + }, + { + "epoch": 0.53, + "learning_rate": 4.7889341965888656e-08, + "logits/chosen": -3.0501186847686768, + "logits/rejected": -3.1010570526123047, + "logps/chosen": -140.74917602539062, + "logps/rejected": -559.6866455078125, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25390321016311646, + "rewards/margins": 1.6975173950195312, + "rewards/rejected": -1.4436142444610596, + "step": 203 + }, + { + "epoch": 0.53, + "learning_rate": 4.746754155806437e-08, + "logits/chosen": -3.0440781116485596, + "logits/rejected": -3.048964262008667, + "logps/chosen": -141.3037109375, + "logps/rejected": -442.3330078125, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1778922975063324, + "rewards/margins": 1.16820228099823, + "rewards/rejected": -0.9903099536895752, + "step": 204 + }, + { + "epoch": 0.53, + "learning_rate": 4.7045921765155336e-08, + "logits/chosen": -3.0561375617980957, + "logits/rejected": -3.056830406188965, + "logps/chosen": -173.45457458496094, + "logps/rejected": -454.17840576171875, + "loss": 0.374, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25037384033203125, + "rewards/margins": 1.2324081659317017, + "rewards/rejected": -0.9820343255996704, + "step": 205 + }, + { + "epoch": 0.54, + "learning_rate": 4.662451265708174e-08, + "logits/chosen": -3.0579328536987305, + "logits/rejected": -3.092219829559326, + "logps/chosen": -144.05319213867188, + "logps/rejected": -624.7197875976562, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.306478887796402, + "rewards/margins": 1.8998017311096191, + "rewards/rejected": -1.5933228731155396, + "step": 206 + }, + { + "epoch": 0.54, + "learning_rate": 4.6203344288737694e-08, + "logits/chosen": -3.061908006668091, + "logits/rejected": -3.0249197483062744, + "logps/chosen": -130.68167114257812, + "logps/rejected": -395.1171875, + "loss": 0.3645, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1997455656528473, + "rewards/margins": 1.2156269550323486, + "rewards/rejected": -1.0158812999725342, + "step": 207 + }, + { + "epoch": 0.54, + "learning_rate": 4.578244669784776e-08, + "logits/chosen": -3.113823413848877, + "logits/rejected": -3.053581476211548, + "logps/chosen": -138.19027709960938, + "logps/rejected": -434.6611022949219, + "loss": 0.4038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22145310044288635, + "rewards/margins": 1.1242377758026123, + "rewards/rejected": -0.9027847051620483, + "step": 208 + }, + { + "epoch": 0.54, + "learning_rate": 4.536184990282467e-08, + "logits/chosen": -3.017637252807617, + "logits/rejected": -3.090458631515503, + "logps/chosen": -140.44320678710938, + "logps/rejected": -552.8594970703125, + "loss": 0.38, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21458931267261505, + "rewards/margins": 1.7018940448760986, + "rewards/rejected": -1.4873046875, + "step": 209 + }, + { + "epoch": 0.55, + "learning_rate": 4.49415839006284e-08, + "logits/chosen": -3.1187472343444824, + "logits/rejected": -3.0913283824920654, + "logps/chosen": -128.6487579345703, + "logps/rejected": -342.09521484375, + "loss": 0.3707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1922958493232727, + "rewards/margins": 1.065239667892456, + "rewards/rejected": -0.8729438781738281, + "step": 210 + }, + { + "epoch": 0.55, + "learning_rate": 4.452167866462674e-08, + "logits/chosen": -3.1314468383789062, + "logits/rejected": -3.1199417114257812, + "logps/chosen": -141.4825897216797, + "logps/rejected": -398.4126281738281, + "loss": 0.3964, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29863661527633667, + "rewards/margins": 1.1523520946502686, + "rewards/rejected": -0.8537155389785767, + "step": 211 + }, + { + "epoch": 0.55, + "learning_rate": 4.4102164142457706e-08, + "logits/chosen": -3.10848331451416, + "logits/rejected": -3.0845112800598145, + "logps/chosen": -130.8562469482422, + "logps/rejected": -374.53680419921875, + "loss": 0.421, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11655083298683167, + "rewards/margins": 1.0813426971435547, + "rewards/rejected": -0.9647918939590454, + "step": 212 + }, + { + "epoch": 0.55, + "learning_rate": 4.368307025389355e-08, + "logits/chosen": -3.107461929321289, + "logits/rejected": -3.1078600883483887, + "logps/chosen": -145.94839477539062, + "logps/rejected": -310.96942138671875, + "loss": 0.3735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21406860649585724, + "rewards/margins": 0.8796055316925049, + "rewards/rejected": -0.6655368804931641, + "step": 213 + }, + { + "epoch": 0.56, + "learning_rate": 4.326442688870696e-08, + "logits/chosen": -3.102271556854248, + "logits/rejected": -3.0257062911987305, + "logps/chosen": -136.50765991210938, + "logps/rejected": -583.9703369140625, + "loss": 0.3399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25352174043655396, + "rewards/margins": 1.7368652820587158, + "rewards/rejected": -1.483343482017517, + "step": 214 + }, + { + "epoch": 0.56, + "learning_rate": 4.2846263904539304e-08, + "logits/chosen": -3.118605613708496, + "logits/rejected": -3.0823729038238525, + "logps/chosen": -174.63475036621094, + "logps/rejected": -582.201416015625, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19474869966506958, + "rewards/margins": 1.6431740522384644, + "rewards/rejected": -1.44842529296875, + "step": 215 + }, + { + "epoch": 0.56, + "learning_rate": 4.2428611124771177e-08, + "logits/chosen": -3.060210943222046, + "logits/rejected": -3.0923938751220703, + "logps/chosen": -136.0845489501953, + "logps/rejected": -479.44378662109375, + "loss": 0.3583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3248840570449829, + "rewards/margins": 1.3295807838439941, + "rewards/rejected": -1.0046966075897217, + "step": 216 + }, + { + "epoch": 0.57, + "learning_rate": 4.201149833639539e-08, + "logits/chosen": -3.05985164642334, + "logits/rejected": -3.0141215324401855, + "logps/chosen": -133.771240234375, + "logps/rejected": -688.7352294921875, + "loss": 0.4076, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20426636934280396, + "rewards/margins": 1.7070343494415283, + "rewards/rejected": -1.5027680397033691, + "step": 217 + }, + { + "epoch": 0.57, + "learning_rate": 4.1594955287892594e-08, + "logits/chosen": -3.0680184364318848, + "logits/rejected": -3.060615062713623, + "logps/chosen": -137.950439453125, + "logps/rejected": -255.37913513183594, + "loss": 0.3982, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16316261887550354, + "rewards/margins": 0.7176288366317749, + "rewards/rejected": -0.5544662475585938, + "step": 218 + }, + { + "epoch": 0.57, + "learning_rate": 4.117901168710959e-08, + "logits/chosen": -3.1063146591186523, + "logits/rejected": -3.1101603507995605, + "logps/chosen": -149.04336547851562, + "logps/rejected": -347.8231201171875, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1972709596157074, + "rewards/margins": 1.003278374671936, + "rewards/rejected": -0.8060073852539062, + "step": 219 + }, + { + "epoch": 0.57, + "learning_rate": 4.076369719914054e-08, + "logits/chosen": -3.0379066467285156, + "logits/rejected": -3.048675060272217, + "logps/chosen": -143.7854766845703, + "logps/rejected": -492.3360290527344, + "loss": 0.3626, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28113633394241333, + "rewards/margins": 1.3021172285079956, + "rewards/rejected": -1.0209808349609375, + "step": 220 + }, + { + "epoch": 0.58, + "learning_rate": 4.034904144421134e-08, + "logits/chosen": -3.103681802749634, + "logits/rejected": -3.1543378829956055, + "logps/chosen": -149.78231811523438, + "logps/rejected": -593.4102172851562, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30918198823928833, + "rewards/margins": 1.6979118585586548, + "rewards/rejected": -1.3887298107147217, + "step": 221 + }, + { + "epoch": 0.58, + "learning_rate": 3.9935073995566984e-08, + "logits/chosen": -3.0458157062530518, + "logits/rejected": -3.1049439907073975, + "logps/chosen": -150.04849243164062, + "logps/rejected": -521.8490600585938, + "loss": 0.3508, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18704529106616974, + "rewards/margins": 1.3752257823944092, + "rewards/rejected": -1.1881805658340454, + "step": 222 + }, + { + "epoch": 0.58, + "learning_rate": 3.952182437736256e-08, + "logits/chosen": -3.0719566345214844, + "logits/rejected": -3.1455349922180176, + "logps/chosen": -128.05758666992188, + "logps/rejected": -538.54443359375, + "loss": 0.3765, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16908951103687286, + "rewards/margins": 1.9167076349258423, + "rewards/rejected": -1.7476180791854858, + "step": 223 + }, + { + "epoch": 0.58, + "learning_rate": 3.910932206255742e-08, + "logits/chosen": -3.155580997467041, + "logits/rejected": -3.213568925857544, + "logps/chosen": -164.4576416015625, + "logps/rejected": -542.5440673828125, + "loss": 0.3451, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25897711515426636, + "rewards/margins": 1.8809224367141724, + "rewards/rejected": -1.6219452619552612, + "step": 224 + }, + { + "epoch": 0.59, + "learning_rate": 3.8697596470813255e-08, + "logits/chosen": -3.109646797180176, + "logits/rejected": -3.0561375617980957, + "logps/chosen": -109.14723205566406, + "logps/rejected": -271.1050720214844, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21373865008354187, + "rewards/margins": 0.9650532007217407, + "rewards/rejected": -0.7513145804405212, + "step": 225 + }, + { + "epoch": 0.59, + "learning_rate": 3.828667696639589e-08, + "logits/chosen": -3.06721830368042, + "logits/rejected": -3.07922101020813, + "logps/chosen": -93.32211303710938, + "logps/rejected": -384.901123046875, + "loss": 0.3517, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27744218707084656, + "rewards/margins": 1.1149132251739502, + "rewards/rejected": -0.8374710083007812, + "step": 226 + }, + { + "epoch": 0.59, + "learning_rate": 3.7876592856081e-08, + "logits/chosen": -2.973531723022461, + "logits/rejected": -3.0445971488952637, + "logps/chosen": -155.18008422851562, + "logps/rejected": -465.4353332519531, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20190849900245667, + "rewards/margins": 1.2541782855987549, + "rewards/rejected": -1.0522698163986206, + "step": 227 + }, + { + "epoch": 0.59, + "learning_rate": 3.7467373387063964e-08, + "logits/chosen": -3.0962371826171875, + "logits/rejected": -3.149764060974121, + "logps/chosen": -130.81246948242188, + "logps/rejected": -762.2166748046875, + "loss": 0.3266, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2848625183105469, + "rewards/margins": 1.9807839393615723, + "rewards/rejected": -1.6959214210510254, + "step": 228 + }, + { + "epoch": 0.6, + "learning_rate": 3.705904774487396e-08, + "logits/chosen": -3.060342311859131, + "logits/rejected": -3.082388401031494, + "logps/chosen": -164.32000732421875, + "logps/rejected": -363.2367858886719, + "loss": 0.366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19093094766139984, + "rewards/margins": 1.1379265785217285, + "rewards/rejected": -0.9469956159591675, + "step": 229 + }, + { + "epoch": 0.6, + "learning_rate": 3.665164505129241e-08, + "logits/chosen": -3.0211615562438965, + "logits/rejected": -3.0282015800476074, + "logps/chosen": -140.00103759765625, + "logps/rejected": -501.42779541015625, + "loss": 0.3454, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32891273498535156, + "rewards/margins": 1.5603702068328857, + "rewards/rejected": -1.2314574718475342, + "step": 230 + }, + { + "epoch": 0.6, + "learning_rate": 3.624519436227609e-08, + "logits/chosen": -3.0260703563690186, + "logits/rejected": -3.0366005897521973, + "logps/chosen": -125.80831146240234, + "logps/rejected": -366.4967956542969, + "loss": 0.3946, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2382335662841797, + "rewards/margins": 1.1068661212921143, + "rewards/rejected": -0.8686325550079346, + "step": 231 + }, + { + "epoch": 0.6, + "learning_rate": 3.5839724665884794e-08, + "logits/chosen": -3.0899527072906494, + "logits/rejected": -3.1005477905273438, + "logps/chosen": -155.22760009765625, + "logps/rejected": -386.8271789550781, + "loss": 0.3621, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2694377899169922, + "rewards/margins": 1.2149364948272705, + "rewards/rejected": -0.9454987049102783, + "step": 232 + }, + { + "epoch": 0.61, + "learning_rate": 3.543526488021394e-08, + "logits/chosen": -3.0345847606658936, + "logits/rejected": -3.119518756866455, + "logps/chosen": -122.65310668945312, + "logps/rejected": -501.6441650390625, + "loss": 0.3392, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1570407897233963, + "rewards/margins": 1.4329464435577393, + "rewards/rejected": -1.2759056091308594, + "step": 233 + }, + { + "epoch": 0.61, + "learning_rate": 3.50318438513321e-08, + "logits/chosen": -3.1712489128112793, + "logits/rejected": -3.150143623352051, + "logps/chosen": -150.77947998046875, + "logps/rejected": -474.72979736328125, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2348732054233551, + "rewards/margins": 1.2991234064102173, + "rewards/rejected": -1.0642502307891846, + "step": 234 + }, + { + "epoch": 0.61, + "learning_rate": 3.4629490351223756e-08, + "logits/chosen": -3.0973873138427734, + "logits/rejected": -3.1487507820129395, + "logps/chosen": -137.9251708984375, + "logps/rejected": -440.03167724609375, + "loss": 0.3672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12311249226331711, + "rewards/margins": 1.3564605712890625, + "rewards/rejected": -1.2333481311798096, + "step": 235 + }, + { + "epoch": 0.61, + "learning_rate": 3.4228233075737225e-08, + "logits/chosen": -3.1080408096313477, + "logits/rejected": -3.1381096839904785, + "logps/chosen": -130.6728057861328, + "logps/rejected": -331.5389099121094, + "loss": 0.3873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20646744966506958, + "rewards/margins": 0.954938530921936, + "rewards/rejected": -0.7484710812568665, + "step": 236 + }, + { + "epoch": 0.62, + "learning_rate": 3.382810064253809e-08, + "logits/chosen": -3.082409381866455, + "logits/rejected": -3.070629119873047, + "logps/chosen": -128.09371948242188, + "logps/rejected": -588.0408935546875, + "loss": 0.3378, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19189147651195526, + "rewards/margins": 1.8622055053710938, + "rewards/rejected": -1.670314073562622, + "step": 237 + }, + { + "epoch": 0.62, + "learning_rate": 3.342912158906821e-08, + "logits/chosen": -3.1530096530914307, + "logits/rejected": -3.1619908809661865, + "logps/chosen": -155.60601806640625, + "logps/rejected": -629.6569213867188, + "loss": 0.339, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25491029024124146, + "rewards/margins": 1.9192872047424316, + "rewards/rejected": -1.664376974105835, + "step": 238 + }, + { + "epoch": 0.62, + "learning_rate": 3.303132437051039e-08, + "logits/chosen": -3.147216796875, + "logits/rejected": -3.1533141136169434, + "logps/chosen": -144.13067626953125, + "logps/rejected": -587.775146484375, + "loss": 0.3758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2379276156425476, + "rewards/margins": 1.8831470012664795, + "rewards/rejected": -1.645219326019287, + "step": 239 + }, + { + "epoch": 0.62, + "learning_rate": 3.263473735775899e-08, + "logits/chosen": -3.109302043914795, + "logits/rejected": -3.0899548530578613, + "logps/chosen": -106.76813507080078, + "logps/rejected": -378.2903747558594, + "loss": 0.399, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22799912095069885, + "rewards/margins": 1.2562493085861206, + "rewards/rejected": -1.0282502174377441, + "step": 240 + }, + { + "epoch": 0.63, + "learning_rate": 3.223938883539649e-08, + "logits/chosen": -3.0683140754699707, + "logits/rejected": -3.0690083503723145, + "logps/chosen": -141.7244873046875, + "logps/rejected": -295.35882568359375, + "loss": 0.3967, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22378769516944885, + "rewards/margins": 0.8653526306152344, + "rewards/rejected": -0.6415649652481079, + "step": 241 + }, + { + "epoch": 0.63, + "learning_rate": 3.184530699967627e-08, + "logits/chosen": -3.1177544593811035, + "logits/rejected": -3.104226589202881, + "logps/chosen": -126.78414154052734, + "logps/rejected": -494.50762939453125, + "loss": 0.3674, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2586402893066406, + "rewards/margins": 1.521445393562317, + "rewards/rejected": -1.2628052234649658, + "step": 242 + }, + { + "epoch": 0.63, + "learning_rate": 3.145251995651161e-08, + "logits/chosen": -3.0051841735839844, + "logits/rejected": -2.984462261199951, + "logps/chosen": -171.934326171875, + "logps/rejected": -487.9618835449219, + "loss": 0.4204, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.250497430562973, + "rewards/margins": 1.2931640148162842, + "rewards/rejected": -1.0426666736602783, + "step": 243 + }, + { + "epoch": 0.64, + "learning_rate": 3.10610557194712e-08, + "logits/chosen": -3.107525110244751, + "logits/rejected": -3.1017918586730957, + "logps/chosen": -140.58621215820312, + "logps/rejected": -386.7994384765625, + "loss": 0.4061, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20550614595413208, + "rewards/margins": 1.0572991371154785, + "rewards/rejected": -0.8517929315567017, + "step": 244 + }, + { + "epoch": 0.64, + "learning_rate": 3.0670942207781204e-08, + "logits/chosen": -3.154796600341797, + "logits/rejected": -3.12007474899292, + "logps/chosen": -132.6271514892578, + "logps/rejected": -334.91796875, + "loss": 0.381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23017540574073792, + "rewards/margins": 1.005392074584961, + "rewards/rejected": -0.7752166986465454, + "step": 245 + }, + { + "epoch": 0.64, + "learning_rate": 3.028220724433408e-08, + "logits/chosen": -3.1033935546875, + "logits/rejected": -3.0821425914764404, + "logps/chosen": -92.9959487915039, + "logps/rejected": -680.812744140625, + "loss": 0.3672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24902991950511932, + "rewards/margins": 1.8795413970947266, + "rewards/rejected": -1.6305114030838013, + "step": 246 + }, + { + "epoch": 0.64, + "learning_rate": 2.9894878553704205e-08, + "logits/chosen": -3.1171681880950928, + "logits/rejected": -3.0782971382141113, + "logps/chosen": -134.26719665527344, + "logps/rejected": -476.56219482421875, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3303726315498352, + "rewards/margins": 1.8530198335647583, + "rewards/rejected": -1.5226471424102783, + "step": 247 + }, + { + "epoch": 0.65, + "learning_rate": 2.9508983760170636e-08, + "logits/chosen": -3.0196595191955566, + "logits/rejected": -3.0827298164367676, + "logps/chosen": -153.107666015625, + "logps/rejected": -808.8009033203125, + "loss": 0.3269, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3543693721294403, + "rewards/margins": 2.5410943031311035, + "rewards/rejected": -2.186724901199341, + "step": 248 + }, + { + "epoch": 0.65, + "learning_rate": 2.9124550385746854e-08, + "logits/chosen": -3.1462345123291016, + "logits/rejected": -3.1062440872192383, + "logps/chosen": -141.59365844726562, + "logps/rejected": -339.71826171875, + "loss": 0.3492, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16880226135253906, + "rewards/margins": 1.1764392852783203, + "rewards/rejected": -1.0076370239257812, + "step": 249 + }, + { + "epoch": 0.65, + "learning_rate": 2.8741605848217976e-08, + "logits/chosen": -3.082245349884033, + "logits/rejected": -3.0999345779418945, + "logps/chosen": -106.54196166992188, + "logps/rejected": -421.5709228515625, + "loss": 0.3649, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3080299496650696, + "rewards/margins": 1.4677605628967285, + "rewards/rejected": -1.1597305536270142, + "step": 250 + }, + { + "epoch": 0.65, + "learning_rate": 2.8360177459185263e-08, + "logits/chosen": -3.086772918701172, + "logits/rejected": -3.0751280784606934, + "logps/chosen": -163.43899536132812, + "logps/rejected": -492.8002624511719, + "loss": 0.366, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16303712129592896, + "rewards/margins": 1.3154892921447754, + "rewards/rejected": -1.1524521112442017, + "step": 251 + }, + { + "epoch": 0.66, + "learning_rate": 2.798029242211828e-08, + "logits/chosen": -3.0257349014282227, + "logits/rejected": -3.083235740661621, + "logps/chosen": -198.86679077148438, + "logps/rejected": -383.24853515625, + "loss": 0.3539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26516610383987427, + "rewards/margins": 1.2086994647979736, + "rewards/rejected": -0.9435333013534546, + "step": 252 + }, + { + "epoch": 0.66, + "learning_rate": 2.760197783041477e-08, + "logits/chosen": -3.0761842727661133, + "logits/rejected": -3.0633435249328613, + "logps/chosen": -135.3474578857422, + "logps/rejected": -342.6011657714844, + "loss": 0.3947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16971702873706818, + "rewards/margins": 0.9300686120986938, + "rewards/rejected": -0.7603515386581421, + "step": 253 + }, + { + "epoch": 0.66, + "learning_rate": 2.7225260665468296e-08, + "logits/chosen": -3.0962343215942383, + "logits/rejected": -3.142874240875244, + "logps/chosen": -118.26935577392578, + "logps/rejected": -336.3500061035156, + "loss": 0.336, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20697899162769318, + "rewards/margins": 1.1443859338760376, + "rewards/rejected": -0.9374069571495056, + "step": 254 + }, + { + "epoch": 0.66, + "learning_rate": 2.685016779474396e-08, + "logits/chosen": -3.010096311569214, + "logits/rejected": -3.077705144882202, + "logps/chosen": -131.54014587402344, + "logps/rejected": -360.7338562011719, + "loss": 0.4065, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2234928011894226, + "rewards/margins": 0.9935200214385986, + "rewards/rejected": -0.7700271606445312, + "step": 255 + }, + { + "epoch": 0.67, + "learning_rate": 2.6476725969862225e-08, + "logits/chosen": -3.0756826400756836, + "logits/rejected": -3.0180020332336426, + "logps/chosen": -128.92808532714844, + "logps/rejected": -294.6539306640625, + "loss": 0.3743, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17931900918483734, + "rewards/margins": 0.7139885425567627, + "rewards/rejected": -0.5346695184707642, + "step": 256 + }, + { + "epoch": 0.67, + "learning_rate": 2.6104961824690963e-08, + "logits/chosen": -3.1319265365600586, + "logits/rejected": -3.128981828689575, + "logps/chosen": -162.00518798828125, + "logps/rejected": -500.6263122558594, + "loss": 0.3294, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2774166166782379, + "rewards/margins": 1.4321987628936768, + "rewards/rejected": -1.1547820568084717, + "step": 257 + }, + { + "epoch": 0.67, + "learning_rate": 2.573490187344596e-08, + "logits/chosen": -3.127148151397705, + "logits/rejected": -3.210444927215576, + "logps/chosen": -168.75949096679688, + "logps/rejected": -698.3175048828125, + "loss": 0.3046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1860218048095703, + "rewards/margins": 1.9472064971923828, + "rewards/rejected": -1.7611846923828125, + "step": 258 + }, + { + "epoch": 0.67, + "learning_rate": 2.5366572508799884e-08, + "logits/chosen": -3.0639162063598633, + "logits/rejected": -3.0528745651245117, + "logps/chosen": -145.06219482421875, + "logps/rejected": -263.43475341796875, + "loss": 0.3878, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2784217894077301, + "rewards/margins": 0.7674171924591064, + "rewards/rejected": -0.48899537324905396, + "step": 259 + }, + { + "epoch": 0.68, + "learning_rate": 2.500000000000001e-08, + "logits/chosen": -3.0642471313476562, + "logits/rejected": -3.0622496604919434, + "logps/chosen": -146.6463623046875, + "logps/rejected": -530.0341186523438, + "loss": 0.3465, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16456718742847443, + "rewards/margins": 1.7066891193389893, + "rewards/rejected": -1.5421218872070312, + "step": 260 + }, + { + "epoch": 0.68, + "learning_rate": 2.4635210490994645e-08, + "logits/chosen": -3.1362152099609375, + "logits/rejected": -3.1139214038848877, + "logps/chosen": -143.52554321289062, + "logps/rejected": -481.79010009765625, + "loss": 0.3486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2767898440361023, + "rewards/margins": 1.3131271600723267, + "rewards/rejected": -1.0363373756408691, + "step": 261 + }, + { + "epoch": 0.68, + "learning_rate": 2.427222999856857e-08, + "logits/chosen": -3.0512208938598633, + "logits/rejected": -3.0986859798431396, + "logps/chosen": -159.63925170898438, + "logps/rejected": -516.5069580078125, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23979301750659943, + "rewards/margins": 1.5997326374053955, + "rewards/rejected": -1.3599395751953125, + "step": 262 + }, + { + "epoch": 0.68, + "learning_rate": 2.391108441048753e-08, + "logits/chosen": -3.0983283519744873, + "logits/rejected": -3.099123477935791, + "logps/chosen": -120.68556213378906, + "logps/rejected": -414.731689453125, + "loss": 0.364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26591530442237854, + "rewards/margins": 1.3052746057510376, + "rewards/rejected": -1.0393593311309814, + "step": 263 + }, + { + "epoch": 0.69, + "learning_rate": 2.3551799483651892e-08, + "logits/chosen": -3.0823845863342285, + "logits/rejected": -3.072615623474121, + "logps/chosen": -111.11195373535156, + "logps/rejected": -320.9544677734375, + "loss": 0.3776, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23154641687870026, + "rewards/margins": 0.9322040677070618, + "rewards/rejected": -0.7006576657295227, + "step": 264 + }, + { + "epoch": 0.69, + "learning_rate": 2.3194400842259686e-08, + "logits/chosen": -3.123136520385742, + "logits/rejected": -3.0886735916137695, + "logps/chosen": -115.32167053222656, + "logps/rejected": -373.9757385253906, + "loss": 0.3844, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2425697296857834, + "rewards/margins": 1.1250801086425781, + "rewards/rejected": -0.8825104236602783, + "step": 265 + }, + { + "epoch": 0.69, + "learning_rate": 2.2838913975979078e-08, + "logits/chosen": -3.0424907207489014, + "logits/rejected": -3.0630760192871094, + "logps/chosen": -119.13932037353516, + "logps/rejected": -506.9454040527344, + "loss": 0.3652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1260600984096527, + "rewards/margins": 1.2987287044525146, + "rewards/rejected": -1.17266845703125, + "step": 266 + }, + { + "epoch": 0.7, + "learning_rate": 2.2485364238130432e-08, + "logits/chosen": -3.083498001098633, + "logits/rejected": -3.0685081481933594, + "logps/chosen": -150.10183715820312, + "logps/rejected": -415.05126953125, + "loss": 0.3794, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27804261445999146, + "rewards/margins": 1.1845535039901733, + "rewards/rejected": -0.9065109491348267, + "step": 267 + }, + { + "epoch": 0.7, + "learning_rate": 2.2133776843878183e-08, + "logits/chosen": -3.1332123279571533, + "logits/rejected": -3.122234344482422, + "logps/chosen": -110.51592254638672, + "logps/rejected": -325.43292236328125, + "loss": 0.3552, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18421593308448792, + "rewards/margins": 1.0579991340637207, + "rewards/rejected": -0.8737831115722656, + "step": 268 + }, + { + "epoch": 0.7, + "learning_rate": 2.1784176868432376e-08, + "logits/chosen": -3.066882848739624, + "logits/rejected": -3.0996575355529785, + "logps/chosen": -140.90170288085938, + "logps/rejected": -605.6708374023438, + "loss": 0.3488, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22113724052906036, + "rewards/margins": 1.5780906677246094, + "rewards/rejected": -1.3569533824920654, + "step": 269 + }, + { + "epoch": 0.7, + "learning_rate": 2.1436589245260372e-08, + "logits/chosen": -3.032369613647461, + "logits/rejected": -3.0126020908355713, + "logps/chosen": -120.15177154541016, + "logps/rejected": -354.4290771484375, + "loss": 0.3817, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3326583802700043, + "rewards/margins": 1.0539535284042358, + "rewards/rejected": -0.721295177936554, + "step": 270 + }, + { + "epoch": 0.71, + "learning_rate": 2.1091038764308638e-08, + "logits/chosen": -3.0918006896972656, + "logits/rejected": -3.092691421508789, + "logps/chosen": -136.9126434326172, + "logps/rejected": -368.43609619140625, + "loss": 0.3899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20632323622703552, + "rewards/margins": 0.8597282767295837, + "rewards/rejected": -0.6534050107002258, + "step": 271 + }, + { + "epoch": 0.71, + "learning_rate": 2.074755007023461e-08, + "logits/chosen": -3.142052173614502, + "logits/rejected": -3.1522951126098633, + "logps/chosen": -159.67703247070312, + "logps/rejected": -490.4956359863281, + "loss": 0.3514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2849281430244446, + "rewards/margins": 1.530062198638916, + "rewards/rejected": -1.2451339960098267, + "step": 272 + }, + { + "epoch": 0.71, + "learning_rate": 2.0406147660649132e-08, + "logits/chosen": -3.172978639602661, + "logits/rejected": -3.1413447856903076, + "logps/chosen": -140.40744018554688, + "logps/rejected": -505.46588134765625, + "loss": 0.3167, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23398742079734802, + "rewards/margins": 1.420430064201355, + "rewards/rejected": -1.1864426136016846, + "step": 273 + }, + { + "epoch": 0.71, + "learning_rate": 2.0066855884369243e-08, + "logits/chosen": -3.0580716133117676, + "logits/rejected": -3.0497236251831055, + "logps/chosen": -126.29672241210938, + "logps/rejected": -619.5652465820312, + "loss": 0.3548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19321022927761078, + "rewards/margins": 1.7432911396026611, + "rewards/rejected": -1.5500808954238892, + "step": 274 + }, + { + "epoch": 0.72, + "learning_rate": 1.9729698939681644e-08, + "logits/chosen": -3.039384365081787, + "logits/rejected": -3.103722095489502, + "logps/chosen": -130.70132446289062, + "logps/rejected": -545.0947265625, + "loss": 0.3425, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2566288113594055, + "rewards/margins": 1.7050449848175049, + "rewards/rejected": -1.4484162330627441, + "step": 275 + }, + { + "epoch": 0.72, + "learning_rate": 1.9394700872616852e-08, + "logits/chosen": -3.144193172454834, + "logits/rejected": -3.126096248626709, + "logps/chosen": -109.2167739868164, + "logps/rejected": -436.8924865722656, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28429490327835083, + "rewards/margins": 1.4615066051483154, + "rewards/rejected": -1.1772117614746094, + "step": 276 + }, + { + "epoch": 0.72, + "learning_rate": 1.906188557523427e-08, + "logits/chosen": -3.057832717895508, + "logits/rejected": -3.062720775604248, + "logps/chosen": -164.27493286132812, + "logps/rejected": -317.0714111328125, + "loss": 0.3623, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2613876461982727, + "rewards/margins": 0.7885650992393494, + "rewards/rejected": -0.5271774530410767, + "step": 277 + }, + { + "epoch": 0.72, + "learning_rate": 1.873127678391816e-08, + "logits/chosen": -3.114840507507324, + "logits/rejected": -3.0950145721435547, + "logps/chosen": -125.48570251464844, + "logps/rejected": -394.4592590332031, + "loss": 0.4063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24489173293113708, + "rewards/margins": 1.218949556350708, + "rewards/rejected": -0.9740577936172485, + "step": 278 + }, + { + "epoch": 0.73, + "learning_rate": 1.8402898077684803e-08, + "logits/chosen": -3.0511953830718994, + "logits/rejected": -3.0520553588867188, + "logps/chosen": -128.8836212158203, + "logps/rejected": -781.9525146484375, + "loss": 0.3201, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27896881103515625, + "rewards/margins": 2.4571244716644287, + "rewards/rejected": -2.1781556606292725, + "step": 279 + }, + { + "epoch": 0.73, + "learning_rate": 1.807677287650083e-08, + "logits/chosen": -3.1480729579925537, + "logits/rejected": -3.1164298057556152, + "logps/chosen": -111.92008209228516, + "logps/rejected": -260.72088623046875, + "loss": 0.3447, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26701509952545166, + "rewards/margins": 0.9406257271766663, + "rewards/rejected": -0.6736106872558594, + "step": 280 + }, + { + "epoch": 0.73, + "learning_rate": 1.775292443961291e-08, + "logits/chosen": -3.0646095275878906, + "logits/rejected": -3.091999053955078, + "logps/chosen": -153.9076385498047, + "logps/rejected": -588.2393798828125, + "loss": 0.3256, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2789306640625, + "rewards/margins": 1.8430397510528564, + "rewards/rejected": -1.5641090869903564, + "step": 281 + }, + { + "epoch": 0.73, + "learning_rate": 1.74313758638889e-08, + "logits/chosen": -3.0618369579315186, + "logits/rejected": -3.089365005493164, + "logps/chosen": -198.69952392578125, + "logps/rejected": -499.874267578125, + "loss": 0.3357, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2757667601108551, + "rewards/margins": 1.621416449546814, + "rewards/rejected": -1.3456497192382812, + "step": 282 + }, + { + "epoch": 0.74, + "learning_rate": 1.7112150082170566e-08, + "logits/chosen": -3.1552929878234863, + "logits/rejected": -3.2023773193359375, + "logps/chosen": -137.3826446533203, + "logps/rejected": -478.3328857421875, + "loss": 0.3556, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2759040892124176, + "rewards/margins": 1.8279716968536377, + "rewards/rejected": -1.552067518234253, + "step": 283 + }, + { + "epoch": 0.74, + "learning_rate": 1.679526986163804e-08, + "logits/chosen": -3.061192512512207, + "logits/rejected": -3.0726776123046875, + "logps/chosen": -113.25704956054688, + "logps/rejected": -387.6874084472656, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1630813628435135, + "rewards/margins": 0.9997634887695312, + "rewards/rejected": -0.8366822004318237, + "step": 284 + }, + { + "epoch": 0.74, + "learning_rate": 1.648075780218607e-08, + "logits/chosen": -3.115051746368408, + "logits/rejected": -3.0685276985168457, + "logps/chosen": -156.52896118164062, + "logps/rejected": -407.8714294433594, + "loss": 0.3011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2054435759782791, + "rewards/margins": 1.2169342041015625, + "rewards/rejected": -1.0114905834197998, + "step": 285 + }, + { + "epoch": 0.74, + "learning_rate": 1.6168636334812123e-08, + "logits/chosen": -3.1385092735290527, + "logits/rejected": -3.0359182357788086, + "logps/chosen": -172.64096069335938, + "logps/rejected": -593.7811889648438, + "loss": 0.3761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3500679135322571, + "rewards/margins": 1.6572144031524658, + "rewards/rejected": -1.3071465492248535, + "step": 286 + }, + { + "epoch": 0.75, + "learning_rate": 1.5858927720016707e-08, + "logits/chosen": -3.1064915657043457, + "logits/rejected": -3.105884552001953, + "logps/chosen": -163.50604248046875, + "logps/rejected": -477.107666015625, + "loss": 0.367, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3457500636577606, + "rewards/margins": 1.48432195186615, + "rewards/rejected": -1.1385719776153564, + "step": 287 + }, + { + "epoch": 0.75, + "learning_rate": 1.5551654046215668e-08, + "logits/chosen": -3.1414055824279785, + "logits/rejected": -3.103095293045044, + "logps/chosen": -140.5720672607422, + "logps/rejected": -429.32977294921875, + "loss": 0.3535, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08684921264648438, + "rewards/margins": 1.1189522743225098, + "rewards/rejected": -1.0321030616760254, + "step": 288 + }, + { + "epoch": 0.75, + "learning_rate": 1.5246837228164906e-08, + "logits/chosen": -2.9903547763824463, + "logits/rejected": -3.029547691345215, + "logps/chosen": -165.12066650390625, + "logps/rejected": -398.7313537597656, + "loss": 0.3955, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3338371515274048, + "rewards/margins": 1.4964752197265625, + "rewards/rejected": -1.1626380681991577, + "step": 289 + }, + { + "epoch": 0.76, + "learning_rate": 1.494449900539737e-08, + "logits/chosen": -2.9813618659973145, + "logits/rejected": -3.0377001762390137, + "logps/chosen": -126.50827026367188, + "logps/rejected": -488.7496032714844, + "loss": 0.3751, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2673698365688324, + "rewards/margins": 1.7252862453460693, + "rewards/rejected": -1.457916259765625, + "step": 290 + }, + { + "epoch": 0.76, + "learning_rate": 1.4644660940672625e-08, + "logits/chosen": -3.0625734329223633, + "logits/rejected": -3.053178310394287, + "logps/chosen": -134.52951049804688, + "logps/rejected": -453.44793701171875, + "loss": 0.3831, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1972576081752777, + "rewards/margins": 1.2142833471298218, + "rewards/rejected": -1.0170257091522217, + "step": 291 + }, + { + "epoch": 0.76, + "learning_rate": 1.4347344418438989e-08, + "logits/chosen": -3.107316017150879, + "logits/rejected": -3.0938196182250977, + "logps/chosen": -157.32879638671875, + "logps/rejected": -369.9410400390625, + "loss": 0.3679, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24346923828125, + "rewards/margins": 1.2363502979278564, + "rewards/rejected": -0.9928810000419617, + "step": 292 + }, + { + "epoch": 0.76, + "learning_rate": 1.4052570643308375e-08, + "logits/chosen": -3.082028865814209, + "logits/rejected": -3.0998575687408447, + "logps/chosen": -101.75157165527344, + "logps/rejected": -277.094482421875, + "loss": 0.3835, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.283560186624527, + "rewards/margins": 0.90825355052948, + "rewards/rejected": -0.6246933341026306, + "step": 293 + }, + { + "epoch": 0.77, + "learning_rate": 1.376036063854401e-08, + "logits/chosen": -3.0354342460632324, + "logits/rejected": -3.0269651412963867, + "logps/chosen": -131.7913360595703, + "logps/rejected": -355.32244873046875, + "loss": 0.3816, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2228187620639801, + "rewards/margins": 0.9619201421737671, + "rewards/rejected": -0.7391014099121094, + "step": 294 + }, + { + "epoch": 0.77, + "learning_rate": 1.3470735244561027e-08, + "logits/chosen": -3.0342299938201904, + "logits/rejected": -3.0654044151306152, + "logps/chosen": -123.45856475830078, + "logps/rejected": -522.976318359375, + "loss": 0.3084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22185860574245453, + "rewards/margins": 1.6714344024658203, + "rewards/rejected": -1.4495757818222046, + "step": 295 + }, + { + "epoch": 0.77, + "learning_rate": 1.3183715117440141e-08, + "logits/chosen": -3.113064765930176, + "logits/rejected": -3.131131410598755, + "logps/chosen": -154.7491455078125, + "logps/rejected": -392.95794677734375, + "loss": 0.3627, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21348458528518677, + "rewards/margins": 1.1484615802764893, + "rewards/rejected": -0.9349769353866577, + "step": 296 + }, + { + "epoch": 0.77, + "learning_rate": 1.2899320727454472e-08, + "logits/chosen": -3.135582685470581, + "logits/rejected": -3.095601797103882, + "logps/chosen": -138.34165954589844, + "logps/rejected": -388.74322509765625, + "loss": 0.3063, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21225738525390625, + "rewards/margins": 1.3696579933166504, + "rewards/rejected": -1.1574004888534546, + "step": 297 + }, + { + "epoch": 0.78, + "learning_rate": 1.2617572357609563e-08, + "logits/chosen": -3.0632753372192383, + "logits/rejected": -3.0762524604797363, + "logps/chosen": -111.9055404663086, + "logps/rejected": -317.7209777832031, + "loss": 0.3442, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24396133422851562, + "rewards/margins": 0.9858566522598267, + "rewards/rejected": -0.741895318031311, + "step": 298 + }, + { + "epoch": 0.78, + "learning_rate": 1.2338490102196825e-08, + "logits/chosen": -3.086026191711426, + "logits/rejected": -3.140312671661377, + "logps/chosen": -142.45790100097656, + "logps/rejected": -292.039306640625, + "loss": 0.3855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2416786253452301, + "rewards/margins": 0.8370262384414673, + "rewards/rejected": -0.5953476428985596, + "step": 299 + }, + { + "epoch": 0.78, + "learning_rate": 1.2062093865360456e-08, + "logits/chosen": -3.1479969024658203, + "logits/rejected": -3.1305837631225586, + "logps/chosen": -120.15535736083984, + "logps/rejected": -693.50439453125, + "loss": 0.334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23507651686668396, + "rewards/margins": 2.0516345500946045, + "rewards/rejected": -1.8165581226348877, + "step": 300 + }, + { + "epoch": 0.78, + "learning_rate": 1.1788403359677768e-08, + "logits/chosen": -3.0850274562835693, + "logits/rejected": -3.1163816452026367, + "logps/chosen": -115.1484375, + "logps/rejected": -348.0543212890625, + "loss": 0.3721, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21234360337257385, + "rewards/margins": 1.0788284540176392, + "rewards/rejected": -0.8664848208427429, + "step": 301 + }, + { + "epoch": 0.79, + "learning_rate": 1.1517438104753386e-08, + "logits/chosen": -3.1532251834869385, + "logits/rejected": -3.099980354309082, + "logps/chosen": -121.92172241210938, + "logps/rejected": -223.67178344726562, + "loss": 0.3618, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27862548828125, + "rewards/margins": 0.8112159967422485, + "rewards/rejected": -0.5325905084609985, + "step": 302 + }, + { + "epoch": 0.79, + "learning_rate": 1.1249217425827062e-08, + "logits/chosen": -3.1141209602355957, + "logits/rejected": -3.1325883865356445, + "logps/chosen": -132.3360137939453, + "logps/rejected": -360.6177062988281, + "loss": 0.3555, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30232924222946167, + "rewards/margins": 1.043317437171936, + "rewards/rejected": -0.7409881949424744, + "step": 303 + }, + { + "epoch": 0.79, + "learning_rate": 1.0983760452395412e-08, + "logits/chosen": -3.0527801513671875, + "logits/rejected": -3.094419240951538, + "logps/chosen": -178.42477416992188, + "logps/rejected": -398.4735412597656, + "loss": 0.3414, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28841400146484375, + "rewards/margins": 1.485116720199585, + "rewards/rejected": -1.1967025995254517, + "step": 304 + }, + { + "epoch": 0.79, + "learning_rate": 1.07210861168476e-08, + "logits/chosen": -3.0783767700195312, + "logits/rejected": -3.134692668914795, + "logps/chosen": -130.07635498046875, + "logps/rejected": -712.7322998046875, + "loss": 0.31, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23891526460647583, + "rewards/margins": 2.519615888595581, + "rewards/rejected": -2.28070068359375, + "step": 305 + }, + { + "epoch": 0.8, + "learning_rate": 1.0461213153115079e-08, + "logits/chosen": -3.0982489585876465, + "logits/rejected": -3.0775463581085205, + "logps/chosen": -147.48385620117188, + "logps/rejected": -384.6341552734375, + "loss": 0.3757, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2566158175468445, + "rewards/margins": 1.1816143989562988, + "rewards/rejected": -0.9249985218048096, + "step": 306 + }, + { + "epoch": 0.8, + "learning_rate": 1.0204160095335479e-08, + "logits/chosen": -3.0512003898620605, + "logits/rejected": -3.0850772857666016, + "logps/chosen": -128.02899169921875, + "logps/rejected": -373.1639404296875, + "loss": 0.3904, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32291489839553833, + "rewards/margins": 1.167243242263794, + "rewards/rejected": -0.8443283438682556, + "step": 307 + }, + { + "epoch": 0.8, + "learning_rate": 9.94994527653078e-09, + "logits/chosen": -3.099242687225342, + "logits/rejected": -3.101748466491699, + "logps/chosen": -118.91464233398438, + "logps/rejected": -324.755126953125, + "loss": 0.3495, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17425614595413208, + "rewards/margins": 1.1809669733047485, + "rewards/rejected": -1.0067107677459717, + "step": 308 + }, + { + "epoch": 0.8, + "learning_rate": 9.69858682729976e-09, + "logits/chosen": -3.0781593322753906, + "logits/rejected": -3.1422641277313232, + "logps/chosen": -148.11175537109375, + "logps/rejected": -436.13055419921875, + "loss": 0.3888, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18818512558937073, + "rewards/margins": 1.4009292125701904, + "rewards/rejected": -1.2127442359924316, + "step": 309 + }, + { + "epoch": 0.81, + "learning_rate": 9.450102674524951e-09, + "logits/chosen": -3.113703489303589, + "logits/rejected": -3.1212120056152344, + "logps/chosen": -146.293212890625, + "logps/rejected": -344.3795166015625, + "loss": 0.3712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2525535821914673, + "rewards/margins": 1.176398515701294, + "rewards/rejected": -0.9238449335098267, + "step": 310 + }, + { + "epoch": 0.81, + "learning_rate": 9.204510540094096e-09, + "logits/chosen": -3.052727222442627, + "logits/rejected": -3.0790696144104004, + "logps/chosen": -111.55781555175781, + "logps/rejected": -646.8054809570312, + "loss": 0.3131, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24915924668312073, + "rewards/margins": 2.258625030517578, + "rewards/rejected": -2.0094659328460693, + "step": 311 + }, + { + "epoch": 0.81, + "learning_rate": 8.961827939636196e-09, + "logits/chosen": -3.1491856575012207, + "logits/rejected": -3.184011220932007, + "logps/chosen": -115.5438232421875, + "logps/rejected": -566.884765625, + "loss": 0.3253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2532386779785156, + "rewards/margins": 1.8853157758712769, + "rewards/rejected": -1.6320770978927612, + "step": 312 + }, + { + "epoch": 0.82, + "learning_rate": 8.72207218127231e-09, + "logits/chosen": -3.066465139389038, + "logits/rejected": -3.0887880325317383, + "logps/chosen": -138.05902099609375, + "logps/rejected": -432.7650146484375, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4325065612792969, + "rewards/margins": 1.6670067310333252, + "rewards/rejected": -1.2345001697540283, + "step": 313 + }, + { + "epoch": 0.82, + "learning_rate": 8.485260364381186e-09, + "logits/chosen": -3.142970561981201, + "logits/rejected": -3.155651330947876, + "logps/chosen": -139.010009765625, + "logps/rejected": -393.8888854980469, + "loss": 0.341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19467583298683167, + "rewards/margins": 0.9861538410186768, + "rewards/rejected": -0.7914779782295227, + "step": 314 + }, + { + "epoch": 0.82, + "learning_rate": 8.251409378379637e-09, + "logits/chosen": -3.109827995300293, + "logits/rejected": -3.131869316101074, + "logps/chosen": -133.32427978515625, + "logps/rejected": -482.70440673828125, + "loss": 0.3379, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23106880486011505, + "rewards/margins": 1.6445331573486328, + "rewards/rejected": -1.4134643077850342, + "step": 315 + }, + { + "epoch": 0.82, + "learning_rate": 8.020535901518049e-09, + "logits/chosen": -3.0863730907440186, + "logits/rejected": -3.1506857872009277, + "logps/chosen": -131.52586364746094, + "logps/rejected": -331.5972900390625, + "loss": 0.3368, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25826340913772583, + "rewards/margins": 1.218632459640503, + "rewards/rejected": -0.9603691101074219, + "step": 316 + }, + { + "epoch": 0.83, + "learning_rate": 7.792656399690923e-09, + "logits/chosen": -3.061258316040039, + "logits/rejected": -3.094789981842041, + "logps/chosen": -121.12847900390625, + "logps/rejected": -344.78021240234375, + "loss": 0.4043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19070854783058167, + "rewards/margins": 1.0861706733703613, + "rewards/rejected": -0.8954620957374573, + "step": 317 + }, + { + "epoch": 0.83, + "learning_rate": 7.567787125262449e-09, + "logits/chosen": -3.0228512287139893, + "logits/rejected": -3.1004161834716797, + "logps/chosen": -144.6453857421875, + "logps/rejected": -313.4376525878906, + "loss": 0.3415, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21277160942554474, + "rewards/margins": 1.0284333229064941, + "rewards/rejected": -0.8156616687774658, + "step": 318 + }, + { + "epoch": 0.83, + "learning_rate": 7.345944115907421e-09, + "logits/chosen": -3.0659589767456055, + "logits/rejected": -3.079639196395874, + "logps/chosen": -150.5919647216797, + "logps/rejected": -353.451171875, + "loss": 0.4069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13690948486328125, + "rewards/margins": 0.9865188598632812, + "rewards/rejected": -0.849609375, + "step": 319 + }, + { + "epoch": 0.83, + "learning_rate": 7.1271431934674446e-09, + "logits/chosen": -3.137678623199463, + "logits/rejected": -3.1370582580566406, + "logps/chosen": -92.36897277832031, + "logps/rejected": -355.3284912109375, + "loss": 0.3232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22925835847854614, + "rewards/margins": 1.230236530303955, + "rewards/rejected": -1.0009781122207642, + "step": 320 + }, + { + "epoch": 0.84, + "learning_rate": 6.911399962822517e-09, + "logits/chosen": -3.0771121978759766, + "logits/rejected": -3.0687947273254395, + "logps/chosen": -117.04313659667969, + "logps/rejected": -319.3674621582031, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24401971697807312, + "rewards/margins": 1.0575100183486938, + "rewards/rejected": -0.8134903311729431, + "step": 321 + }, + { + "epoch": 0.84, + "learning_rate": 6.698729810778064e-09, + "logits/chosen": -3.09818172454834, + "logits/rejected": -3.1306076049804688, + "logps/chosen": -115.38369750976562, + "logps/rejected": -514.0142822265625, + "loss": 0.3736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17857857048511505, + "rewards/margins": 1.765002965927124, + "rewards/rejected": -1.5864243507385254, + "step": 322 + }, + { + "epoch": 0.84, + "learning_rate": 6.48914790496759e-09, + "logits/chosen": -3.1151697635650635, + "logits/rejected": -3.147171974182129, + "logps/chosen": -161.71298217773438, + "logps/rejected": -520.0836181640625, + "loss": 0.3066, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.33321458101272583, + "rewards/margins": 1.938906192779541, + "rewards/rejected": -1.60569167137146, + "step": 323 + }, + { + "epoch": 0.84, + "learning_rate": 6.282669192770895e-09, + "logits/chosen": -3.103391408920288, + "logits/rejected": -3.0750741958618164, + "logps/chosen": -144.37957763671875, + "logps/rejected": -297.2162170410156, + "loss": 0.3707, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17323455214500427, + "rewards/margins": 1.074131727218628, + "rewards/rejected": -0.900897204875946, + "step": 324 + }, + { + "epoch": 0.85, + "learning_rate": 6.0793084002480285e-09, + "logits/chosen": -2.999894857406616, + "logits/rejected": -3.0555062294006348, + "logps/chosen": -127.45551300048828, + "logps/rejected": -516.6705322265625, + "loss": 0.3405, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2047290802001953, + "rewards/margins": 1.4637203216552734, + "rewards/rejected": -1.2589912414550781, + "step": 325 + }, + { + "epoch": 0.85, + "learning_rate": 5.8790800310890456e-09, + "logits/chosen": -3.1608054637908936, + "logits/rejected": -3.159141778945923, + "logps/chosen": -123.09164428710938, + "logps/rejected": -466.0548095703125, + "loss": 0.3215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29471588134765625, + "rewards/margins": 1.5214836597442627, + "rewards/rejected": -1.2267677783966064, + "step": 326 + }, + { + "epoch": 0.85, + "learning_rate": 5.681998365579593e-09, + "logits/chosen": -3.2106728553771973, + "logits/rejected": -3.204019546508789, + "logps/chosen": -151.1651611328125, + "logps/rejected": -439.2634582519531, + "loss": 0.2971, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2246452271938324, + "rewards/margins": 1.5282402038574219, + "rewards/rejected": -1.303594946861267, + "step": 327 + }, + { + "epoch": 0.85, + "learning_rate": 5.488077459582424e-09, + "logits/chosen": -3.049882411956787, + "logits/rejected": -3.0453758239746094, + "logps/chosen": -140.39022827148438, + "logps/rejected": -417.80712890625, + "loss": 0.3688, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2798473536968231, + "rewards/margins": 1.2735538482666016, + "rewards/rejected": -0.9937065839767456, + "step": 328 + }, + { + "epoch": 0.86, + "learning_rate": 5.297331143534972e-09, + "logits/chosen": -3.1376771926879883, + "logits/rejected": -3.1115522384643555, + "logps/chosen": -155.25088500976562, + "logps/rejected": -319.6158142089844, + "loss": 0.3412, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22655794024467468, + "rewards/margins": 0.9005722403526306, + "rewards/rejected": -0.6740143299102783, + "step": 329 + }, + { + "epoch": 0.86, + "learning_rate": 5.109773021462921e-09, + "logits/chosen": -3.103945732116699, + "logits/rejected": -3.067258834838867, + "logps/chosen": -143.50953674316406, + "logps/rejected": -333.2168884277344, + "loss": 0.376, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20835494995117188, + "rewards/margins": 1.0184532403945923, + "rewards/rejected": -0.8100982904434204, + "step": 330 + }, + { + "epoch": 0.86, + "learning_rate": 4.925416470009991e-09, + "logits/chosen": -3.0779662132263184, + "logits/rejected": -3.079745292663574, + "logps/chosen": -147.1943359375, + "logps/rejected": -281.523681640625, + "loss": 0.3566, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2442607879638672, + "rewards/margins": 0.912289023399353, + "rewards/rejected": -0.6680282354354858, + "step": 331 + }, + { + "epoch": 0.86, + "learning_rate": 4.744274637483936e-09, + "logits/chosen": -3.1291956901550293, + "logits/rejected": -3.1547982692718506, + "logps/chosen": -133.59445190429688, + "logps/rejected": -477.47808837890625, + "loss": 0.3404, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2872581481933594, + "rewards/margins": 1.6077568531036377, + "rewards/rejected": -1.3204987049102783, + "step": 332 + }, + { + "epoch": 0.87, + "learning_rate": 4.566360442918754e-09, + "logits/chosen": -3.1087541580200195, + "logits/rejected": -3.082801103591919, + "logps/chosen": -158.18869018554688, + "logps/rejected": -513.39697265625, + "loss": 0.3347, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29181593656539917, + "rewards/margins": 1.3726829290390015, + "rewards/rejected": -1.080867052078247, + "step": 333 + }, + { + "epoch": 0.87, + "learning_rate": 4.391686575153331e-09, + "logits/chosen": -2.974545955657959, + "logits/rejected": -3.008127212524414, + "logps/chosen": -151.16619873046875, + "logps/rejected": -776.9129638671875, + "loss": 0.3652, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4435432553291321, + "rewards/margins": 2.6112587451934814, + "rewards/rejected": -2.167715549468994, + "step": 334 + }, + { + "epoch": 0.87, + "learning_rate": 4.220265491926489e-09, + "logits/chosen": -3.123908519744873, + "logits/rejected": -3.122020721435547, + "logps/chosen": -131.36849975585938, + "logps/rejected": -333.81219482421875, + "loss": 0.3613, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25182420015335083, + "rewards/margins": 1.2656569480895996, + "rewards/rejected": -1.0138328075408936, + "step": 335 + }, + { + "epoch": 0.88, + "learning_rate": 4.05210941898847e-09, + "logits/chosen": -3.0682382583618164, + "logits/rejected": -3.027294397354126, + "logps/chosen": -114.59201049804688, + "logps/rejected": -759.4576416015625, + "loss": 0.3825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16591531038284302, + "rewards/margins": 2.85387921333313, + "rewards/rejected": -2.6879639625549316, + "step": 336 + }, + { + "epoch": 0.88, + "learning_rate": 3.887230349229015e-09, + "logits/chosen": -3.1349430084228516, + "logits/rejected": -3.1484689712524414, + "logps/chosen": -140.29833984375, + "logps/rejected": -3557.833251953125, + "loss": 0.3125, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3420761227607727, + "rewards/margins": 4.276548862457275, + "rewards/rejected": -3.9344727993011475, + "step": 337 + }, + { + "epoch": 0.88, + "learning_rate": 3.725640041822026e-09, + "logits/chosen": -3.054661989212036, + "logits/rejected": -3.043877601623535, + "logps/chosen": -110.30519104003906, + "logps/rejected": -404.71319580078125, + "loss": 0.4069, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2167404294013977, + "rewards/margins": 1.0862877368927002, + "rewards/rejected": -0.8695472478866577, + "step": 338 + }, + { + "epoch": 0.88, + "learning_rate": 3.5673500213868946e-09, + "logits/chosen": -3.0653443336486816, + "logits/rejected": -3.074563503265381, + "logps/chosen": -159.30133056640625, + "logps/rejected": -435.9642028808594, + "loss": 0.3452, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17591705918312073, + "rewards/margins": 1.6818360090255737, + "rewards/rejected": -1.5059189796447754, + "step": 339 + }, + { + "epoch": 0.89, + "learning_rate": 3.4123715771665783e-09, + "logits/chosen": -3.070838451385498, + "logits/rejected": -3.0857999324798584, + "logps/chosen": -98.05655670166016, + "logps/rejected": -431.9482421875, + "loss": 0.3589, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23989105224609375, + "rewards/margins": 1.3920929431915283, + "rewards/rejected": -1.1522018909454346, + "step": 340 + }, + { + "epoch": 0.89, + "learning_rate": 3.260715762222449e-09, + "logits/chosen": -3.046675205230713, + "logits/rejected": -3.1024117469787598, + "logps/chosen": -154.40762329101562, + "logps/rejected": -530.4766845703125, + "loss": 0.3122, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27670592069625854, + "rewards/margins": 1.7404954433441162, + "rewards/rejected": -1.463789463043213, + "step": 341 + }, + { + "epoch": 0.89, + "learning_rate": 3.1123933926459844e-09, + "logits/chosen": -3.039273262023926, + "logits/rejected": -3.028031349182129, + "logps/chosen": -125.91340637207031, + "logps/rejected": -418.2550964355469, + "loss": 0.3735, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19844475388526917, + "rewards/margins": 1.2038943767547607, + "rewards/rejected": -1.005449652671814, + "step": 342 + }, + { + "epoch": 0.89, + "learning_rate": 2.9674150467873527e-09, + "logits/chosen": -3.0981762409210205, + "logits/rejected": -2.9555673599243164, + "logps/chosen": -129.6515655517578, + "logps/rejected": -343.8109436035156, + "loss": 0.3543, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.13739433884620667, + "rewards/margins": 0.832676351070404, + "rewards/rejected": -0.695281982421875, + "step": 343 + }, + { + "epoch": 0.9, + "learning_rate": 2.825791064500993e-09, + "logits/chosen": -3.076568603515625, + "logits/rejected": -3.0985107421875, + "logps/chosen": -124.53006744384766, + "logps/rejected": -678.3534545898438, + "loss": 0.3198, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3127872347831726, + "rewards/margins": 2.613102912902832, + "rewards/rejected": -2.3003158569335938, + "step": 344 + }, + { + "epoch": 0.9, + "learning_rate": 2.687531546408156e-09, + "logits/chosen": -3.0591113567352295, + "logits/rejected": -3.0309131145477295, + "logps/chosen": -148.7603759765625, + "logps/rejected": -511.5507507324219, + "loss": 0.2995, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40447235107421875, + "rewards/margins": 1.627964735031128, + "rewards/rejected": -1.2234923839569092, + "step": 345 + }, + { + "epoch": 0.9, + "learning_rate": 2.5526463531765464e-09, + "logits/chosen": -3.1571407318115234, + "logits/rejected": -3.177335262298584, + "logps/chosen": -112.49705505371094, + "logps/rejected": -382.7959899902344, + "loss": 0.3633, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27791139483451843, + "rewards/margins": 1.2657151222229004, + "rewards/rejected": -0.9878036975860596, + "step": 346 + }, + { + "epoch": 0.9, + "learning_rate": 2.4211451048170294e-09, + "logits/chosen": -3.1260523796081543, + "logits/rejected": -3.1639766693115234, + "logps/chosen": -118.96934509277344, + "logps/rejected": -376.50604248046875, + "loss": 0.3579, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1940658688545227, + "rewards/margins": 1.5771026611328125, + "rewards/rejected": -1.3830368518829346, + "step": 347 + }, + { + "epoch": 0.91, + "learning_rate": 2.293037179997559e-09, + "logits/chosen": -3.119149684906006, + "logits/rejected": -3.113584518432617, + "logps/chosen": -149.94741821289062, + "logps/rejected": -336.77789306640625, + "loss": 0.337, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2642860412597656, + "rewards/margins": 0.9061164855957031, + "rewards/rejected": -0.6418304443359375, + "step": 348 + }, + { + "epoch": 0.91, + "learning_rate": 2.1683317153742777e-09, + "logits/chosen": -3.046600341796875, + "logits/rejected": -3.040527105331421, + "logps/chosen": -121.35411834716797, + "logps/rejected": -226.67779541015625, + "loss": 0.3473, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3007522523403168, + "rewards/margins": 0.86669921875, + "rewards/rejected": -0.5659469366073608, + "step": 349 + }, + { + "epoch": 0.91, + "learning_rate": 2.047037604939894e-09, + "logits/chosen": -3.093510866165161, + "logits/rejected": -3.093888759613037, + "logps/chosen": -120.12400817871094, + "logps/rejected": -437.28765869140625, + "loss": 0.3638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24623680114746094, + "rewards/margins": 1.30381441116333, + "rewards/rejected": -1.0575776100158691, + "step": 350 + }, + { + "epoch": 0.91, + "learning_rate": 1.92916349938938e-09, + "logits/chosen": -3.1023614406585693, + "logits/rejected": -3.0990090370178223, + "logps/chosen": -149.35186767578125, + "logps/rejected": -545.4013671875, + "loss": 0.3716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20757217705249786, + "rewards/margins": 1.6754859685897827, + "rewards/rejected": -1.4679138660430908, + "step": 351 + }, + { + "epoch": 0.92, + "learning_rate": 1.8147178055029578e-09, + "logits/chosen": -3.083134889602661, + "logits/rejected": -3.1658530235290527, + "logps/chosen": -131.80514526367188, + "logps/rejected": -399.3026428222656, + "loss": 0.3301, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3187858462333679, + "rewards/margins": 1.411339521408081, + "rewards/rejected": -1.092553734779358, + "step": 352 + }, + { + "epoch": 0.92, + "learning_rate": 1.70370868554659e-09, + "logits/chosen": -3.0664656162261963, + "logits/rejected": -3.1133174896240234, + "logps/chosen": -148.16253662109375, + "logps/rejected": -468.0162658691406, + "loss": 0.3538, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3665878176689148, + "rewards/margins": 1.6564728021621704, + "rewards/rejected": -1.2898850440979004, + "step": 353 + }, + { + "epoch": 0.92, + "learning_rate": 1.596144056689791e-09, + "logits/chosen": -3.107264995574951, + "logits/rejected": -3.0623974800109863, + "logps/chosen": -128.50469970703125, + "logps/rejected": -438.8287353515625, + "loss": 0.3029, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17391356825828552, + "rewards/margins": 1.4868788719177246, + "rewards/rejected": -1.3129653930664062, + "step": 354 + }, + { + "epoch": 0.92, + "learning_rate": 1.4920315904410063e-09, + "logits/chosen": -3.0516042709350586, + "logits/rejected": -3.094362735748291, + "logps/chosen": -149.95062255859375, + "logps/rejected": -360.1581115722656, + "loss": 0.3505, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23488159477710724, + "rewards/margins": 1.2840362787246704, + "rewards/rejected": -1.0491547584533691, + "step": 355 + }, + { + "epoch": 0.93, + "learning_rate": 1.3913787121004716e-09, + "logits/chosen": -3.0464959144592285, + "logits/rejected": -3.0556514263153076, + "logps/chosen": -131.27955627441406, + "logps/rejected": -325.41552734375, + "loss": 0.354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1999691128730774, + "rewards/margins": 0.8598293662071228, + "rewards/rejected": -0.6598602533340454, + "step": 356 + }, + { + "epoch": 0.93, + "learning_rate": 1.2941926002306536e-09, + "logits/chosen": -3.022416591644287, + "logits/rejected": -2.962761402130127, + "logps/chosen": -105.91797637939453, + "logps/rejected": -373.7787780761719, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26548653841018677, + "rewards/margins": 1.1021641492843628, + "rewards/rejected": -0.8366775512695312, + "step": 357 + }, + { + "epoch": 0.93, + "learning_rate": 1.2004801861442372e-09, + "logits/chosen": -3.1393775939941406, + "logits/rejected": -3.11004900932312, + "logps/chosen": -140.13723754882812, + "logps/rejected": -407.5042724609375, + "loss": 0.3588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.39037054777145386, + "rewards/margins": 1.384440302848816, + "rewards/rejected": -0.9940696954727173, + "step": 358 + }, + { + "epoch": 0.93, + "learning_rate": 1.1102481534098374e-09, + "logits/chosen": -3.0817925930023193, + "logits/rejected": -3.1098384857177734, + "logps/chosen": -146.45736694335938, + "logps/rejected": -348.31622314453125, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24858170747756958, + "rewards/margins": 1.0486763715744019, + "rewards/rejected": -0.8000946044921875, + "step": 359 + }, + { + "epoch": 0.94, + "learning_rate": 1.0235029373752758e-09, + "logits/chosen": -3.0965938568115234, + "logits/rejected": -3.1928141117095947, + "logps/chosen": -183.8480224609375, + "logps/rejected": -784.5967407226562, + "loss": 0.3115, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18636399507522583, + "rewards/margins": 2.3572657108306885, + "rewards/rejected": -2.1709015369415283, + "step": 360 + }, + { + "epoch": 0.94, + "learning_rate": 9.402507247086576e-10, + "logits/chosen": -3.1334426403045654, + "logits/rejected": -3.140427589416504, + "logps/chosen": -157.9389190673828, + "logps/rejected": -391.07855224609375, + "loss": 0.3324, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22665292024612427, + "rewards/margins": 1.1613911390304565, + "rewards/rejected": -0.9347381591796875, + "step": 361 + }, + { + "epoch": 0.94, + "learning_rate": 8.604974529571041e-10, + "logits/chosen": -3.1928160190582275, + "logits/rejected": -3.1909329891204834, + "logps/chosen": -177.74612426757812, + "logps/rejected": -577.0257568359375, + "loss": 0.3274, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2388664335012436, + "rewards/margins": 2.0856103897094727, + "rewards/rejected": -1.8467438220977783, + "step": 362 + }, + { + "epoch": 0.95, + "learning_rate": 7.842488101232891e-10, + "logits/chosen": -3.134172201156616, + "logits/rejected": -3.169459342956543, + "logps/chosen": -176.11062622070312, + "logps/rejected": -550.9486083984375, + "loss": 0.3489, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1976669281721115, + "rewards/margins": 1.726283311843872, + "rewards/rejected": -1.5286164283752441, + "step": 363 + }, + { + "epoch": 0.95, + "learning_rate": 7.1151023425981e-10, + "logits/chosen": -3.0770182609558105, + "logits/rejected": -3.01936674118042, + "logps/chosen": -132.54241943359375, + "logps/rejected": -360.2615966796875, + "loss": 0.3477, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23301354050636292, + "rewards/margins": 1.0043301582336426, + "rewards/rejected": -0.7713165283203125, + "step": 364 + }, + { + "epoch": 0.95, + "learning_rate": 6.422869130812914e-10, + "logits/chosen": -3.1444499492645264, + "logits/rejected": -3.1060447692871094, + "logps/chosen": -150.38070678710938, + "logps/rejected": -355.20672607421875, + "loss": 0.2942, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22486534714698792, + "rewards/margins": 0.9869213104248047, + "rewards/rejected": -0.7620559930801392, + "step": 365 + }, + { + "epoch": 0.95, + "learning_rate": 5.765837835944309e-10, + "logits/chosen": -2.994274854660034, + "logits/rejected": -3.024301052093506, + "logps/chosen": -145.0454864501953, + "logps/rejected": -598.1958618164062, + "loss": 0.3805, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.190501406788826, + "rewards/margins": 1.8097426891326904, + "rewards/rejected": -1.6192413568496704, + "step": 366 + }, + { + "epoch": 0.96, + "learning_rate": 5.144055317458818e-10, + "logits/chosen": -3.090358257293701, + "logits/rejected": -3.103949546813965, + "logps/chosen": -115.93931579589844, + "logps/rejected": -447.63818359375, + "loss": 0.3403, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3306999206542969, + "rewards/margins": 1.533979058265686, + "rewards/rejected": -1.2032791376113892, + "step": 367 + }, + { + "epoch": 0.96, + "learning_rate": 4.5575659208805796e-10, + "logits/chosen": -3.117487907409668, + "logits/rejected": -3.1299476623535156, + "logps/chosen": -120.9632568359375, + "logps/rejected": -506.5495300292969, + "loss": 0.3435, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2837020754814148, + "rewards/margins": 1.6245148181915283, + "rewards/rejected": -1.3408126831054688, + "step": 368 + }, + { + "epoch": 0.96, + "learning_rate": 4.0064114746284906e-10, + "logits/chosen": -3.164487838745117, + "logits/rejected": -3.151533603668213, + "logps/chosen": -132.29762268066406, + "logps/rejected": -470.7892761230469, + "loss": 0.3132, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3429889678955078, + "rewards/margins": 1.6364933252334595, + "rewards/rejected": -1.2935043573379517, + "step": 369 + }, + { + "epoch": 0.96, + "learning_rate": 3.4906312870331965e-10, + "logits/chosen": -3.1015052795410156, + "logits/rejected": -3.0800228118896484, + "logps/chosen": -126.4520263671875, + "logps/rejected": -310.5251770019531, + "loss": 0.3611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27452394366264343, + "rewards/margins": 1.1366493701934814, + "rewards/rejected": -0.8621254563331604, + "step": 370 + }, + { + "epoch": 0.97, + "learning_rate": 3.010262143533393e-10, + "logits/chosen": -3.0757007598876953, + "logits/rejected": -3.1023943424224854, + "logps/chosen": -151.16751098632812, + "logps/rejected": -348.7694396972656, + "loss": 0.3393, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19778938591480255, + "rewards/margins": 1.064815878868103, + "rewards/rejected": -0.8670265078544617, + "step": 371 + }, + { + "epoch": 0.97, + "learning_rate": 2.5653383040524224e-10, + "logits/chosen": -3.109180450439453, + "logits/rejected": -3.1110124588012695, + "logps/chosen": -115.38752746582031, + "logps/rejected": -622.2870483398438, + "loss": 0.3281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1446003019809723, + "rewards/margins": 1.8145725727081299, + "rewards/rejected": -1.66997230052948, + "step": 372 + }, + { + "epoch": 0.97, + "learning_rate": 2.1558915005548962e-10, + "logits/chosen": -2.998800754547119, + "logits/rejected": -3.1015243530273438, + "logps/chosen": -163.66485595703125, + "logps/rejected": -571.9586181640625, + "loss": 0.3541, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2038322389125824, + "rewards/margins": 1.9492065906524658, + "rewards/rejected": -1.7453744411468506, + "step": 373 + }, + { + "epoch": 0.97, + "learning_rate": 1.7819509347835048e-10, + "logits/chosen": -3.119690179824829, + "logits/rejected": -3.0617294311523438, + "logps/chosen": -148.47132873535156, + "logps/rejected": -539.1243896484375, + "loss": 0.363, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21050149202346802, + "rewards/margins": 1.7391865253448486, + "rewards/rejected": -1.5286849737167358, + "step": 374 + }, + { + "epoch": 0.98, + "learning_rate": 1.4435432761762955e-10, + "logits/chosen": -3.1005163192749023, + "logits/rejected": -3.0818092823028564, + "logps/chosen": -131.9927978515625, + "logps/rejected": -557.9573974609375, + "loss": 0.3315, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18743211030960083, + "rewards/margins": 1.5037895441055298, + "rewards/rejected": -1.3163574934005737, + "step": 375 + }, + { + "epoch": 0.98, + "learning_rate": 1.1406926599646372e-10, + "logits/chosen": -3.119384765625, + "logits/rejected": -3.0937106609344482, + "logps/chosen": -152.55123901367188, + "logps/rejected": -561.76416015625, + "loss": 0.3537, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1760711818933487, + "rewards/margins": 1.8210785388946533, + "rewards/rejected": -1.6450073719024658, + "step": 376 + }, + { + "epoch": 0.98, + "learning_rate": 8.73420685452042e-11, + "logits/chosen": -3.100228786468506, + "logits/rejected": -3.091385841369629, + "logps/chosen": -146.428955078125, + "logps/rejected": -341.39886474609375, + "loss": 0.3723, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3318824768066406, + "rewards/margins": 1.0300759077072144, + "rewards/rejected": -0.6981934309005737, + "step": 377 + }, + { + "epoch": 0.98, + "learning_rate": 6.417464144736207e-11, + "logits/chosen": -3.1122045516967773, + "logits/rejected": -3.1158978939056396, + "logps/chosen": -133.3392333984375, + "logps/rejected": -324.11993408203125, + "loss": 0.3534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24146156013011932, + "rewards/margins": 1.025688648223877, + "rewards/rejected": -0.7842270135879517, + "step": 378 + }, + { + "epoch": 0.99, + "learning_rate": 4.4568637003633555e-11, + "logits/chosen": -3.1414191722869873, + "logits/rejected": -3.105337619781494, + "logps/chosen": -142.4365692138672, + "logps/rejected": -481.9650573730469, + "loss": 0.2958, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.351531982421875, + "rewards/margins": 1.5732452869415283, + "rewards/rejected": -1.2217133045196533, + "step": 379 + }, + { + "epoch": 0.99, + "learning_rate": 2.8525453514099962e-11, + "logits/chosen": -3.076094627380371, + "logits/rejected": -3.062622547149658, + "logps/chosen": -132.037109375, + "logps/rejected": -338.8421630859375, + "loss": 0.3622, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1857704222202301, + "rewards/margins": 1.1013495922088623, + "rewards/rejected": -0.9155791997909546, + "step": 380 + }, + { + "epoch": 0.99, + "learning_rate": 1.6046235178474034e-11, + "logits/chosen": -3.109017848968506, + "logits/rejected": -3.099512815475464, + "logps/chosen": -121.00896453857422, + "logps/rejected": -266.34869384765625, + "loss": 0.3705, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22879715263843536, + "rewards/margins": 0.9055389165878296, + "rewards/rejected": -0.6767417788505554, + "step": 381 + }, + { + "epoch": 0.99, + "learning_rate": 7.13187201450971e-12, + "logits/chosen": -3.0975537300109863, + "logits/rejected": -3.0752837657928467, + "logps/chosen": -119.10079193115234, + "logps/rejected": -424.52947998046875, + "loss": 0.3178, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22042961418628693, + "rewards/margins": 1.150675654411316, + "rewards/rejected": -0.9302459955215454, + "step": 382 + }, + { + "epoch": 1.0, + "learning_rate": 1.7829997945084663e-12, + "logits/chosen": -3.103525161743164, + "logits/rejected": -3.088527202606201, + "logps/chosen": -150.5305938720703, + "logps/rejected": -571.6788330078125, + "loss": 0.3292, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2615249752998352, + "rewards/margins": 2.023759603500366, + "rewards/rejected": -1.7622345685958862, + "step": 383 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "logits/chosen": -3.096859931945801, + "logits/rejected": -3.0658698081970215, + "logps/chosen": -153.7445831298828, + "logps/rejected": -454.8304443359375, + "loss": 0.4019, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.35934144258499146, + "rewards/margins": 1.60845947265625, + "rewards/rejected": -1.2491180896759033, + "step": 384 + }, + { + "epoch": 1.0, + "step": 384, + "total_flos": 0.0, + "train_loss": 0.4645683264825493, + "train_runtime": 774.0613, + "train_samples_per_second": 7.933, + "train_steps_per_second": 0.496 + } + ], + "logging_steps": 1.0, + "max_steps": 384, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/ckpt/llava-v1.6-mistral-7b-STIC-stage1_lora b/ckpt/llava-v1.6-mistral-7b-STIC-stage1_lora new file mode 160000 index 0000000000000000000000000000000000000000..50b9a84f6424e2f73427a2ce139d4666a7be01f9 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC-stage1_lora @@ -0,0 +1 @@ +Subproject commit 50b9a84f6424e2f73427a2ce139d4666a7be01f9 diff --git a/ckpt/llava-v1.6-mistral-7b-STIC_lora b/ckpt/llava-v1.6-mistral-7b-STIC_lora new file mode 160000 index 0000000000000000000000000000000000000000..c890150aca9b95b185967cb6105b7e2db35b00f5 --- /dev/null +++ b/ckpt/llava-v1.6-mistral-7b-STIC_lora @@ -0,0 +1 @@ +Subproject commit c890150aca9b95b185967cb6105b7e2db35b00f5 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0581ff28884c2d94b7a1c63e346940bedb368806 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5c668f5f3a82724681b7c67a0451177b50fa0952173becdde6636e9351d4c9 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f02bbb61b73abdec3a8c3681d7d69850d18bb17f --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94cbce3e03403604e617ff52fe26cf8d01344bba71ce44c9955ab626686c88f0 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a33fda696a37308817cfee35846b3fc95325853e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d85543c455f63f0f2563bae9d1681d4a8ca37992e44f131b279b3033854e887 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf3fddd80568d8673272a778f38eb0d839e347d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade0e1a6531fa0082f7cd442f2bffda6f6993d92245a7fefb0750fee39056ce1 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7688757dc6cb318668d2493a2771459d8a440608 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a30ed34ee81868a59fac07d1e90ca2296a89e76f235ee948fd4bf899d2c84fd +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efe735e16f8cf7c1510bb06b74a7f6366a28ef0e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6773bc8b12e20219d2f0c2a6ad1aa2fc5558dcf177f9cf0e304352b6bf40c19 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_concat_new_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..619d9139c4b5bc927bbfa663aef30148451a81df --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bbbeac69e14659932de6d6bcbc096d9784882542cd73fd4c766079b40d50a3 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06b5d0cad6445980f53e33a8ef8f9af193a674a0 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eeb4755e0b87f559214b7c82be493c503b6e4761b498358dc35500442b3785c +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..461dcf4afe4cfb40785283f8105b1510b7357a37 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb67593c8a105af783a88b19fe91e14e6a94b987404321aae51387c6afcb284 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e25180d2d17afab8f6015f2b40c05cd741854f6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4866366ab971e21d24daaa14a1f1dbc64ef9a2a3cb50feafaca3968bd61e44ff +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4039bc638e4b8620c5fa7bebe28e7f80fb8ada6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a0742a4a7f6fad88ab36c54fd325faf22a8fbee350ea58c079d33bb0b231f1 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5eb789f1ad884679c80d9dfd410bd2d3e3d12ff --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a542e97da902a856c12c86131134105c701b0a85e50a638ce99fc1f28bba392 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_2_new_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4597aa5ce979b3b7b9c4a4eb07fafa14a9a7334d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be69230f78d7a09cc15ccee81ee39a7659d330149335f86243f8ed5b339d261c +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7311888ec953e5f47e398ac74de20549008f6aa8 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b149acacea252c225de40dfb8e6e955b2579cc033ef2750f8d151195013fff +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e55cfaea8c7aa938ebfc9723b2477697b2a8962c --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207b025ede971f439debbc5f4ac893949f06e8e43c10947cf135a5f315ad2567 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..984723f5d26305b69d8052a81664deb8af2e58ee --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89824231c8e216ca69fc1609e5262068916d32ad8d7d0a5480d3546c5b420fa4 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d88557aaf141c5682874e325a2f1d123146e5f6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b72e17a4a9b37e9979affe07d500c2e1af26ef6e599a52c506c50686695129 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e99fb42f034d8b851f3b905d4155d20b82338752 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eef65aa007fe90a86a17ca0e5215266254d7e53a3aecff71847bee1572cd0b8 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-Iter1_lora_2ep/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b004d6ea32f5de0c61b5f1263c72b90a89cb3df --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72aaa716364a0cdc0dff311dc0f77048f5ec0eae51f5d3110f9581526264fe27 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90bf1e163f2ae65d5769e4548ef611957a5e570c --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:330711549888cc6bc7ad1d953d37305cc51e905f482a0c33315b2925c173632c +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..64e41244b2f03efb9e33a0fe3abf8572b653b0b9 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cda84022df82ac60edf9c7fa86184e734aeaa0936a04b16b60fd5e050929ddb +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-gt-judge/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..039be5e3087aeb5886a8e8b795e2bda3b7fe5990 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08da9f55ee54918011574c77a7e0fe249274fe91a24889e80ee0d9e42b59c5c9 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0492421feb43325b822f23521f2a470ef6f610e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5afe4147ba14654fe4441a8458f6184c479996628731c85a1c4e7a963fae48 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee284814966ad7c7991197cad7377b4c01131bd0 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c59d69b4649ba0a16756c58963fdf359de3bcb63806814bb9a6a74a516c433 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fea19df31e4971a37acc6b0f2dae9f5db3d3a27e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac717590ebac25c356d7b6c4381d4ec7c57f9d1623b1184d24933b889086eaf4 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dace66f902e1a68e25d036542ab114b97cc6a70 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b192fedc70345ce2daf6a449d4fe3a7286c7f5b5efe47079b5c5977c16efd170 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..349082a405c8594d34138679ab2511e8be17c881 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c068d23d42c79ba5ed6f919fab4de16fdc17ad527e091deca2722029a5d7e6e +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_new/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..643c153f4e4ab1e4f53093e5d2e2be6be691528d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bae935aa2da951627e6edaadb36ed43ef4a6bb455ea30ae377a9499eb05869 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d31e90474b9297e9cde28ac08455abb0adb4726 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7744533c669e8edbbe5d404df2f37d4321d431070bddb3754a188c95ff6e6814 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdd0f5e5df62e8ef7578a20f484442ecee96a608 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696161835b28ddc31674d60bc48dbc295de6fc47eec785ca00a5ee55cf729329 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5104cf030e97ad1902bfcc7af4a887a83076817 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55cfaff9b0c2ed246a7300cd0aa61d3f9e8a78ae59594682e20d028d66337130 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..669e86d59d524e957f44a05eec15d2062c76d314 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9a4290ac74fb60ed1cb168e21b0f559ed69274958f149aeed5df7046e352fd +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d351cff272ba61e766a2c28968b346de48e2d9b0 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2bc8eb51a3f8abb98c10d1414fe6be46da5a2c9a6e8ff096d15514ebef13ccb +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-judge_v2_2ep/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32471f8c66b520593b9503a2616b68f766922c5c --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4ceeb51acc409b6f65a94319c6485dc601c4a12dc975ee76d86890b2162c5a +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55d5906a4ef84f80b96fc48e61c8dea22ab65000 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76eccf6ab1317da332b4ea49666cb46c3ae6edbb4bed60ea38655f16565ce94 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b6fb227229c79983ac41a666106e6b0b050e17f --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9684ed9e97b720efeea6a368a32551988d61dd83bb9fc624b5b4be17d1b2e4b1 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1bcae5bd1f9391584da20e53edd35ab89ce9149d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a77c639bc1cef5c114e023e2f3c8b589ae52233633f401e59eef5ae126aea9 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d9908a0c63c05de4e9fa70ee673f5ea8c6d026d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d774d94eac3fb9072397242456f0fde6721aea0e854e700a3409deb66b64691 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9192f5e6ec98a2459c90cccfde75f4b643b132e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8becff71e9cf39a32d38088584bb17cbc067e45342d82c85f5a3fb08986ff9f +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_concat_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5207315c82535f9100522f2154afa10df0bb5073 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02885e20c3c9a6e6d2b9427f312c8a11fc47bd79379d367b598773b733f5006b +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a2dfa337e9d3806063fb678fd99fbd77ceea891 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80bedb362d9df7ee4fb1bc3955db94374d41927b6d8628d69a430b529172ad62 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58115b728583564d9969ce9ef4bb86d17a4606ea --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e40049a4114f7e98582bdc3ea0ea6393e5e11a9d60d0e99d2a6827deba0b6b +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2_lora/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17fbf671fa815a0dd9a0b9bc0ec81829f9866400 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee9a6a86ab7d75536137fba7f5ee76dd808a403d54ab54eb2ee4e0754d6c4986 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..722dca247977fe74c3b92116e07c314b6626dccd --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb45a7f84e1455ad780434e79314cd4b314a98516da6ae112157f4d36e9a7b9 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d993a6dbebbb991360f4bf9f749a1fa4bcfcd5b6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd323611b08e81839bf0b9a455a03f6f225d7c9d40336b1216993f81a9734fb5 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-Iter1_2ep/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a195db571d5e8fa4e66d56e35b6086b27d7437ba --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038f6eab9d1797a3faeaf8c3a5ccc123038581b1818edda5f0db89b8c8d0fc0e +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec763bb94dddcf3cb5717a594a63d2da4d07e41b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5bcfe113b7b03e22b1628f5b91d7a7871cedf63c7ed698c6895e26843a8c06 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9516d3c6c903b3c3c9f4477485313ce6d63f0120 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd9fc65be156353ea27be02f43fb9db7c57a62c3b3a8e6df16945bcf2517755 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70de140110218836276d4cb8ad5fd237e2c04f98 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caed82993f1f36691d64aeb62a3a6a5606310d4a28e8392b586f6b875db9b9bd +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b7981a93213c1cacfb8508014e66a7faf446046 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f1dd182a1240ed0c623c12fe3df5695022de5e0ab8c41a7110bfa66b715d33 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c99dfa7bdb66a4d32ea7fb986aabccc1680ff85a --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7910e004751aa27d1eb5a83161c16ff1219ba178dd9fa0c5b814ae91b168c9f +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1-judge_new/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98fd0bc143cbac9fcf4416f2f93654a3e8daf2e --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 1048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fea915af59d94568ef0cb41c5f06d75add06ea9 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f27ef05078b0fc06ddcdb40e9b4d6ed4c7873d0f29ccb34844ebc1df40230b3 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c8a50d9b153da0b7c9fda4007149f6dff24a3ab --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b21b64de0c8adbb79cedf3bca307274e1692e39a6a8384d75648f259d39ddfc +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efa9ab4a36e94895a916204dac9841655604cc67 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b18a6bc7d7674bf2d528da8b58561ee9eda07cd97ae388656673e519b7e5847 +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC-stage1/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f01082ed6b22a9c864ec4b3c70ec36424dfaad7b --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/config.json @@ -0,0 +1,73 @@ +{ + "_name_or_path": "liuhaotian/llava-v1.6-mistral-7b", + "architectures": [ + "LlavaLlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "freeze_mm_mlp_adapter": false, + "freeze_mm_vision_resampler": false, + "hidden_act": "silu", + "hidden_size": 4096, + "image_aspect_ratio": "pad", + "image_crop_resolution": 224, + "image_grid_pinpoints": [ + [ + 336, + 672 + ], + [ + 672, + 336 + ], + [ + 672, + 672 + ], + [ + 1008, + 336 + ], + [ + 336, + 1008 + ] + ], + "image_split_resolution": 224, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "mm_hidden_size": 1024, + "mm_patch_merge_type": "flat", + "mm_projector_lr": 2e-05, + "mm_projector_type": "mlp2x_gelu", + "mm_resampler_type": null, + "mm_use_im_patch_token": false, + "mm_use_im_start_end": false, + "mm_vision_select_feature": "patch", + "mm_vision_select_layer": -2, + "mm_vision_tower": "openai/clip-vit-large-patch14-336", + "mm_vision_tower_lr": 2e-06, + "model_type": "llava_llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "tokenizer_model_max_length": 2048, + "tokenizer_padding_side": "right", + "torch_dtype": "float16", + "transformers_version": "4.37.2", + "tune_mm_mlp_adapter": false, + "tune_mm_vision_resampler": false, + "unfreeze_mm_vision_tower": true, + "use_cache": true, + "use_mm_proj": true, + "vocab_size": 32000 +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/generation_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..69b7806611a4865cd48c3e991dbd7d8312e0c5d3 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.2" +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00001-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0069706743708d71261016d4a488e89c2766d62 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fff0bd4ae4268f294a5102d9b8ca77dd9b67f3dc8787355e8018e97014a0d31 +size 4943162240 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00002-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b6622bda5a51bf608019de6e9b1eabda41c39ad --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42720bd3c3be0ca291ba68d0c0b88401c43a12945e0bbad1469a4768b02ff759 +size 4999819232 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00003-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..760d1b15ed6f24fc2343f4dc8ec1a342b4b65058 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d55f922802d2d9069b94e392c9c926bf9a0feedf0202b88feedceabdcbe4ce +size 4927407880 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00004-of-00004.safetensors b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..386f95afa8c27e4e1fbbfbe1d07c894d3ecb567d --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd131cf8ade9f39ba17b218d832cedd32eb709969ec02aaf1faec69b22830695 +size 262144128 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model.safetensors.index.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..55947b5538de3bfdd41f73730d8dba165e080e94 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/model.safetensors.index.json @@ -0,0 +1,693 @@ +{ + "metadata": { + "total_size": 15132438528 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.mm_projector.0.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.0.weight": "model-00003-of-00004.safetensors", + "model.mm_projector.2.bias": "model-00003-of-00004.safetensors", + "model.mm_projector.2.weight": "model-00003-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.class_embedding": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.bias": "model-00003-of-00004.safetensors", + "model.vision_tower.vision_tower.vision_model.pre_layrnorm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/special_tokens_map.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..8bedc05a6476080d7f473a9da72394f2cee48340 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/tokenizer.model b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/tokenizer_config.json b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..56dd3e8f4369780fc8bccc1c18056808a2a49e68 --- /dev/null +++ b/ckpt/whole_model/llava-v1.6-mistral-7b-STIC/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 4096, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}