yihang7 committed on
Commit 48fa12f
1 Parent(s): 4e5693e

Model save

README.md ADDED
@@ -0,0 +1,68 @@
+ ---
+ license: apache-2.0
+ base_model: mistralai/Mistral-7B-v0.1
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: zephyr-7b-dpo-lora
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # zephyr-7b-dpo-lora
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.2082
+ - Rewards/chosen: 1.3857
+ - Rewards/rejected: -0.9066
+ - Rewards/accuracies: 0.9414
+ - Rewards/margins: 2.2923
+ - Logps/rejected: -388.5903
+ - Logps/chosen: -238.5479
+ - Logits/rejected: -2.7219
+ - Logits/chosen: -2.6178
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 32
+ - total_train_batch_size: 128
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.2019 | 1.0 | 1470 | 0.2082 | 1.3857 | -0.9066 | 0.9414 | 2.2923 | -388.5903 | -238.5479 | -2.7219 | -2.6178 |
+
+
+ ### Framework versions
+
+ - Transformers 4.35.0
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.14.6
+ - Tokenizers 0.14.1
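
The Rewards/* entries in the card above are DPO training metrics. A brief sketch of how such metrics are conventionally defined, assuming the standard DPO formulation (for example, as implemented in TRL's DPOTrainer; the card does not state which trainer produced them): each completion y for a prompt x gets an implicit reward from the policy relative to the frozen reference model.

```latex
% Assumed standard DPO definitions (beta is the DPO temperature):
%   Rewards/chosen     = mean of r(x, y_chosen)
%   Rewards/rejected   = mean of r(x, y_rejected)
%   Rewards/margins    = mean of r(x, y_chosen) - r(x, y_rejected)
%   Rewards/accuracies = fraction of pairs with r(x, y_chosen) > r(x, y_rejected)
\[
  r_\theta(x, y) = \beta \, \log \frac{\pi_\theta(y \mid x)}{\pi_{\mathrm{ref}}(y \mid x)}
\]
```

Read this way, the reported margin is simply the difference of the two reward means: 1.3857 - (-0.9066) = 2.2923.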
adapter_config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.1,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "k_proj",
+     "o_proj",
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
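
For reference, a minimal sketch of loading an adapter with this configuration on top of the base model using Transformers and PEFT. The adapter repo id below is an assumption inferred from the model name in the card; everything else follows from adapter_config.json.

```python
# Minimal loading sketch (assumption: the adapter is published as
# "yihang7/zephyr-7b-dpo-lora"; substitute the actual repo id or a local path).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-v0.1"      # base_model_name_or_path above
adapter_id = "yihang7/zephyr-7b-dpo-lora"  # assumed adapter repo id

tokenizer = AutoTokenizer.from_pretrained(adapter_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
# Attaches the r=64, alpha=16 LoRA weights to q_proj/k_proj/v_proj/o_proj.
model = PeftModel.from_pretrained(base, adapter_id)
model.eval()
```

If desired, `model.merge_and_unload()` can fold the adapter into the base weights for plain Transformers inference.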
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3cb617f4f26f59b2adac4da811c4ddcf2516a14593f17af805749d5ecbf3f92
+ size 218138576
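
As a rough plausibility check (not taken from the repo), the 218,138,576-byte adapter file is consistent with the LoRA configuration above if the weights are stored in fp32, assuming the published Mistral-7B-v0.1 shapes (hidden size 4096, 32 layers, 8 KV heads, i.e. 1024-dimensional k/v projections):

```python
# Back-of-the-envelope LoRA parameter count for r=64 on q/k/v/o projections.
# All model dimensions here are assumptions based on Mistral-7B-v0.1's published config.
hidden, kv_dim, layers, r = 4096, 1024, 32, 64

per_layer = (
    r * (hidden + hidden)    # q_proj: A is (r x 4096), B is (4096 x r)
    + r * (hidden + kv_dim)  # k_proj: A is (r x 4096), B is (1024 x r)
    + r * (hidden + kv_dim)  # v_proj
    + r * (hidden + hidden)  # o_proj
)
total_params = per_layer * layers  # 54,525,952 LoRA parameters
print(total_params * 4)            # 218,103,808 bytes in fp32
```

That lands within about 35 KB of the stored file size, with the remainder plausibly being the safetensors header.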
all_results.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "epoch": 1.0,
+   "eval_logits/chosen": -2.617767333984375,
+   "eval_logits/rejected": -2.721874952316284,
+   "eval_logps/chosen": -238.54788208007812,
+   "eval_logps/rejected": -388.59033203125,
+   "eval_loss": 0.20815864205360413,
+   "eval_rewards/accuracies": 0.9413930773735046,
+   "eval_rewards/chosen": 1.3856867551803589,
+   "eval_rewards/margins": 2.292266845703125,
+   "eval_rewards/rejected": -0.9065799117088318,
+   "eval_runtime": 2791.9998,
+   "eval_samples": 2000,
+   "eval_samples_per_second": 3.403,
+   "eval_steps_per_second": 0.426,
+   "total_flos": 0.0,
+   "train_loss": 0.33413780781687524,
+   "train_runtime": 91396.7242,
+   "train_samples": 61966,
+   "train_samples_per_second": 2.06,
+   "train_steps_per_second": 0.016
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "epoch": 1.0,
+   "eval_logits/chosen": -2.617767333984375,
+   "eval_logits/rejected": -2.721874952316284,
+   "eval_logps/chosen": -238.54788208007812,
+   "eval_logps/rejected": -388.59033203125,
+   "eval_loss": 0.20815864205360413,
+   "eval_rewards/accuracies": 0.9413930773735046,
+   "eval_rewards/chosen": 1.3856867551803589,
+   "eval_rewards/margins": 2.292266845703125,
+   "eval_rewards/rejected": -0.9065799117088318,
+   "eval_runtime": 2791.9998,
+   "eval_samples": 2000,
+   "eval_samples_per_second": 3.403,
+   "eval_steps_per_second": 0.426
+ }
runs/Jan05_23-27-01_amaterasu/events.out.tfevents.1704497299.amaterasu.1124388.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c179ca7521c85f957d90d16cd0c6ef86a67e11a79827f4faf5b07c8638edaafc
+ size 4424
runs/Jan05_23-32-55_amaterasu/events.out.tfevents.1704497659.amaterasu.1124388.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:332afb430f6d83b9d8bc265a65d5b92f9874d1d9335501017307e8445e477b2d
+ size 4425
runs/Jan05_23-39-40_amaterasu/events.out.tfevents.1704498064.amaterasu.1124388.3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec0c4508be3044b5cc6d78e1abe9a6ee56bdfac01ae4072b90a8a834747c0401
+ size 4425
runs/Jan05_23-47-10_amaterasu/events.out.tfevents.1704498636.amaterasu.1197664.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a84d4e16949b0246722c71e78a55e444e69fb7126a17d3f5ecbc2b11c2b125de
+ size 5604
runs/Jan05_23-47-10_amaterasu/events.out.tfevents.1704499383.amaterasu.1197664.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67fc7098474eb2683a998b5e2abe5d75f4b2bb5c2aea866112c592487dda88fa
+ size 4397
runs/Jan06_00-04-38_amaterasu/events.out.tfevents.1704499547.amaterasu.1204821.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a6b0f195488abb9e5a95a9199b022def032d8bbebe187137dc58b1b2dec4655
+ size 4397
runs/Jan06_00-08-44_amaterasu/events.out.tfevents.1704499811.amaterasu.1206551.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2660a0abf6e7d06d2e70e04b500b3e4a3140fc44a19cbc6575a943320566ae5c
+ size 4424
runs/Jan06_00-12-03_amaterasu/events.out.tfevents.1704500027.amaterasu.1207814.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:797411fc6fd5c3684040b9ed4898970d6056bcf5cf9679c11528e7b20b02cb11
+ size 99141
runs/Jan06_00-12-03_amaterasu/events.out.tfevents.1704594215.amaterasu.1207814.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0520ffa6df0acf10079889746643a4f85aaebd2de7e049fc0b920c5ed6e852e9
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
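
The `chat_template` above renders conversations as `<|system|>` / `<|user|>` / `<|assistant|>` blocks, each terminated by the EOS token. A minimal usage sketch with the Transformers chat-template API; the repo id and the example messages are assumptions for illustration:

```python
from transformers import AutoTokenizer

# Assumed repo id; any tokenizer carrying the chat_template above behaves the same way.
tokenizer = AutoTokenizer.from_pretrained("yihang7/zephyr-7b-dpo-lora")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What does DPO training optimize?"},
]
# Renders the template and appends a trailing "<|assistant|>" generation prompt.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```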
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.0,
+   "total_flos": 0.0,
+   "train_loss": 0.33413780781687524,
+   "train_runtime": 91396.7242,
+   "train_samples": 61966,
+   "train_samples_per_second": 2.06,
+   "train_steps_per_second": 0.016
+ }
trainer_state.json ADDED
@@ -0,0 +1,2116 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9993414204074695,
5
+ "eval_steps": 100,
6
+ "global_step": 1470,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 3.4013605442176867e-09,
14
+ "logits/chosen": -2.8035497665405273,
15
+ "logits/rejected": -2.7962629795074463,
16
+ "logps/chosen": -211.36532592773438,
17
+ "logps/rejected": -294.74530029296875,
18
+ "loss": 0.693,
19
+ "rewards/accuracies": 0.2265625,
20
+ "rewards/chosen": 0.0010320872534066439,
21
+ "rewards/margins": 0.0005493065109476447,
22
+ "rewards/rejected": 0.0004827805096283555,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 3.4013605442176873e-08,
28
+ "logits/chosen": -2.7791833877563477,
29
+ "logits/rejected": -2.804030418395996,
30
+ "logps/chosen": -240.9124298095703,
31
+ "logps/rejected": -369.5000305175781,
32
+ "loss": 0.6926,
33
+ "rewards/accuracies": 0.4696180522441864,
34
+ "rewards/chosen": 0.00038262151065282524,
35
+ "rewards/margins": 0.0016919042682275176,
36
+ "rewards/rejected": -0.00130928261205554,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.01,
41
+ "learning_rate": 6.802721088435375e-08,
42
+ "logits/chosen": -2.7648768424987793,
43
+ "logits/rejected": -2.78273606300354,
44
+ "logps/chosen": -245.15121459960938,
45
+ "logps/rejected": -350.14898681640625,
46
+ "loss": 0.6932,
47
+ "rewards/accuracies": 0.5023437738418579,
48
+ "rewards/chosen": 0.0015446910401806235,
49
+ "rewards/margins": 0.0005673653213307261,
50
+ "rewards/rejected": 0.000977325951680541,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.02,
55
+ "learning_rate": 1.0204081632653061e-07,
56
+ "logits/chosen": -2.8178772926330566,
57
+ "logits/rejected": -2.786083221435547,
58
+ "logps/chosen": -240.51516723632812,
59
+ "logps/rejected": -352.41339111328125,
60
+ "loss": 0.6929,
61
+ "rewards/accuracies": 0.508593738079071,
62
+ "rewards/chosen": 0.0015831931959837675,
63
+ "rewards/margins": 0.001221821061335504,
64
+ "rewards/rejected": 0.00036137248389422894,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.03,
69
+ "learning_rate": 1.360544217687075e-07,
70
+ "logits/chosen": -2.803492307662964,
71
+ "logits/rejected": -2.7716286182403564,
72
+ "logps/chosen": -235.7887725830078,
73
+ "logps/rejected": -359.8059997558594,
74
+ "loss": 0.6912,
75
+ "rewards/accuracies": 0.5234375,
76
+ "rewards/chosen": 0.0033938586711883545,
77
+ "rewards/margins": 0.004743899218738079,
78
+ "rewards/rejected": -0.001350040198303759,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.03,
83
+ "learning_rate": 1.7006802721088434e-07,
84
+ "logits/chosen": -2.8103866577148438,
85
+ "logits/rejected": -2.803828716278076,
86
+ "logps/chosen": -245.4801483154297,
87
+ "logps/rejected": -306.09783935546875,
88
+ "loss": 0.6899,
89
+ "rewards/accuracies": 0.5640624761581421,
90
+ "rewards/chosen": 0.00749587407335639,
91
+ "rewards/margins": 0.007244518492370844,
92
+ "rewards/rejected": 0.00025135590112768114,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.04,
97
+ "learning_rate": 2.0408163265306121e-07,
98
+ "logits/chosen": -2.7881524562835693,
99
+ "logits/rejected": -2.808814525604248,
100
+ "logps/chosen": -269.1226501464844,
101
+ "logps/rejected": -339.7763977050781,
102
+ "loss": 0.6874,
103
+ "rewards/accuracies": 0.6015625,
104
+ "rewards/chosen": 0.011595133692026138,
105
+ "rewards/margins": 0.012225830927491188,
106
+ "rewards/rejected": -0.000630697060842067,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.05,
111
+ "learning_rate": 2.3809523809523806e-07,
112
+ "logits/chosen": -2.7982544898986816,
113
+ "logits/rejected": -2.765774726867676,
114
+ "logps/chosen": -258.89117431640625,
115
+ "logps/rejected": -372.06451416015625,
116
+ "loss": 0.6864,
117
+ "rewards/accuracies": 0.6148437261581421,
118
+ "rewards/chosen": 0.013396549038589,
119
+ "rewards/margins": 0.014446373097598553,
120
+ "rewards/rejected": -0.0010498259216547012,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.05,
125
+ "learning_rate": 2.72108843537415e-07,
126
+ "logits/chosen": -2.8109402656555176,
127
+ "logits/rejected": -2.7843804359436035,
128
+ "logps/chosen": -244.15817260742188,
129
+ "logps/rejected": -369.6734313964844,
130
+ "loss": 0.6821,
131
+ "rewards/accuracies": 0.6742187738418579,
132
+ "rewards/chosen": 0.02298940345644951,
133
+ "rewards/margins": 0.023194540292024612,
134
+ "rewards/rejected": -0.0002051351184491068,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.06,
139
+ "learning_rate": 3.0612244897959183e-07,
140
+ "logits/chosen": -2.8090157508850098,
141
+ "logits/rejected": -2.7707672119140625,
142
+ "logps/chosen": -222.1091766357422,
143
+ "logps/rejected": -365.6192321777344,
144
+ "loss": 0.6766,
145
+ "rewards/accuracies": 0.7242187261581421,
146
+ "rewards/chosen": 0.030939970165491104,
147
+ "rewards/margins": 0.03436826914548874,
148
+ "rewards/rejected": -0.0034283031709492207,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.07,
153
+ "learning_rate": 3.401360544217687e-07,
154
+ "logits/chosen": -2.7735049724578857,
155
+ "logits/rejected": -2.7935452461242676,
156
+ "logps/chosen": -251.73049926757812,
157
+ "logps/rejected": -388.00115966796875,
158
+ "loss": 0.6728,
159
+ "rewards/accuracies": 0.735156238079071,
160
+ "rewards/chosen": 0.03729977086186409,
161
+ "rewards/margins": 0.04232599213719368,
162
+ "rewards/rejected": -0.0050262222066521645,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.07,
167
+ "learning_rate": 3.741496598639456e-07,
168
+ "logits/chosen": -2.797628164291382,
169
+ "logits/rejected": -2.784834384918213,
170
+ "logps/chosen": -255.72265625,
171
+ "logps/rejected": -349.15985107421875,
172
+ "loss": 0.6651,
173
+ "rewards/accuracies": 0.788281261920929,
174
+ "rewards/chosen": 0.05175922438502312,
175
+ "rewards/margins": 0.05847715586423874,
176
+ "rewards/rejected": -0.00671793520450592,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.08,
181
+ "learning_rate": 4.0816326530612243e-07,
182
+ "logits/chosen": -2.7973737716674805,
183
+ "logits/rejected": -2.7825686931610107,
184
+ "logps/chosen": -252.3303985595703,
185
+ "logps/rejected": -348.4207458496094,
186
+ "loss": 0.6604,
187
+ "rewards/accuracies": 0.813281238079071,
188
+ "rewards/chosen": 0.06004839017987251,
189
+ "rewards/margins": 0.06873828917741776,
190
+ "rewards/rejected": -0.008689895272254944,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.09,
195
+ "learning_rate": 4.421768707482993e-07,
196
+ "logits/chosen": -2.7856903076171875,
197
+ "logits/rejected": -2.8103625774383545,
198
+ "logps/chosen": -248.4453125,
199
+ "logps/rejected": -316.520263671875,
200
+ "loss": 0.6528,
201
+ "rewards/accuracies": 0.8179687261581421,
202
+ "rewards/chosen": 0.07609430700540543,
203
+ "rewards/margins": 0.08578468859195709,
204
+ "rewards/rejected": -0.00969038438051939,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.1,
209
+ "learning_rate": 4.761904761904761e-07,
210
+ "logits/chosen": -2.7964794635772705,
211
+ "logits/rejected": -2.8038413524627686,
212
+ "logps/chosen": -251.0780029296875,
213
+ "logps/rejected": -380.4024353027344,
214
+ "loss": 0.6409,
215
+ "rewards/accuracies": 0.842968761920929,
216
+ "rewards/chosen": 0.10089793056249619,
217
+ "rewards/margins": 0.11140058934688568,
218
+ "rewards/rejected": -0.010502668097615242,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.1,
223
+ "learning_rate": 4.988662131519274e-07,
224
+ "logits/chosen": -2.7733452320098877,
225
+ "logits/rejected": -2.799926280975342,
226
+ "logps/chosen": -259.34686279296875,
227
+ "logps/rejected": -335.1527404785156,
228
+ "loss": 0.6297,
229
+ "rewards/accuracies": 0.8539062738418579,
230
+ "rewards/chosen": 0.12008102238178253,
231
+ "rewards/margins": 0.13700444996356964,
232
+ "rewards/rejected": -0.016923416405916214,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.11,
237
+ "learning_rate": 4.950869236583522e-07,
238
+ "logits/chosen": -2.774165153503418,
239
+ "logits/rejected": -2.7881526947021484,
240
+ "logps/chosen": -245.5338134765625,
241
+ "logps/rejected": -338.31597900390625,
242
+ "loss": 0.6201,
243
+ "rewards/accuracies": 0.8687499761581421,
244
+ "rewards/chosen": 0.14051470160484314,
245
+ "rewards/margins": 0.1599283218383789,
246
+ "rewards/rejected": -0.019413620233535767,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.12,
251
+ "learning_rate": 4.91307634164777e-07,
252
+ "logits/chosen": -2.811603546142578,
253
+ "logits/rejected": -2.8174936771392822,
254
+ "logps/chosen": -260.7558898925781,
255
+ "logps/rejected": -356.88153076171875,
256
+ "loss": 0.6041,
257
+ "rewards/accuracies": 0.875,
258
+ "rewards/chosen": 0.1749168038368225,
259
+ "rewards/margins": 0.19711166620254517,
260
+ "rewards/rejected": -0.022194867953658104,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.12,
265
+ "learning_rate": 4.875283446712018e-07,
266
+ "logits/chosen": -2.7915146350860596,
267
+ "logits/rejected": -2.7889480590820312,
268
+ "logps/chosen": -264.36138916015625,
269
+ "logps/rejected": -353.7435607910156,
270
+ "loss": 0.5926,
271
+ "rewards/accuracies": 0.883593738079071,
272
+ "rewards/chosen": 0.19911792874336243,
273
+ "rewards/margins": 0.22633683681488037,
274
+ "rewards/rejected": -0.02721891924738884,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.13,
279
+ "learning_rate": 4.837490551776266e-07,
280
+ "logits/chosen": -2.7990036010742188,
281
+ "logits/rejected": -2.7916808128356934,
282
+ "logps/chosen": -257.4069519042969,
283
+ "logps/rejected": -372.6297302246094,
284
+ "loss": 0.5799,
285
+ "rewards/accuracies": 0.887499988079071,
286
+ "rewards/chosen": 0.22631244361400604,
287
+ "rewards/margins": 0.2581940293312073,
288
+ "rewards/rejected": -0.031881578266620636,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.14,
293
+ "learning_rate": 4.799697656840514e-07,
294
+ "logits/chosen": -2.7753312587738037,
295
+ "logits/rejected": -2.7730696201324463,
296
+ "logps/chosen": -259.2568054199219,
297
+ "logps/rejected": -390.26995849609375,
298
+ "loss": 0.564,
299
+ "rewards/accuracies": 0.889843761920929,
300
+ "rewards/chosen": 0.25861743092536926,
301
+ "rewards/margins": 0.30055442452430725,
302
+ "rewards/rejected": -0.0419369637966156,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.14,
307
+ "learning_rate": 4.761904761904761e-07,
308
+ "logits/chosen": -2.7830989360809326,
309
+ "logits/rejected": -2.7885472774505615,
310
+ "logps/chosen": -229.49685668945312,
311
+ "logps/rejected": -346.35784912109375,
312
+ "loss": 0.5551,
313
+ "rewards/accuracies": 0.91015625,
314
+ "rewards/chosen": 0.28561651706695557,
315
+ "rewards/margins": 0.32180091738700867,
316
+ "rewards/rejected": -0.03618443384766579,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.15,
321
+ "learning_rate": 4.7241118669690096e-07,
322
+ "logits/chosen": -2.7914628982543945,
323
+ "logits/rejected": -2.7812819480895996,
324
+ "logps/chosen": -277.1968078613281,
325
+ "logps/rejected": -334.34124755859375,
326
+ "loss": 0.5473,
327
+ "rewards/accuracies": 0.8999999761581421,
328
+ "rewards/chosen": 0.30997538566589355,
329
+ "rewards/margins": 0.3486320972442627,
330
+ "rewards/rejected": -0.038656704127788544,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.16,
335
+ "learning_rate": 4.6863189720332574e-07,
336
+ "logits/chosen": -2.7915186882019043,
337
+ "logits/rejected": -2.7635109424591064,
338
+ "logps/chosen": -230.6345672607422,
339
+ "logps/rejected": -366.45855712890625,
340
+ "loss": 0.5283,
341
+ "rewards/accuracies": 0.901562511920929,
342
+ "rewards/chosen": 0.3480406403541565,
343
+ "rewards/margins": 0.3980127274990082,
344
+ "rewards/rejected": -0.04997207969427109,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.16,
349
+ "learning_rate": 4.648526077097505e-07,
350
+ "logits/chosen": -2.8176677227020264,
351
+ "logits/rejected": -2.8094589710235596,
352
+ "logps/chosen": -255.73318481445312,
353
+ "logps/rejected": -356.473876953125,
354
+ "loss": 0.5141,
355
+ "rewards/accuracies": 0.905468761920929,
356
+ "rewards/chosen": 0.38035809993743896,
357
+ "rewards/margins": 0.4426742494106293,
358
+ "rewards/rejected": -0.062316179275512695,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 0.17,
363
+ "learning_rate": 4.6107331821617536e-07,
364
+ "logits/chosen": -2.778831958770752,
365
+ "logits/rejected": -2.7532734870910645,
366
+ "logps/chosen": -260.0787658691406,
367
+ "logps/rejected": -382.69403076171875,
368
+ "loss": 0.5037,
369
+ "rewards/accuracies": 0.9078124761581421,
370
+ "rewards/chosen": 0.4094300866127014,
371
+ "rewards/margins": 0.4735100269317627,
372
+ "rewards/rejected": -0.06407993286848068,
373
+ "step": 250
374
+ },
375
+ {
376
+ "epoch": 0.18,
377
+ "learning_rate": 4.5729402872260014e-07,
378
+ "logits/chosen": -2.7875959873199463,
379
+ "logits/rejected": -2.789522647857666,
380
+ "logps/chosen": -245.36215209960938,
381
+ "logps/rejected": -398.8630676269531,
382
+ "loss": 0.4946,
383
+ "rewards/accuracies": 0.897656261920929,
384
+ "rewards/chosen": 0.43164581060409546,
385
+ "rewards/margins": 0.506696879863739,
386
+ "rewards/rejected": -0.07505108416080475,
387
+ "step": 260
388
+ },
389
+ {
390
+ "epoch": 0.18,
391
+ "learning_rate": 4.535147392290249e-07,
392
+ "logits/chosen": -2.7784600257873535,
393
+ "logits/rejected": -2.743320941925049,
394
+ "logps/chosen": -240.0518035888672,
395
+ "logps/rejected": -373.5130920410156,
396
+ "loss": 0.4891,
397
+ "rewards/accuracies": 0.89453125,
398
+ "rewards/chosen": 0.45601949095726013,
399
+ "rewards/margins": 0.5297552347183228,
400
+ "rewards/rejected": -0.07373576611280441,
401
+ "step": 270
402
+ },
403
+ {
404
+ "epoch": 0.19,
405
+ "learning_rate": 4.497354497354497e-07,
406
+ "logits/chosen": -2.777036190032959,
407
+ "logits/rejected": -2.7678191661834717,
408
+ "logps/chosen": -264.9656677246094,
409
+ "logps/rejected": -373.12042236328125,
410
+ "loss": 0.4766,
411
+ "rewards/accuracies": 0.9156249761581421,
412
+ "rewards/chosen": 0.47401291131973267,
413
+ "rewards/margins": 0.5673891305923462,
414
+ "rewards/rejected": -0.09337621927261353,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 0.2,
419
+ "learning_rate": 4.459561602418745e-07,
420
+ "logits/chosen": -2.7813751697540283,
421
+ "logits/rejected": -2.7827224731445312,
422
+ "logps/chosen": -239.7397918701172,
423
+ "logps/rejected": -392.6272888183594,
424
+ "loss": 0.4603,
425
+ "rewards/accuracies": 0.9117187261581421,
426
+ "rewards/chosen": 0.5112585425376892,
427
+ "rewards/margins": 0.6238077878952026,
428
+ "rewards/rejected": -0.11254926025867462,
429
+ "step": 290
430
+ },
431
+ {
432
+ "epoch": 0.2,
433
+ "learning_rate": 4.421768707482993e-07,
434
+ "logits/chosen": -2.784381628036499,
435
+ "logits/rejected": -2.7823455333709717,
436
+ "logps/chosen": -247.23696899414062,
437
+ "logps/rejected": -340.01971435546875,
438
+ "loss": 0.4569,
439
+ "rewards/accuracies": 0.909375011920929,
440
+ "rewards/chosen": 0.5431731939315796,
441
+ "rewards/margins": 0.6343038082122803,
442
+ "rewards/rejected": -0.09113059937953949,
443
+ "step": 300
444
+ },
445
+ {
446
+ "epoch": 0.21,
447
+ "learning_rate": 4.383975812547241e-07,
448
+ "logits/chosen": -2.7919013500213623,
449
+ "logits/rejected": -2.7927372455596924,
450
+ "logps/chosen": -244.9982147216797,
451
+ "logps/rejected": -345.5526428222656,
452
+ "loss": 0.4422,
453
+ "rewards/accuracies": 0.922656238079071,
454
+ "rewards/chosen": 0.5760600566864014,
455
+ "rewards/margins": 0.6899352669715881,
456
+ "rewards/rejected": -0.11387525498867035,
457
+ "step": 310
458
+ },
459
+ {
460
+ "epoch": 0.22,
461
+ "learning_rate": 4.346182917611489e-07,
462
+ "logits/chosen": -2.786698341369629,
463
+ "logits/rejected": -2.7934978008270264,
464
+ "logps/chosen": -255.37142944335938,
465
+ "logps/rejected": -399.12957763671875,
466
+ "loss": 0.4344,
467
+ "rewards/accuracies": 0.9125000238418579,
468
+ "rewards/chosen": 0.5843140482902527,
469
+ "rewards/margins": 0.7244275808334351,
470
+ "rewards/rejected": -0.1401134431362152,
471
+ "step": 320
472
+ },
473
+ {
474
+ "epoch": 0.22,
475
+ "learning_rate": 4.308390022675737e-07,
476
+ "logits/chosen": -2.7745113372802734,
477
+ "logits/rejected": -2.7805206775665283,
478
+ "logps/chosen": -252.92514038085938,
479
+ "logps/rejected": -392.51165771484375,
480
+ "loss": 0.4332,
481
+ "rewards/accuracies": 0.9046875238418579,
482
+ "rewards/chosen": 0.5970828533172607,
483
+ "rewards/margins": 0.7323796153068542,
484
+ "rewards/rejected": -0.1352967619895935,
485
+ "step": 330
486
+ },
487
+ {
488
+ "epoch": 0.23,
489
+ "learning_rate": 4.270597127739985e-07,
490
+ "logits/chosen": -2.783926486968994,
491
+ "logits/rejected": -2.7875866889953613,
492
+ "logps/chosen": -250.8353729248047,
493
+ "logps/rejected": -335.03265380859375,
494
+ "loss": 0.4175,
495
+ "rewards/accuracies": 0.9164062738418579,
496
+ "rewards/chosen": 0.6492675542831421,
497
+ "rewards/margins": 0.7875067591667175,
498
+ "rewards/rejected": -0.13823917508125305,
499
+ "step": 340
500
+ },
501
+ {
502
+ "epoch": 0.24,
503
+ "learning_rate": 4.2328042328042324e-07,
504
+ "logits/chosen": -2.7828190326690674,
505
+ "logits/rejected": -2.772052764892578,
506
+ "logps/chosen": -236.33706665039062,
507
+ "logps/rejected": -370.28399658203125,
508
+ "loss": 0.4152,
509
+ "rewards/accuracies": 0.9242187738418579,
510
+ "rewards/chosen": 0.662378191947937,
511
+ "rewards/margins": 0.7986767888069153,
512
+ "rewards/rejected": -0.13629861176013947,
513
+ "step": 350
514
+ },
515
+ {
516
+ "epoch": 0.24,
517
+ "learning_rate": 4.19501133786848e-07,
518
+ "logits/chosen": -2.780648946762085,
519
+ "logits/rejected": -2.771820545196533,
520
+ "logps/chosen": -228.22445678710938,
521
+ "logps/rejected": -390.63751220703125,
522
+ "loss": 0.4051,
523
+ "rewards/accuracies": 0.9140625,
524
+ "rewards/chosen": 0.6962443590164185,
525
+ "rewards/margins": 0.8446155786514282,
526
+ "rewards/rejected": -0.1483711302280426,
527
+ "step": 360
528
+ },
529
+ {
530
+ "epoch": 0.25,
531
+ "learning_rate": 4.1572184429327286e-07,
532
+ "logits/chosen": -2.8088645935058594,
533
+ "logits/rejected": -2.7826154232025146,
534
+ "logps/chosen": -255.2318572998047,
535
+ "logps/rejected": -344.69183349609375,
536
+ "loss": 0.3908,
537
+ "rewards/accuracies": 0.9203125238418579,
538
+ "rewards/chosen": 0.7306076288223267,
539
+ "rewards/margins": 0.900040328502655,
540
+ "rewards/rejected": -0.16943258047103882,
541
+ "step": 370
542
+ },
543
+ {
544
+ "epoch": 0.26,
545
+ "learning_rate": 4.1194255479969764e-07,
546
+ "logits/chosen": -2.7837393283843994,
547
+ "logits/rejected": -2.754739284515381,
548
+ "logps/chosen": -252.39779663085938,
549
+ "logps/rejected": -347.7734069824219,
550
+ "loss": 0.4019,
551
+ "rewards/accuracies": 0.907031238079071,
552
+ "rewards/chosen": 0.7146260738372803,
553
+ "rewards/margins": 0.8642898797988892,
554
+ "rewards/rejected": -0.14966385066509247,
555
+ "step": 380
556
+ },
557
+ {
558
+ "epoch": 0.27,
559
+ "learning_rate": 4.0816326530612243e-07,
560
+ "logits/chosen": -2.793994426727295,
561
+ "logits/rejected": -2.789456605911255,
562
+ "logps/chosen": -250.083984375,
563
+ "logps/rejected": -345.2536315917969,
564
+ "loss": 0.3843,
565
+ "rewards/accuracies": 0.9203125238418579,
566
+ "rewards/chosen": 0.760775089263916,
567
+ "rewards/margins": 0.9284068942070007,
568
+ "rewards/rejected": -0.1676318198442459,
569
+ "step": 390
570
+ },
571
+ {
572
+ "epoch": 0.27,
573
+ "learning_rate": 4.0438397581254726e-07,
574
+ "logits/chosen": -2.7863235473632812,
575
+ "logits/rejected": -2.7660741806030273,
576
+ "logps/chosen": -243.2860565185547,
577
+ "logps/rejected": -375.15283203125,
578
+ "loss": 0.3736,
579
+ "rewards/accuracies": 0.9195312261581421,
580
+ "rewards/chosen": 0.7728086113929749,
581
+ "rewards/margins": 0.9798704385757446,
582
+ "rewards/rejected": -0.20706184208393097,
583
+ "step": 400
584
+ },
585
+ {
586
+ "epoch": 0.28,
587
+ "learning_rate": 4.0060468631897205e-07,
588
+ "logits/chosen": -2.7740797996520996,
589
+ "logits/rejected": -2.787078857421875,
590
+ "logps/chosen": -231.3814239501953,
591
+ "logps/rejected": -373.4275817871094,
592
+ "loss": 0.3779,
593
+ "rewards/accuracies": 0.9140625,
594
+ "rewards/chosen": 0.786165177822113,
595
+ "rewards/margins": 0.9645744562149048,
596
+ "rewards/rejected": -0.1784091293811798,
597
+ "step": 410
598
+ },
599
+ {
600
+ "epoch": 0.29,
601
+ "learning_rate": 3.968253968253968e-07,
602
+ "logits/chosen": -2.7854466438293457,
603
+ "logits/rejected": -2.782599449157715,
604
+ "logps/chosen": -234.27853393554688,
605
+ "logps/rejected": -341.40106201171875,
606
+ "loss": 0.3758,
607
+ "rewards/accuracies": 0.9125000238418579,
608
+ "rewards/chosen": 0.8017100095748901,
609
+ "rewards/margins": 0.9820283651351929,
610
+ "rewards/rejected": -0.1803184449672699,
611
+ "step": 420
612
+ },
613
+ {
614
+ "epoch": 0.29,
615
+ "learning_rate": 3.930461073318216e-07,
616
+ "logits/chosen": -2.7634427547454834,
617
+ "logits/rejected": -2.7768495082855225,
618
+ "logps/chosen": -230.73318481445312,
619
+ "logps/rejected": -427.71917724609375,
620
+ "loss": 0.3665,
621
+ "rewards/accuracies": 0.91796875,
622
+ "rewards/chosen": 0.8091424703598022,
623
+ "rewards/margins": 1.025179386138916,
624
+ "rewards/rejected": -0.2160368263721466,
625
+ "step": 430
626
+ },
627
+ {
628
+ "epoch": 0.3,
629
+ "learning_rate": 3.892668178382464e-07,
630
+ "logits/chosen": -2.774629592895508,
631
+ "logits/rejected": -2.7814247608184814,
632
+ "logps/chosen": -253.4683074951172,
633
+ "logps/rejected": -386.40216064453125,
634
+ "loss": 0.3495,
635
+ "rewards/accuracies": 0.925000011920929,
636
+ "rewards/chosen": 0.8851088285446167,
637
+ "rewards/margins": 1.123652696609497,
638
+ "rewards/rejected": -0.23854386806488037,
639
+ "step": 440
640
+ },
641
+ {
642
+ "epoch": 0.31,
643
+ "learning_rate": 3.854875283446712e-07,
644
+ "logits/chosen": -2.766551971435547,
645
+ "logits/rejected": -2.7709641456604004,
646
+ "logps/chosen": -271.8524475097656,
647
+ "logps/rejected": -379.4809265136719,
648
+ "loss": 0.3575,
649
+ "rewards/accuracies": 0.9125000238418579,
650
+ "rewards/chosen": 0.8432048559188843,
651
+ "rewards/margins": 1.0976295471191406,
652
+ "rewards/rejected": -0.254424512386322,
653
+ "step": 450
654
+ },
655
+ {
656
+ "epoch": 0.31,
657
+ "learning_rate": 3.8170823885109596e-07,
658
+ "logits/chosen": -2.8009865283966064,
659
+ "logits/rejected": -2.7705283164978027,
660
+ "logps/chosen": -241.07632446289062,
661
+ "logps/rejected": -366.87127685546875,
662
+ "loss": 0.3459,
663
+ "rewards/accuracies": 0.9281250238418579,
664
+ "rewards/chosen": 0.9000816345214844,
665
+ "rewards/margins": 1.1292930841445923,
666
+ "rewards/rejected": -0.22921133041381836,
667
+ "step": 460
668
+ },
669
+ {
670
+ "epoch": 0.32,
671
+ "learning_rate": 3.779289493575208e-07,
672
+ "logits/chosen": -2.7855477333068848,
673
+ "logits/rejected": -2.771469831466675,
674
+ "logps/chosen": -248.2216033935547,
675
+ "logps/rejected": -379.58709716796875,
676
+ "loss": 0.3488,
677
+ "rewards/accuracies": 0.913281261920929,
678
+ "rewards/chosen": 0.8979974985122681,
679
+ "rewards/margins": 1.1383633613586426,
680
+ "rewards/rejected": -0.2403658926486969,
681
+ "step": 470
682
+ },
683
+ {
684
+ "epoch": 0.33,
685
+ "learning_rate": 3.741496598639456e-07,
686
+ "logits/chosen": -2.783979892730713,
687
+ "logits/rejected": -2.787400722503662,
688
+ "logps/chosen": -234.78939819335938,
689
+ "logps/rejected": -391.0784912109375,
690
+ "loss": 0.3396,
691
+ "rewards/accuracies": 0.9312499761581421,
692
+ "rewards/chosen": 0.8895782232284546,
693
+ "rewards/margins": 1.1713939905166626,
694
+ "rewards/rejected": -0.281815767288208,
695
+ "step": 480
696
+ },
697
+ {
698
+ "epoch": 0.33,
699
+ "learning_rate": 3.703703703703703e-07,
700
+ "logits/chosen": -2.7796401977539062,
701
+ "logits/rejected": -2.78939151763916,
702
+ "logps/chosen": -255.79556274414062,
703
+ "logps/rejected": -376.7617492675781,
704
+ "loss": 0.3496,
705
+ "rewards/accuracies": 0.91796875,
706
+ "rewards/chosen": 0.8880151510238647,
707
+ "rewards/margins": 1.1511998176574707,
708
+ "rewards/rejected": -0.26318463683128357,
709
+ "step": 490
710
+ },
711
+ {
712
+ "epoch": 0.34,
713
+ "learning_rate": 3.6659108087679515e-07,
714
+ "logits/chosen": -2.784447193145752,
715
+ "logits/rejected": -2.7811279296875,
716
+ "logps/chosen": -240.26943969726562,
717
+ "logps/rejected": -373.43585205078125,
718
+ "loss": 0.3317,
719
+ "rewards/accuracies": 0.9242187738418579,
720
+ "rewards/chosen": 0.9207477569580078,
721
+ "rewards/margins": 1.2141565084457397,
722
+ "rewards/rejected": -0.2934088110923767,
723
+ "step": 500
724
+ },
725
+ {
726
+ "epoch": 0.35,
727
+ "learning_rate": 3.6281179138321993e-07,
728
+ "logits/chosen": -2.7936480045318604,
729
+ "logits/rejected": -2.7741034030914307,
730
+ "logps/chosen": -253.25625610351562,
731
+ "logps/rejected": -388.1740417480469,
732
+ "loss": 0.3307,
733
+ "rewards/accuracies": 0.925000011920929,
734
+ "rewards/chosen": 0.9261225461959839,
735
+ "rewards/margins": 1.2367761135101318,
736
+ "rewards/rejected": -0.310653418302536,
737
+ "step": 510
738
+ },
739
+ {
740
+ "epoch": 0.35,
741
+ "learning_rate": 3.590325018896447e-07,
742
+ "logits/chosen": -2.764971971511841,
743
+ "logits/rejected": -2.779900074005127,
744
+ "logps/chosen": -277.50433349609375,
745
+ "logps/rejected": -390.9405822753906,
746
+ "loss": 0.3301,
747
+ "rewards/accuracies": 0.9203125238418579,
748
+ "rewards/chosen": 0.9403823614120483,
749
+ "rewards/margins": 1.250135898590088,
750
+ "rewards/rejected": -0.3097533881664276,
751
+ "step": 520
752
+ },
753
+ {
754
+ "epoch": 0.36,
755
+ "learning_rate": 3.5525321239606955e-07,
756
+ "logits/chosen": -2.7859063148498535,
757
+ "logits/rejected": -2.7852673530578613,
758
+ "logps/chosen": -240.83847045898438,
759
+ "logps/rejected": -329.5592346191406,
760
+ "loss": 0.3185,
761
+ "rewards/accuracies": 0.9296875,
762
+ "rewards/chosen": 0.998257040977478,
763
+ "rewards/margins": 1.3062750101089478,
764
+ "rewards/rejected": -0.30801790952682495,
765
+ "step": 530
766
+ },
767
+ {
768
+ "epoch": 0.37,
769
+ "learning_rate": 3.5147392290249433e-07,
770
+ "logits/chosen": -2.7856059074401855,
771
+ "logits/rejected": -2.7904558181762695,
772
+ "logps/chosen": -256.13116455078125,
773
+ "logps/rejected": -359.0440673828125,
774
+ "loss": 0.3201,
775
+ "rewards/accuracies": 0.917187511920929,
776
+ "rewards/chosen": 0.9812418222427368,
777
+ "rewards/margins": 1.2980186939239502,
778
+ "rewards/rejected": -0.316776841878891,
779
+ "step": 540
780
+ },
781
+ {
782
+ "epoch": 0.37,
783
+ "learning_rate": 3.4769463340891906e-07,
784
+ "logits/chosen": -2.7746291160583496,
785
+ "logits/rejected": -2.8083655834198,
786
+ "logps/chosen": -243.3596649169922,
787
+ "logps/rejected": -381.6620788574219,
788
+ "loss": 0.321,
789
+ "rewards/accuracies": 0.921093761920929,
790
+ "rewards/chosen": 0.9785689115524292,
791
+ "rewards/margins": 1.3102028369903564,
792
+ "rewards/rejected": -0.33163395524024963,
793
+ "step": 550
794
+ },
795
+ {
796
+ "epoch": 0.38,
797
+ "learning_rate": 3.439153439153439e-07,
798
+ "logits/chosen": -2.788200616836548,
799
+ "logits/rejected": -2.806088924407959,
800
+ "logps/chosen": -243.46371459960938,
801
+ "logps/rejected": -353.0728454589844,
802
+ "loss": 0.3037,
803
+ "rewards/accuracies": 0.9281250238418579,
804
+ "rewards/chosen": 1.0423057079315186,
805
+ "rewards/margins": 1.40134596824646,
806
+ "rewards/rejected": -0.35903996229171753,
807
+ "step": 560
808
+ },
809
+ {
810
+ "epoch": 0.39,
811
+ "learning_rate": 3.401360544217687e-07,
812
+ "logits/chosen": -2.8205642700195312,
813
+ "logits/rejected": -2.75651216506958,
814
+ "logps/chosen": -225.49380493164062,
815
+ "logps/rejected": -383.3102111816406,
816
+ "loss": 0.2961,
817
+ "rewards/accuracies": 0.934374988079071,
818
+ "rewards/chosen": 1.0518951416015625,
819
+ "rewards/margins": 1.416092872619629,
820
+ "rewards/rejected": -0.3641977310180664,
821
+ "step": 570
822
+ },
823
+ {
824
+ "epoch": 0.39,
825
+ "learning_rate": 3.3635676492819346e-07,
826
+ "logits/chosen": -2.778111696243286,
827
+ "logits/rejected": -2.8062729835510254,
828
+ "logps/chosen": -241.8183135986328,
829
+ "logps/rejected": -360.12677001953125,
830
+ "loss": 0.3026,
831
+ "rewards/accuracies": 0.928906261920929,
832
+ "rewards/chosen": 1.0336360931396484,
833
+ "rewards/margins": 1.3975627422332764,
834
+ "rewards/rejected": -0.3639264702796936,
835
+ "step": 580
836
+ },
837
+ {
838
+ "epoch": 0.4,
839
+ "learning_rate": 3.325774754346183e-07,
840
+ "logits/chosen": -2.7760305404663086,
841
+ "logits/rejected": -2.7639145851135254,
842
+ "logps/chosen": -263.2132568359375,
843
+ "logps/rejected": -326.3753356933594,
844
+ "loss": 0.3079,
845
+ "rewards/accuracies": 0.917187511920929,
846
+ "rewards/chosen": 1.0236365795135498,
847
+ "rewards/margins": 1.3882102966308594,
848
+ "rewards/rejected": -0.36457380652427673,
849
+ "step": 590
850
+ },
851
+ {
852
+ "epoch": 0.41,
853
+ "learning_rate": 3.287981859410431e-07,
854
+ "logits/chosen": -2.8040480613708496,
855
+ "logits/rejected": -2.781839370727539,
856
+ "logps/chosen": -232.688720703125,
857
+ "logps/rejected": -341.75372314453125,
858
+ "loss": 0.294,
859
+ "rewards/accuracies": 0.92578125,
860
+ "rewards/chosen": 1.0763448476791382,
861
+ "rewards/margins": 1.459729790687561,
862
+ "rewards/rejected": -0.38338491320610046,
863
+ "step": 600
864
+ },
865
+ {
866
+ "epoch": 0.41,
867
+ "learning_rate": 3.2501889644746787e-07,
868
+ "logits/chosen": -2.797874927520752,
869
+ "logits/rejected": -2.748481512069702,
870
+ "logps/chosen": -232.8326873779297,
871
+ "logps/rejected": -369.7907409667969,
872
+ "loss": 0.2837,
873
+ "rewards/accuracies": 0.938281238079071,
874
+ "rewards/chosen": 1.101806640625,
875
+ "rewards/margins": 1.498957633972168,
876
+ "rewards/rejected": -0.39715105295181274,
877
+ "step": 610
878
+ },
879
+ {
880
+ "epoch": 0.42,
881
+ "learning_rate": 3.2123960695389265e-07,
882
+ "logits/chosen": -2.780925989151001,
883
+ "logits/rejected": -2.735792636871338,
884
+ "logps/chosen": -222.20596313476562,
885
+ "logps/rejected": -380.5815124511719,
886
+ "loss": 0.2935,
887
+ "rewards/accuracies": 0.921093761920929,
888
+ "rewards/chosen": 1.0859084129333496,
889
+ "rewards/margins": 1.4819860458374023,
890
+ "rewards/rejected": -0.39607763290405273,
891
+ "step": 620
892
+ },
893
+ {
894
+ "epoch": 0.43,
895
+ "learning_rate": 3.1746031746031743e-07,
896
+ "logits/chosen": -2.7768056392669678,
897
+ "logits/rejected": -2.764166831970215,
898
+ "logps/chosen": -236.9914093017578,
899
+ "logps/rejected": -345.6325378417969,
900
+ "loss": 0.2895,
901
+ "rewards/accuracies": 0.932812511920929,
902
+ "rewards/chosen": 1.1014459133148193,
903
+ "rewards/margins": 1.5069670677185059,
904
+ "rewards/rejected": -0.40552106499671936,
905
+ "step": 630
906
+ },
907
+ {
908
+ "epoch": 0.44,
909
+ "learning_rate": 3.136810279667422e-07,
910
+ "logits/chosen": -2.7987258434295654,
911
+ "logits/rejected": -2.8054118156433105,
912
+ "logps/chosen": -235.97109985351562,
913
+ "logps/rejected": -330.56439208984375,
914
+ "loss": 0.2775,
915
+ "rewards/accuracies": 0.93359375,
916
+ "rewards/chosen": 1.1580729484558105,
917
+ "rewards/margins": 1.5699806213378906,
918
+ "rewards/rejected": -0.4119076728820801,
919
+ "step": 640
920
+ },
921
+ {
922
+ "epoch": 0.44,
923
+ "learning_rate": 3.0990173847316705e-07,
924
+ "logits/chosen": -2.7858521938323975,
925
+ "logits/rejected": -2.779346466064453,
926
+ "logps/chosen": -257.5158386230469,
927
+ "logps/rejected": -322.25103759765625,
928
+ "loss": 0.287,
929
+ "rewards/accuracies": 0.9195312261581421,
930
+ "rewards/chosen": 1.1325995922088623,
931
+ "rewards/margins": 1.5360453128814697,
932
+ "rewards/rejected": -0.40344563126564026,
933
+ "step": 650
934
+ },
935
+ {
936
+ "epoch": 0.45,
937
+ "learning_rate": 3.0612244897959183e-07,
938
+ "logits/chosen": -2.7976508140563965,
939
+ "logits/rejected": -2.8010151386260986,
940
+ "logps/chosen": -219.1446533203125,
941
+ "logps/rejected": -315.2838439941406,
942
+ "loss": 0.2703,
943
+ "rewards/accuracies": 0.9453125,
944
+ "rewards/chosen": 1.1511547565460205,
945
+ "rewards/margins": 1.5933144092559814,
946
+ "rewards/rejected": -0.44215965270996094,
947
+ "step": 660
948
+ },
949
+ {
950
+ "epoch": 0.46,
951
+ "learning_rate": 3.023431594860166e-07,
952
+ "logits/chosen": -2.767582416534424,
953
+ "logits/rejected": -2.8024327754974365,
954
+ "logps/chosen": -237.21578979492188,
955
+ "logps/rejected": -314.68377685546875,
956
+ "loss": 0.2637,
957
+ "rewards/accuracies": 0.9359375238418579,
958
+ "rewards/chosen": 1.1508355140686035,
959
+ "rewards/margins": 1.6350256204605103,
960
+ "rewards/rejected": -0.48419007658958435,
961
+ "step": 670
962
+ },
963
+ {
964
+ "epoch": 0.46,
965
+ "learning_rate": 2.9856386999244145e-07,
966
+ "logits/chosen": -2.7926082611083984,
967
+ "logits/rejected": -2.780251979827881,
968
+ "logps/chosen": -244.810302734375,
969
+ "logps/rejected": -347.9936828613281,
970
+ "loss": 0.2784,
971
+ "rewards/accuracies": 0.930468738079071,
972
+ "rewards/chosen": 1.1081712245941162,
973
+ "rewards/margins": 1.5819367170333862,
974
+ "rewards/rejected": -0.47376567125320435,
975
+ "step": 680
976
+ },
977
+ {
978
+ "epoch": 0.47,
979
+ "learning_rate": 2.947845804988662e-07,
980
+ "logits/chosen": -2.771953821182251,
981
+ "logits/rejected": -2.768907070159912,
982
+ "logps/chosen": -248.50332641601562,
983
+ "logps/rejected": -360.6126403808594,
984
+ "loss": 0.2897,
985
+ "rewards/accuracies": 0.921875,
986
+ "rewards/chosen": 1.0936378240585327,
987
+ "rewards/margins": 1.5781736373901367,
988
+ "rewards/rejected": -0.4845358729362488,
989
+ "step": 690
990
+ },
991
+ {
992
+ "epoch": 0.48,
993
+ "learning_rate": 2.9100529100529097e-07,
994
+ "logits/chosen": -2.7748546600341797,
995
+ "logits/rejected": -2.7857470512390137,
996
+ "logps/chosen": -227.1557159423828,
997
+ "logps/rejected": -390.3030700683594,
998
+ "loss": 0.2597,
999
+ "rewards/accuracies": 0.9320312738418579,
1000
+ "rewards/chosen": 1.1781264543533325,
1001
+ "rewards/margins": 1.7109047174453735,
1002
+ "rewards/rejected": -0.5327781438827515,
1003
+ "step": 700
1004
+ },
1005
+ {
1006
+ "epoch": 0.48,
1007
+ "learning_rate": 2.872260015117158e-07,
1008
+ "logits/chosen": -2.77628231048584,
1009
+ "logits/rejected": -2.7869679927825928,
1010
+ "logps/chosen": -245.57839965820312,
1011
+ "logps/rejected": -326.86212158203125,
1012
+ "loss": 0.2613,
1013
+ "rewards/accuracies": 0.934374988079071,
1014
+ "rewards/chosen": 1.1419804096221924,
1015
+ "rewards/margins": 1.6727325916290283,
1016
+ "rewards/rejected": -0.5307522416114807,
1017
+ "step": 710
1018
+ },
1019
+ {
1020
+ "epoch": 0.49,
1021
+ "learning_rate": 2.834467120181406e-07,
1022
+ "logits/chosen": -2.7608537673950195,
1023
+ "logits/rejected": -2.7646660804748535,
1024
+ "logps/chosen": -241.5836944580078,
1025
+ "logps/rejected": -379.62860107421875,
1026
+ "loss": 0.2738,
1027
+ "rewards/accuracies": 0.925000011920929,
1028
+ "rewards/chosen": 1.1373337507247925,
1029
+ "rewards/margins": 1.6593284606933594,
1030
+ "rewards/rejected": -0.5219947099685669,
1031
+ "step": 720
1032
+ },
1033
+ {
1034
+ "epoch": 0.5,
1035
+ "learning_rate": 2.7966742252456537e-07,
1036
+ "logits/chosen": -2.777465343475342,
1037
+ "logits/rejected": -2.801975965499878,
1038
+ "logps/chosen": -227.2059326171875,
1039
+ "logps/rejected": -369.7891540527344,
1040
+ "loss": 0.2554,
1041
+ "rewards/accuracies": 0.934374988079071,
1042
+ "rewards/chosen": 1.209241271018982,
1043
+ "rewards/margins": 1.7282158136367798,
1044
+ "rewards/rejected": -0.5189744234085083,
1045
+ "step": 730
1046
+ },
1047
+ {
1048
+ "epoch": 0.5,
1049
+ "learning_rate": 2.758881330309902e-07,
1050
+ "logits/chosen": -2.7639384269714355,
1051
+ "logits/rejected": -2.7558932304382324,
1052
+ "logps/chosen": -255.972412109375,
1053
+ "logps/rejected": -410.17431640625,
1054
+ "loss": 0.2766,
1055
+ "rewards/accuracies": 0.9156249761581421,
1056
+ "rewards/chosen": 1.1485779285430908,
1057
+ "rewards/margins": 1.672486662864685,
1058
+ "rewards/rejected": -0.5239086151123047,
1059
+ "step": 740
1060
+ },
1061
+ {
1062
+ "epoch": 0.51,
1063
+ "learning_rate": 2.72108843537415e-07,
1064
+ "logits/chosen": -2.7429962158203125,
1065
+ "logits/rejected": -2.7603325843811035,
1066
+ "logps/chosen": -248.05697631835938,
1067
+ "logps/rejected": -382.65863037109375,
1068
+ "loss": 0.2692,
1069
+ "rewards/accuracies": 0.934374988079071,
1070
+ "rewards/chosen": 1.1628259420394897,
1071
+ "rewards/margins": 1.7002170085906982,
1072
+ "rewards/rejected": -0.5373910665512085,
1073
+ "step": 750
1074
+ },
1075
+ {
1076
+ "epoch": 0.52,
1077
+ "learning_rate": 2.683295540438397e-07,
1078
+ "logits/chosen": -2.7732365131378174,
1079
+ "logits/rejected": -2.7899222373962402,
1080
+ "logps/chosen": -230.82577514648438,
1081
+ "logps/rejected": -356.39349365234375,
1082
+ "loss": 0.262,
1083
+ "rewards/accuracies": 0.93359375,
1084
+ "rewards/chosen": 1.1516262292861938,
1085
+ "rewards/margins": 1.7132419347763062,
1086
+ "rewards/rejected": -0.5616158843040466,
1087
+ "step": 760
1088
+ },
1089
+ {
1090
+ "epoch": 0.52,
1091
+ "learning_rate": 2.645502645502645e-07,
1092
+ "logits/chosen": -2.764669895172119,
1093
+ "logits/rejected": -2.7641212940216064,
1094
+ "logps/chosen": -246.3456573486328,
1095
+ "logps/rejected": -370.99896240234375,
1096
+ "loss": 0.2701,
1097
+ "rewards/accuracies": 0.9164062738418579,
1098
+ "rewards/chosen": 1.191197395324707,
1099
+ "rewards/margins": 1.7232650518417358,
1100
+ "rewards/rejected": -0.5320678949356079,
1101
+ "step": 770
1102
+ },
1103
+ {
1104
+ "epoch": 0.53,
1105
+ "learning_rate": 2.6077097505668934e-07,
1106
+ "logits/chosen": -2.7817633152008057,
1107
+ "logits/rejected": -2.7922616004943848,
1108
+ "logps/chosen": -256.2757873535156,
1109
+ "logps/rejected": -356.1881408691406,
1110
+ "loss": 0.2571,
1111
+ "rewards/accuracies": 0.9359375238418579,
1112
+ "rewards/chosen": 1.2059863805770874,
1113
+ "rewards/margins": 1.7701711654663086,
1114
+ "rewards/rejected": -0.5641847848892212,
1115
+ "step": 780
1116
+ },
1117
+ {
1118
+ "epoch": 0.54,
1119
+ "learning_rate": 2.569916855631141e-07,
1120
+ "logits/chosen": -2.7949161529541016,
1121
+ "logits/rejected": -2.800379514694214,
1122
+ "logps/chosen": -219.2698516845703,
1123
+ "logps/rejected": -384.794189453125,
1124
+ "loss": 0.2512,
1125
+ "rewards/accuracies": 0.9296875,
1126
+ "rewards/chosen": 1.2469325065612793,
1127
+ "rewards/margins": 1.8279892206192017,
1128
+ "rewards/rejected": -0.5810565948486328,
1129
+ "step": 790
1130
+ },
1131
+ {
1132
+ "epoch": 0.54,
1133
+ "learning_rate": 2.532123960695389e-07,
1134
+ "logits/chosen": -2.7864224910736084,
1135
+ "logits/rejected": -2.8051304817199707,
1136
+ "logps/chosen": -243.42105102539062,
1137
+ "logps/rejected": -376.7647399902344,
1138
+ "loss": 0.2455,
1139
+ "rewards/accuracies": 0.9398437738418579,
1140
+ "rewards/chosen": 1.25649094581604,
1141
+ "rewards/margins": 1.8448721170425415,
1142
+ "rewards/rejected": -0.5883811712265015,
1143
+ "step": 800
1144
+ },
1145
+ {
1146
+ "epoch": 0.55,
1147
+ "learning_rate": 2.494331065759637e-07,
1148
+ "logits/chosen": -2.7794528007507324,
1149
+ "logits/rejected": -2.787205457687378,
1150
+ "logps/chosen": -239.23776245117188,
1151
+ "logps/rejected": -348.8122863769531,
1152
+ "loss": 0.2407,
1153
+ "rewards/accuracies": 0.940625011920929,
1154
+ "rewards/chosen": 1.279539942741394,
1155
+ "rewards/margins": 1.9069591760635376,
1156
+ "rewards/rejected": -0.627419114112854,
1157
+ "step": 810
1158
+ },
1159
+ {
1160
+ "epoch": 0.56,
1161
+ "learning_rate": 2.456538170823885e-07,
1162
+ "logits/chosen": -2.7908012866973877,
1163
+ "logits/rejected": -2.775237798690796,
1164
+ "logps/chosen": -237.18807983398438,
1165
+ "logps/rejected": -347.73028564453125,
1166
+ "loss": 0.2346,
1167
+ "rewards/accuracies": 0.938281238079071,
1168
+ "rewards/chosen": 1.2818529605865479,
1169
+ "rewards/margins": 1.891405701637268,
1170
+ "rewards/rejected": -0.6095527410507202,
1171
+ "step": 820
1172
+ },
1173
+ {
1174
+ "epoch": 0.56,
1175
+ "learning_rate": 2.418745275888133e-07,
1176
+ "logits/chosen": -2.788677453994751,
1177
+ "logits/rejected": -2.759464740753174,
1178
+ "logps/chosen": -244.3543243408203,
1179
+ "logps/rejected": -384.2773742675781,
1180
+ "loss": 0.249,
1181
+ "rewards/accuracies": 0.9273437261581421,
1182
+ "rewards/chosen": 1.2608978748321533,
1183
+ "rewards/margins": 1.8487341403961182,
1184
+ "rewards/rejected": -0.5878363251686096,
1185
+ "step": 830
1186
+ },
1187
+ {
1188
+ "epoch": 0.57,
1189
+ "learning_rate": 2.3809523809523806e-07,
1190
+ "logits/chosen": -2.7865688800811768,
1191
+ "logits/rejected": -2.744267463684082,
1192
+ "logps/chosen": -225.56716918945312,
1193
+ "logps/rejected": -373.64788818359375,
1194
+ "loss": 0.2401,
1195
+ "rewards/accuracies": 0.940625011920929,
1196
+ "rewards/chosen": 1.2590898275375366,
1197
+ "rewards/margins": 1.872513771057129,
1198
+ "rewards/rejected": -0.6134239435195923,
1199
+ "step": 840
1200
+ },
1201
+ {
1202
+ "epoch": 0.58,
1203
+ "learning_rate": 2.3431594860166287e-07,
1204
+ "logits/chosen": -2.763679027557373,
1205
+ "logits/rejected": -2.7585010528564453,
1206
+ "logps/chosen": -234.14706420898438,
1207
+ "logps/rejected": -332.43975830078125,
1208
+ "loss": 0.2506,
1209
+ "rewards/accuracies": 0.934374988079071,
1210
+ "rewards/chosen": 1.2429834604263306,
1211
+ "rewards/margins": 1.8476206064224243,
1212
+ "rewards/rejected": -0.6046372056007385,
1213
+ "step": 850
1214
+ },
1215
+ {
1216
+ "epoch": 0.58,
1217
+ "learning_rate": 2.3053665910808768e-07,
1218
+ "logits/chosen": -2.7579002380371094,
1219
+ "logits/rejected": -2.7620043754577637,
1220
+ "logps/chosen": -236.3244171142578,
1221
+ "logps/rejected": -339.3128356933594,
1222
+ "loss": 0.2543,
1223
+ "rewards/accuracies": 0.936718761920929,
1224
+ "rewards/chosen": 1.218972086906433,
1225
+ "rewards/margins": 1.8291162252426147,
1226
+ "rewards/rejected": -0.6101440191268921,
1227
+ "step": 860
1228
+ },
1229
+ {
1230
+ "epoch": 0.59,
1231
+ "learning_rate": 2.2675736961451246e-07,
1232
+ "logits/chosen": -2.7839019298553467,
1233
+ "logits/rejected": -2.7369167804718018,
1234
+ "logps/chosen": -219.27053833007812,
1235
+ "logps/rejected": -405.5704650878906,
1236
+ "loss": 0.2458,
1237
+ "rewards/accuracies": 0.9312499761581421,
1238
+ "rewards/chosen": 1.2801100015640259,
1239
+ "rewards/margins": 1.8934139013290405,
1240
+ "rewards/rejected": -0.6133038997650146,
1241
+ "step": 870
1242
+ },
1243
+ {
1244
+ "epoch": 0.6,
1245
+ "learning_rate": 2.2297808012093725e-07,
1246
+ "logits/chosen": -2.782578945159912,
1247
+ "logits/rejected": -2.7683374881744385,
1248
+ "logps/chosen": -245.6527099609375,
1249
+ "logps/rejected": -378.6884765625,
1250
+ "loss": 0.2384,
1251
+ "rewards/accuracies": 0.934374988079071,
1252
+ "rewards/chosen": 1.321287751197815,
1253
+ "rewards/margins": 1.9386436939239502,
1254
+ "rewards/rejected": -0.6173557043075562,
1255
+ "step": 880
1256
+ },
1257
+ {
1258
+ "epoch": 0.61,
1259
+ "learning_rate": 2.1919879062736206e-07,
1260
+ "logits/chosen": -2.7775015830993652,
1261
+ "logits/rejected": -2.752042293548584,
1262
+ "logps/chosen": -229.3787078857422,
1263
+ "logps/rejected": -356.0593566894531,
1264
+ "loss": 0.2423,
1265
+ "rewards/accuracies": 0.934374988079071,
1266
+ "rewards/chosen": 1.2723052501678467,
1267
+ "rewards/margins": 1.9301214218139648,
1268
+ "rewards/rejected": -0.6578160524368286,
1269
+ "step": 890
1270
+ },
1271
+ {
1272
+ "epoch": 0.61,
1273
+ "learning_rate": 2.1541950113378684e-07,
1274
+ "logits/chosen": -2.768510580062866,
1275
+ "logits/rejected": -2.7404208183288574,
1276
+ "logps/chosen": -265.3998107910156,
1277
+ "logps/rejected": -373.4928283691406,
1278
+ "loss": 0.2467,
1279
+ "rewards/accuracies": 0.9320312738418579,
1280
+ "rewards/chosen": 1.264615774154663,
1281
+ "rewards/margins": 1.920330286026001,
1282
+ "rewards/rejected": -0.6557145714759827,
1283
+ "step": 900
1284
+ },
1285
+ {
1286
+ "epoch": 0.62,
1287
+ "learning_rate": 2.1164021164021162e-07,
1288
+ "logits/chosen": -2.7891170978546143,
1289
+ "logits/rejected": -2.7741641998291016,
1290
+ "logps/chosen": -220.24307250976562,
1291
+ "logps/rejected": -358.5487976074219,
1292
+ "loss": 0.2284,
1293
+ "rewards/accuracies": 0.94140625,
1294
+ "rewards/chosen": 1.304023027420044,
1295
+ "rewards/margins": 1.9829524755477905,
1296
+ "rewards/rejected": -0.6789294481277466,
1297
+ "step": 910
1298
+ },
1299
+ {
1300
+ "epoch": 0.63,
1301
+ "learning_rate": 2.0786092214663643e-07,
1302
+ "logits/chosen": -2.7575926780700684,
1303
+ "logits/rejected": -2.7642369270324707,
1304
+ "logps/chosen": -234.12026977539062,
1305
+ "logps/rejected": -384.3020935058594,
1306
+ "loss": 0.2373,
1307
+ "rewards/accuracies": 0.94140625,
1308
+ "rewards/chosen": 1.2832618951797485,
1309
+ "rewards/margins": 1.9688091278076172,
1310
+ "rewards/rejected": -0.6855469942092896,
1311
+ "step": 920
1312
+ },
1313
+ {
1314
+ "epoch": 0.63,
1315
+ "learning_rate": 2.0408163265306121e-07,
1316
+ "logits/chosen": -2.766233444213867,
1317
+ "logits/rejected": -2.7951343059539795,
1318
+ "logps/chosen": -244.18026733398438,
1319
+ "logps/rejected": -320.21771240234375,
1320
+ "loss": 0.2259,
1321
+ "rewards/accuracies": 0.9398437738418579,
1322
+ "rewards/chosen": 1.312534213066101,
1323
+ "rewards/margins": 2.0482983589172363,
1324
+ "rewards/rejected": -0.7357643246650696,
1325
+ "step": 930
1326
+ },
1327
+ {
1328
+ "epoch": 0.64,
1329
+ "learning_rate": 2.0030234315948602e-07,
1330
+ "logits/chosen": -2.768256664276123,
1331
+ "logits/rejected": -2.7545723915100098,
1332
+ "logps/chosen": -248.816650390625,
1333
+ "logps/rejected": -401.00958251953125,
1334
+ "loss": 0.234,
1335
+ "rewards/accuracies": 0.9320312738418579,
1336
+ "rewards/chosen": 1.3067686557769775,
1337
+ "rewards/margins": 2.003986358642578,
1338
+ "rewards/rejected": -0.6972178816795349,
1339
+ "step": 940
1340
+ },
1341
+ {
1342
+ "epoch": 0.65,
1343
+ "learning_rate": 1.965230536659108e-07,
1344
+ "logits/chosen": -2.7718937397003174,
1345
+ "logits/rejected": -2.7864131927490234,
1346
+ "logps/chosen": -245.76220703125,
1347
+ "logps/rejected": -350.4901428222656,
1348
+ "loss": 0.2342,
1349
+ "rewards/accuracies": 0.938281238079071,
1350
+ "rewards/chosen": 1.3374592065811157,
1351
+ "rewards/margins": 2.001889228820801,
1352
+ "rewards/rejected": -0.6644300222396851,
1353
+ "step": 950
1354
+ },
1355
+ {
1356
+ "epoch": 0.65,
1357
+ "learning_rate": 1.927437641723356e-07,
1358
+ "logits/chosen": -2.7670133113861084,
1359
+ "logits/rejected": -2.76993465423584,
1360
+ "logps/chosen": -227.41748046875,
1361
+ "logps/rejected": -354.6375427246094,
1362
+ "loss": 0.2386,
1363
+ "rewards/accuracies": 0.92578125,
1364
+ "rewards/chosen": 1.3317902088165283,
1365
+ "rewards/margins": 1.9966375827789307,
1366
+ "rewards/rejected": -0.6648473739624023,
1367
+ "step": 960
1368
+ },
1369
+ {
1370
+ "epoch": 0.66,
1371
+ "learning_rate": 1.889644746787604e-07,
1372
+ "logits/chosen": -2.7860965728759766,
1373
+ "logits/rejected": -2.776639699935913,
1374
+ "logps/chosen": -257.2185363769531,
1375
+ "logps/rejected": -302.48846435546875,
1376
+ "loss": 0.2278,
1377
+ "rewards/accuracies": 0.9390624761581421,
1378
+ "rewards/chosen": 1.352912187576294,
1379
+ "rewards/margins": 2.036379814147949,
1380
+ "rewards/rejected": -0.6834677457809448,
1381
+ "step": 970
1382
+ },
1383
+ {
1384
+ "epoch": 0.67,
1385
+ "learning_rate": 1.8518518518518516e-07,
1386
+ "logits/chosen": -2.7641091346740723,
1387
+ "logits/rejected": -2.7789313793182373,
1388
+ "logps/chosen": -256.19476318359375,
1389
+ "logps/rejected": -390.69549560546875,
1390
+ "loss": 0.2325,
1391
+ "rewards/accuracies": 0.942187488079071,
1392
+ "rewards/chosen": 1.3011709451675415,
1393
+ "rewards/margins": 2.039425849914551,
1394
+ "rewards/rejected": -0.738254964351654,
1395
+ "step": 980
1396
+ },
1397
+ {
1398
+ "epoch": 0.67,
1399
+ "learning_rate": 1.8140589569160996e-07,
1400
+ "logits/chosen": -2.795642614364624,
1401
+ "logits/rejected": -2.7746355533599854,
1402
+ "logps/chosen": -234.4689483642578,
1403
+ "logps/rejected": -395.40618896484375,
1404
+ "loss": 0.223,
1405
+ "rewards/accuracies": 0.9390624761581421,
1406
+ "rewards/chosen": 1.3416879177093506,
1407
+ "rewards/margins": 2.1014368534088135,
1408
+ "rewards/rejected": -0.7597488164901733,
1409
+ "step": 990
1410
+ },
1411
+ {
1412
+ "epoch": 0.68,
1413
+ "learning_rate": 1.7762660619803477e-07,
1414
+ "logits/chosen": -2.7756259441375732,
1415
+ "logits/rejected": -2.741664409637451,
1416
+ "logps/chosen": -242.3101348876953,
1417
+ "logps/rejected": -363.46160888671875,
1418
+ "loss": 0.2285,
1419
+ "rewards/accuracies": 0.925000011920929,
1420
+ "rewards/chosen": 1.3446866273880005,
1421
+ "rewards/margins": 2.070406436920166,
1422
+ "rewards/rejected": -0.7257199287414551,
1423
+ "step": 1000
1424
+ },
1425
+ {
1426
+ "epoch": 0.69,
1427
+ "learning_rate": 1.7384731670445953e-07,
1428
+ "logits/chosen": -2.7595419883728027,
1429
+ "logits/rejected": -2.7858798503875732,
1430
+ "logps/chosen": -259.9520568847656,
1431
+ "logps/rejected": -358.3509216308594,
1432
+ "loss": 0.2273,
1433
+ "rewards/accuracies": 0.9359375238418579,
1434
+ "rewards/chosen": 1.3033568859100342,
1435
+ "rewards/margins": 2.0887067317962646,
1436
+ "rewards/rejected": -0.78534996509552,
1437
+ "step": 1010
1438
+ },
1439
+ {
1440
+ "epoch": 0.69,
1441
+ "learning_rate": 1.7006802721088434e-07,
1442
+ "logits/chosen": -2.768449068069458,
1443
+ "logits/rejected": -2.7718656063079834,
1444
+ "logps/chosen": -238.11740112304688,
1445
+ "logps/rejected": -354.0820007324219,
1446
+ "loss": 0.236,
1447
+ "rewards/accuracies": 0.9351562261581421,
1448
+ "rewards/chosen": 1.3048899173736572,
1449
+ "rewards/margins": 2.033565044403076,
1450
+ "rewards/rejected": -0.7286752462387085,
1451
+ "step": 1020
1452
+ },
1453
+ {
1454
+ "epoch": 0.7,
1455
+ "learning_rate": 1.6628873771730915e-07,
1456
+ "logits/chosen": -2.7650275230407715,
1457
+ "logits/rejected": -2.7476916313171387,
1458
+ "logps/chosen": -245.41885375976562,
1459
+ "logps/rejected": -343.54437255859375,
1460
+ "loss": 0.2357,
1461
+ "rewards/accuracies": 0.928906261920929,
1462
+ "rewards/chosen": 1.316489815711975,
1463
+ "rewards/margins": 2.062798023223877,
1464
+ "rewards/rejected": -0.7463082671165466,
1465
+ "step": 1030
1466
+ },
1467
+ {
1468
+ "epoch": 0.71,
1469
+ "learning_rate": 1.6250944822373393e-07,
1470
+ "logits/chosen": -2.7734358310699463,
1471
+ "logits/rejected": -2.7748751640319824,
1472
+ "logps/chosen": -237.48538208007812,
1473
+ "logps/rejected": -389.1809997558594,
1474
+ "loss": 0.2308,
1475
+ "rewards/accuracies": 0.930468738079071,
1476
+ "rewards/chosen": 1.2910696268081665,
1477
+ "rewards/margins": 2.079051971435547,
1478
+ "rewards/rejected": -0.7879821062088013,
1479
+ "step": 1040
1480
+ },
1481
+ {
1482
+ "epoch": 0.71,
1483
+ "learning_rate": 1.5873015873015872e-07,
1484
+ "logits/chosen": -2.7670979499816895,
1485
+ "logits/rejected": -2.769535779953003,
1486
+ "logps/chosen": -214.1968536376953,
1487
+ "logps/rejected": -331.4734802246094,
1488
+ "loss": 0.224,
1489
+ "rewards/accuracies": 0.9390624761581421,
1490
+ "rewards/chosen": 1.377071738243103,
1491
+ "rewards/margins": 2.1104674339294434,
1492
+ "rewards/rejected": -0.7333956956863403,
1493
+ "step": 1050
1494
+ },
1495
+ {
1496
+ "epoch": 0.72,
1497
+ "learning_rate": 1.5495086923658353e-07,
1498
+ "logits/chosen": -2.789698839187622,
1499
+ "logits/rejected": -2.7418367862701416,
1500
+ "logps/chosen": -233.3469696044922,
1501
+ "logps/rejected": -372.7334289550781,
1502
+ "loss": 0.2046,
1503
+ "rewards/accuracies": 0.948437511920929,
1504
+ "rewards/chosen": 1.4205210208892822,
1505
+ "rewards/margins": 2.2147347927093506,
1506
+ "rewards/rejected": -0.7942138910293579,
1507
+ "step": 1060
1508
+ },
1509
+ {
1510
+ "epoch": 0.73,
1511
+ "learning_rate": 1.511715797430083e-07,
1512
+ "logits/chosen": -2.7732410430908203,
1513
+ "logits/rejected": -2.7837493419647217,
1514
+ "logps/chosen": -240.2108917236328,
1515
+ "logps/rejected": -340.86712646484375,
1516
+ "loss": 0.2229,
1517
+ "rewards/accuracies": 0.932812511920929,
1518
+ "rewards/chosen": 1.365039348602295,
1519
+ "rewards/margins": 2.149728298187256,
1520
+ "rewards/rejected": -0.7846890091896057,
1521
+ "step": 1070
1522
+ },
1523
+ {
1524
+ "epoch": 0.73,
1525
+ "learning_rate": 1.473922902494331e-07,
1526
+ "logits/chosen": -2.762357711791992,
1527
+ "logits/rejected": -2.7503538131713867,
1528
+ "logps/chosen": -245.73129272460938,
1529
+ "logps/rejected": -367.2342529296875,
1530
+ "loss": 0.2247,
1531
+ "rewards/accuracies": 0.934374988079071,
1532
+ "rewards/chosen": 1.3420137166976929,
1533
+ "rewards/margins": 2.1435036659240723,
1534
+ "rewards/rejected": -0.8014899492263794,
1535
+ "step": 1080
1536
+ },
1537
+ {
1538
+ "epoch": 0.74,
1539
+ "learning_rate": 1.436130007558579e-07,
1540
+ "logits/chosen": -2.786447048187256,
1541
+ "logits/rejected": -2.7433903217315674,
1542
+ "logps/chosen": -259.77923583984375,
1543
+ "logps/rejected": -384.2717590332031,
1544
+ "loss": 0.2176,
1545
+ "rewards/accuracies": 0.938281238079071,
1546
+ "rewards/chosen": 1.4291341304779053,
1547
+ "rewards/margins": 2.1485352516174316,
1548
+ "rewards/rejected": -0.7194010019302368,
1549
+ "step": 1090
1550
+ },
1551
+ {
1552
+ "epoch": 0.75,
1553
+ "learning_rate": 1.3983371126228268e-07,
1554
+ "logits/chosen": -2.766045570373535,
1555
+ "logits/rejected": -2.783592700958252,
1556
+ "logps/chosen": -258.433349609375,
1557
+ "logps/rejected": -356.44293212890625,
1558
+ "loss": 0.2166,
1559
+ "rewards/accuracies": 0.9359375238418579,
1560
+ "rewards/chosen": 1.3983967304229736,
1561
+ "rewards/margins": 2.2020390033721924,
1562
+ "rewards/rejected": -0.8036419153213501,
1563
+ "step": 1100
1564
+ },
1565
+ {
1566
+ "epoch": 0.75,
1567
+ "learning_rate": 1.360544217687075e-07,
1568
+ "logits/chosen": -2.784245491027832,
1569
+ "logits/rejected": -2.7566187381744385,
1570
+ "logps/chosen": -251.7339324951172,
1571
+ "logps/rejected": -356.1120300292969,
1572
+ "loss": 0.2042,
1573
+ "rewards/accuracies": 0.949999988079071,
1574
+ "rewards/chosen": 1.394803762435913,
1575
+ "rewards/margins": 2.219846487045288,
1576
+ "rewards/rejected": -0.8250430822372437,
1577
+ "step": 1110
1578
+ },
1579
+ {
1580
+ "epoch": 0.76,
1581
+ "learning_rate": 1.3227513227513225e-07,
1582
+ "logits/chosen": -2.768209218978882,
1583
+ "logits/rejected": -2.7927510738372803,
1584
+ "logps/chosen": -250.1661376953125,
1585
+ "logps/rejected": -341.25396728515625,
1586
+ "loss": 0.2216,
1587
+ "rewards/accuracies": 0.936718761920929,
1588
+ "rewards/chosen": 1.3784762620925903,
1589
+ "rewards/margins": 2.1875884532928467,
1590
+ "rewards/rejected": -0.8091121912002563,
1591
+ "step": 1120
1592
+ },
1593
+ {
1594
+ "epoch": 0.77,
1595
+ "learning_rate": 1.2849584278155706e-07,
1596
+ "logits/chosen": -2.755992889404297,
1597
+ "logits/rejected": -2.7883083820343018,
1598
+ "logps/chosen": -258.3106384277344,
1599
+ "logps/rejected": -338.23822021484375,
1600
+ "loss": 0.2233,
1601
+ "rewards/accuracies": 0.9359375238418579,
1602
+ "rewards/chosen": 1.371977686882019,
1603
+ "rewards/margins": 2.1515755653381348,
1604
+ "rewards/rejected": -0.7795979380607605,
1605
+ "step": 1130
1606
+ },
1607
+ {
1608
+ "epoch": 0.77,
1609
+ "learning_rate": 1.2471655328798184e-07,
1610
+ "logits/chosen": -2.765443801879883,
1611
+ "logits/rejected": -2.773919105529785,
1612
+ "logps/chosen": -228.51766967773438,
1613
+ "logps/rejected": -353.0353698730469,
1614
+ "loss": 0.2184,
1615
+ "rewards/accuracies": 0.9359375238418579,
1616
+ "rewards/chosen": 1.3785618543624878,
1617
+ "rewards/margins": 2.173300266265869,
1618
+ "rewards/rejected": -0.7947384119033813,
1619
+ "step": 1140
1620
+ },
1621
+ {
1622
+ "epoch": 0.78,
1623
+ "learning_rate": 1.2093726379440665e-07,
1624
+ "logits/chosen": -2.7806408405303955,
1625
+ "logits/rejected": -2.756528854370117,
1626
+ "logps/chosen": -227.71621704101562,
1627
+ "logps/rejected": -391.3194580078125,
1628
+ "loss": 0.2113,
1629
+ "rewards/accuracies": 0.9476562738418579,
1630
+ "rewards/chosen": 1.3853504657745361,
1631
+ "rewards/margins": 2.21071195602417,
1632
+ "rewards/rejected": -0.8253618478775024,
1633
+ "step": 1150
1634
+ },
1635
+ {
1636
+ "epoch": 0.79,
1637
+ "learning_rate": 1.1715797430083144e-07,
1638
+ "logits/chosen": -2.7610268592834473,
1639
+ "logits/rejected": -2.7615675926208496,
1640
+ "logps/chosen": -269.79010009765625,
1641
+ "logps/rejected": -378.21209716796875,
1642
+ "loss": 0.2102,
1643
+ "rewards/accuracies": 0.9398437738418579,
1644
+ "rewards/chosen": 1.3920191526412964,
1645
+ "rewards/margins": 2.2798304557800293,
1646
+ "rewards/rejected": -0.887811541557312,
1647
+ "step": 1160
1648
+ },
1649
+ {
1650
+ "epoch": 0.8,
1651
+ "learning_rate": 1.1337868480725623e-07,
1652
+ "logits/chosen": -2.776198625564575,
1653
+ "logits/rejected": -2.768550395965576,
1654
+ "logps/chosen": -246.81887817382812,
1655
+ "logps/rejected": -365.49249267578125,
1656
+ "loss": 0.2134,
1657
+ "rewards/accuracies": 0.93359375,
1658
+ "rewards/chosen": 1.4072265625,
1659
+ "rewards/margins": 2.2190985679626465,
1660
+ "rewards/rejected": -0.8118720054626465,
1661
+ "step": 1170
1662
+ },
1663
+ {
1664
+ "epoch": 0.8,
1665
+ "learning_rate": 1.0959939531368103e-07,
1666
+ "logits/chosen": -2.782680034637451,
1667
+ "logits/rejected": -2.7389519214630127,
1668
+ "logps/chosen": -239.7198944091797,
1669
+ "logps/rejected": -363.89215087890625,
1670
+ "loss": 0.2147,
1671
+ "rewards/accuracies": 0.942187488079071,
1672
+ "rewards/chosen": 1.3731368780136108,
1673
+ "rewards/margins": 2.227461814880371,
1674
+ "rewards/rejected": -0.8543251156806946,
1675
+ "step": 1180
1676
+ },
1677
+ {
1678
+ "epoch": 0.81,
1679
+ "learning_rate": 1.0582010582010581e-07,
1680
+ "logits/chosen": -2.7652835845947266,
1681
+ "logits/rejected": -2.74135160446167,
1682
+ "logps/chosen": -223.03579711914062,
1683
+ "logps/rejected": -407.8848571777344,
1684
+ "loss": 0.2255,
1685
+ "rewards/accuracies": 0.930468738079071,
1686
+ "rewards/chosen": 1.3337465524673462,
1687
+ "rewards/margins": 2.1734132766723633,
1688
+ "rewards/rejected": -0.8396667242050171,
1689
+ "step": 1190
1690
+ },
1691
+ {
1692
+ "epoch": 0.82,
1693
+ "learning_rate": 1.0204081632653061e-07,
1694
+ "logits/chosen": -2.785404920578003,
1695
+ "logits/rejected": -2.768556594848633,
1696
+ "logps/chosen": -217.6273651123047,
1697
+ "logps/rejected": -390.0624694824219,
1698
+ "loss": 0.2004,
1699
+ "rewards/accuracies": 0.9515625238418579,
1700
+ "rewards/chosen": 1.452343225479126,
1701
+ "rewards/margins": 2.3132705688476562,
1702
+ "rewards/rejected": -0.8609271049499512,
1703
+ "step": 1200
1704
+ },
1705
+ {
1706
+ "epoch": 0.82,
1707
+ "learning_rate": 9.82615268329554e-08,
1708
+ "logits/chosen": -2.7641220092773438,
1709
+ "logits/rejected": -2.7403550148010254,
1710
+ "logps/chosen": -254.1678466796875,
1711
+ "logps/rejected": -381.2986145019531,
1712
+ "loss": 0.2165,
1713
+ "rewards/accuracies": 0.938281238079071,
1714
+ "rewards/chosen": 1.3716920614242554,
1715
+ "rewards/margins": 2.208040952682495,
1716
+ "rewards/rejected": -0.8363490104675293,
1717
+ "step": 1210
1718
+ },
1719
+ {
1720
+ "epoch": 0.83,
1721
+ "learning_rate": 9.44822373393802e-08,
1722
+ "logits/chosen": -2.7834503650665283,
1723
+ "logits/rejected": -2.7499313354492188,
1724
+ "logps/chosen": -220.555908203125,
1725
+ "logps/rejected": -353.34454345703125,
1726
+ "loss": 0.2073,
1727
+ "rewards/accuracies": 0.936718761920929,
1728
+ "rewards/chosen": 1.4545724391937256,
1729
+ "rewards/margins": 2.2807674407958984,
1730
+ "rewards/rejected": -0.826195240020752,
1731
+ "step": 1220
1732
+ },
1733
+ {
1734
+ "epoch": 0.84,
1735
+ "learning_rate": 9.070294784580498e-08,
1736
+ "logits/chosen": -2.7742843627929688,
1737
+ "logits/rejected": -2.7704269886016846,
1738
+ "logps/chosen": -240.16586303710938,
1739
+ "logps/rejected": -341.08270263671875,
1740
+ "loss": 0.2097,
1741
+ "rewards/accuracies": 0.9398437738418579,
1742
+ "rewards/chosen": 1.3970229625701904,
1743
+ "rewards/margins": 2.205933094024658,
1744
+ "rewards/rejected": -0.8089098930358887,
1745
+ "step": 1230
1746
+ },
1747
+ {
1748
+ "epoch": 0.84,
1749
+ "learning_rate": 8.692365835222977e-08,
1750
+ "logits/chosen": -2.7731618881225586,
1751
+ "logits/rejected": -2.7807064056396484,
1752
+ "logps/chosen": -246.8760223388672,
1753
+ "logps/rejected": -394.09661865234375,
1754
+ "loss": 0.1942,
1755
+ "rewards/accuracies": 0.94921875,
1756
+ "rewards/chosen": 1.4174280166625977,
1757
+ "rewards/margins": 2.335336446762085,
1758
+ "rewards/rejected": -0.9179089665412903,
1759
+ "step": 1240
1760
+ },
1761
+ {
1762
+ "epoch": 0.85,
1763
+ "learning_rate": 8.314436885865457e-08,
1764
+ "logits/chosen": -2.7794883251190186,
1765
+ "logits/rejected": -2.7599997520446777,
1766
+ "logps/chosen": -234.8397979736328,
1767
+ "logps/rejected": -354.03411865234375,
1768
+ "loss": 0.2101,
1769
+ "rewards/accuracies": 0.9359375238418579,
1770
+ "rewards/chosen": 1.3885688781738281,
1771
+ "rewards/margins": 2.2632603645324707,
1772
+ "rewards/rejected": -0.8746916651725769,
1773
+ "step": 1250
1774
+ },
1775
+ {
1776
+ "epoch": 0.86,
1777
+ "learning_rate": 7.936507936507936e-08,
1778
+ "logits/chosen": -2.7606375217437744,
1779
+ "logits/rejected": -2.7535159587860107,
1780
+ "logps/chosen": -225.62606811523438,
1781
+ "logps/rejected": -382.0788269042969,
1782
+ "loss": 0.2247,
1783
+ "rewards/accuracies": 0.9375,
1784
+ "rewards/chosen": 1.3528351783752441,
1785
+ "rewards/margins": 2.18499755859375,
1786
+ "rewards/rejected": -0.8321624994277954,
1787
+ "step": 1260
1788
+ },
1789
+ {
1790
+ "epoch": 0.86,
1791
+ "learning_rate": 7.558578987150415e-08,
1792
+ "logits/chosen": -2.7874550819396973,
1793
+ "logits/rejected": -2.7440848350524902,
1794
+ "logps/chosen": -216.8153533935547,
1795
+ "logps/rejected": -372.12982177734375,
1796
+ "loss": 0.2204,
1797
+ "rewards/accuracies": 0.946093738079071,
1798
+ "rewards/chosen": 1.3856043815612793,
1799
+ "rewards/margins": 2.1913902759552,
1800
+ "rewards/rejected": -0.8057858347892761,
1801
+ "step": 1270
1802
+ },
1803
+ {
1804
+ "epoch": 0.87,
1805
+ "learning_rate": 7.180650037792895e-08,
1806
+ "logits/chosen": -2.7706284523010254,
1807
+ "logits/rejected": -2.7321717739105225,
1808
+ "logps/chosen": -249.1674041748047,
1809
+ "logps/rejected": -390.70855712890625,
1810
+ "loss": 0.2218,
1811
+ "rewards/accuracies": 0.932812511920929,
1812
+ "rewards/chosen": 1.3266818523406982,
1813
+ "rewards/margins": 2.204909086227417,
1814
+ "rewards/rejected": -0.8782272338867188,
1815
+ "step": 1280
1816
+ },
1817
+ {
1818
+ "epoch": 0.88,
1819
+ "learning_rate": 6.802721088435375e-08,
1820
+ "logits/chosen": -2.771331310272217,
1821
+ "logits/rejected": -2.7345921993255615,
1822
+ "logps/chosen": -244.32217407226562,
1823
+ "logps/rejected": -395.6925964355469,
1824
+ "loss": 0.2148,
1825
+ "rewards/accuracies": 0.93359375,
1826
+ "rewards/chosen": 1.379319190979004,
1827
+ "rewards/margins": 2.2045130729675293,
1828
+ "rewards/rejected": -0.8251941800117493,
1829
+ "step": 1290
1830
+ },
1831
+ {
1832
+ "epoch": 0.88,
1833
+ "learning_rate": 6.424792139077853e-08,
1834
+ "logits/chosen": -2.781578779220581,
1835
+ "logits/rejected": -2.7588868141174316,
1836
+ "logps/chosen": -234.79800415039062,
1837
+ "logps/rejected": -359.72332763671875,
1838
+ "loss": 0.2047,
1839
+ "rewards/accuracies": 0.9476562738418579,
1840
+ "rewards/chosen": 1.4176688194274902,
1841
+ "rewards/margins": 2.3017234802246094,
1842
+ "rewards/rejected": -0.8840547800064087,
1843
+ "step": 1300
1844
+ },
1845
+ {
1846
+ "epoch": 0.89,
1847
+ "learning_rate": 6.046863189720333e-08,
1848
+ "logits/chosen": -2.770113706588745,
1849
+ "logits/rejected": -2.73785662651062,
1850
+ "logps/chosen": -244.0814971923828,
1851
+ "logps/rejected": -370.0007019042969,
1852
+ "loss": 0.2086,
1853
+ "rewards/accuracies": 0.9351562261581421,
1854
+ "rewards/chosen": 1.4492876529693604,
1855
+ "rewards/margins": 2.2907984256744385,
1856
+ "rewards/rejected": -0.8415109515190125,
1857
+ "step": 1310
1858
+ },
1859
+ {
1860
+ "epoch": 0.9,
1861
+ "learning_rate": 5.6689342403628116e-08,
1862
+ "logits/chosen": -2.753613233566284,
1863
+ "logits/rejected": -2.7601518630981445,
1864
+ "logps/chosen": -250.3900604248047,
1865
+ "logps/rejected": -360.0616455078125,
1866
+ "loss": 0.2099,
1867
+ "rewards/accuracies": 0.9359375238418579,
1868
+ "rewards/chosen": 1.4489208459854126,
1869
+ "rewards/margins": 2.3246617317199707,
1870
+ "rewards/rejected": -0.8757408261299133,
1871
+ "step": 1320
1872
+ },
1873
+ {
1874
+ "epoch": 0.9,
1875
+ "learning_rate": 5.2910052910052905e-08,
1876
+ "logits/chosen": -2.7654261589050293,
1877
+ "logits/rejected": -2.7347397804260254,
1878
+ "logps/chosen": -232.9058837890625,
1879
+ "logps/rejected": -352.3494567871094,
1880
+ "loss": 0.2101,
1881
+ "rewards/accuracies": 0.9398437738418579,
1882
+ "rewards/chosen": 1.4914627075195312,
1883
+ "rewards/margins": 2.3455305099487305,
1884
+ "rewards/rejected": -0.8540679216384888,
1885
+ "step": 1330
1886
+ },
1887
+ {
1888
+ "epoch": 0.91,
1889
+ "learning_rate": 4.91307634164777e-08,
1890
+ "logits/chosen": -2.7729830741882324,
1891
+ "logits/rejected": -2.7506096363067627,
1892
+ "logps/chosen": -237.5419158935547,
1893
+ "logps/rejected": -361.7286071777344,
1894
+ "loss": 0.2271,
1895
+ "rewards/accuracies": 0.936718761920929,
1896
+ "rewards/chosen": 1.3387925624847412,
1897
+ "rewards/margins": 2.1680846214294434,
1898
+ "rewards/rejected": -0.8292919397354126,
1899
+ "step": 1340
1900
+ },
1901
+ {
1902
+ "epoch": 0.92,
1903
+ "learning_rate": 4.535147392290249e-08,
1904
+ "logits/chosen": -2.758366584777832,
1905
+ "logits/rejected": -2.747448444366455,
1906
+ "logps/chosen": -262.02313232421875,
1907
+ "logps/rejected": -371.6409912109375,
1908
+ "loss": 0.2117,
1909
+ "rewards/accuracies": 0.93359375,
1910
+ "rewards/chosen": 1.4255142211914062,
1911
+ "rewards/margins": 2.2661709785461426,
1912
+ "rewards/rejected": -0.8406568765640259,
1913
+ "step": 1350
1914
+ },
1915
+ {
1916
+ "epoch": 0.92,
1917
+ "learning_rate": 4.157218442932729e-08,
1918
+ "logits/chosen": -2.7460246086120605,
1919
+ "logits/rejected": -2.7499794960021973,
1920
+ "logps/chosen": -242.7806396484375,
1921
+ "logps/rejected": -374.79736328125,
1922
+ "loss": 0.2305,
1923
+ "rewards/accuracies": 0.9242187738418579,
1924
+ "rewards/chosen": 1.3290668725967407,
1925
+ "rewards/margins": 2.187917470932007,
1926
+ "rewards/rejected": -0.8588504791259766,
1927
+ "step": 1360
1928
+ },
1929
+ {
1930
+ "epoch": 0.93,
1931
+ "learning_rate": 3.779289493575208e-08,
1932
+ "logits/chosen": -2.7681326866149902,
1933
+ "logits/rejected": -2.7562155723571777,
1934
+ "logps/chosen": -220.0043487548828,
1935
+ "logps/rejected": -369.31268310546875,
1936
+ "loss": 0.2015,
1937
+ "rewards/accuracies": 0.944531261920929,
1938
+ "rewards/chosen": 1.4147917032241821,
1939
+ "rewards/margins": 2.347784996032715,
1940
+ "rewards/rejected": -0.9329932332038879,
1941
+ "step": 1370
1942
+ },
1943
+ {
1944
+ "epoch": 0.94,
1945
+ "learning_rate": 3.4013605442176873e-08,
1946
+ "logits/chosen": -2.7685980796813965,
1947
+ "logits/rejected": -2.761018753051758,
1948
+ "logps/chosen": -244.3848114013672,
1949
+ "logps/rejected": -352.2154235839844,
1950
+ "loss": 0.2147,
1951
+ "rewards/accuracies": 0.9398437738418579,
1952
+ "rewards/chosen": 1.3917274475097656,
1953
+ "rewards/margins": 2.2305819988250732,
1954
+ "rewards/rejected": -0.8388546109199524,
1955
+ "step": 1380
1956
+ },
1957
+ {
1958
+ "epoch": 0.94,
1959
+ "learning_rate": 3.023431594860166e-08,
1960
+ "logits/chosen": -2.7724173069000244,
1961
+ "logits/rejected": -2.773851156234741,
1962
+ "logps/chosen": -251.663330078125,
1963
+ "logps/rejected": -341.803466796875,
1964
+ "loss": 0.1992,
1965
+ "rewards/accuracies": 0.94921875,
1966
+ "rewards/chosen": 1.4376652240753174,
1967
+ "rewards/margins": 2.324432134628296,
1968
+ "rewards/rejected": -0.886766791343689,
1969
+ "step": 1390
1970
+ },
1971
+ {
1972
+ "epoch": 0.95,
1973
+ "learning_rate": 2.6455026455026453e-08,
1974
+ "logits/chosen": -2.758798122406006,
1975
+ "logits/rejected": -2.763350009918213,
1976
+ "logps/chosen": -238.17745971679688,
1977
+ "logps/rejected": -398.58135986328125,
1978
+ "loss": 0.2004,
1979
+ "rewards/accuracies": 0.9476562738418579,
1980
+ "rewards/chosen": 1.4174001216888428,
1981
+ "rewards/margins": 2.3445682525634766,
1982
+ "rewards/rejected": -0.9271681904792786,
1983
+ "step": 1400
1984
+ },
1985
+ {
1986
+ "epoch": 0.96,
1987
+ "learning_rate": 2.2675736961451246e-08,
1988
+ "logits/chosen": -2.7801098823547363,
1989
+ "logits/rejected": -2.7490382194519043,
1990
+ "logps/chosen": -242.81613159179688,
1991
+ "logps/rejected": -361.264892578125,
1992
+ "loss": 0.2077,
1993
+ "rewards/accuracies": 0.94140625,
1994
+ "rewards/chosen": 1.4166629314422607,
1995
+ "rewards/margins": 2.316483736038208,
1996
+ "rewards/rejected": -0.8998208045959473,
1997
+ "step": 1410
1998
+ },
1999
+ {
2000
+ "epoch": 0.97,
2001
+ "learning_rate": 1.889644746787604e-08,
2002
+ "logits/chosen": -2.75722336769104,
2003
+ "logits/rejected": -2.7228329181671143,
2004
+ "logps/chosen": -251.5331268310547,
2005
+ "logps/rejected": -375.8110046386719,
2006
+ "loss": 0.2226,
2007
+ "rewards/accuracies": 0.940625011920929,
2008
+ "rewards/chosen": 1.4032243490219116,
2009
+ "rewards/margins": 2.2185873985290527,
2010
+ "rewards/rejected": -0.8153629302978516,
2011
+ "step": 1420
2012
+ },
2013
+ {
2014
+ "epoch": 0.97,
2015
+ "learning_rate": 1.511715797430083e-08,
2016
+ "logits/chosen": -2.7710134983062744,
2017
+ "logits/rejected": -2.787081241607666,
2018
+ "logps/chosen": -241.9620361328125,
2019
+ "logps/rejected": -356.4383544921875,
2020
+ "loss": 0.2074,
2021
+ "rewards/accuracies": 0.938281238079071,
2022
+ "rewards/chosen": 1.4058793783187866,
2023
+ "rewards/margins": 2.3387274742126465,
2024
+ "rewards/rejected": -0.9328481554985046,
2025
+ "step": 1430
2026
+ },
2027
+ {
2028
+ "epoch": 0.98,
2029
+ "learning_rate": 1.1337868480725623e-08,
2030
+ "logits/chosen": -2.788255214691162,
2031
+ "logits/rejected": -2.790001392364502,
2032
+ "logps/chosen": -249.0662078857422,
2033
+ "logps/rejected": -375.603759765625,
2034
+ "loss": 0.1976,
2035
+ "rewards/accuracies": 0.9429687261581421,
2036
+ "rewards/chosen": 1.4689807891845703,
2037
+ "rewards/margins": 2.375899076461792,
2038
+ "rewards/rejected": -0.9069182276725769,
2039
+ "step": 1440
2040
+ },
2041
+ {
2042
+ "epoch": 0.99,
2043
+ "learning_rate": 7.558578987150416e-09,
2044
+ "logits/chosen": -2.762585401535034,
2045
+ "logits/rejected": -2.7085330486297607,
2046
+ "logps/chosen": -238.41751098632812,
2047
+ "logps/rejected": -380.84942626953125,
2048
+ "loss": 0.2228,
2049
+ "rewards/accuracies": 0.936718761920929,
2050
+ "rewards/chosen": 1.4105838537216187,
2051
+ "rewards/margins": 2.214503288269043,
2052
+ "rewards/rejected": -0.8039194345474243,
2053
+ "step": 1450
2054
+ },
2055
+ {
2056
+ "epoch": 0.99,
2057
+ "learning_rate": 3.779289493575208e-09,
2058
+ "logits/chosen": -2.7654013633728027,
2059
+ "logits/rejected": -2.7555670738220215,
2060
+ "logps/chosen": -237.80899047851562,
2061
+ "logps/rejected": -345.7412109375,
2062
+ "loss": 0.2026,
2063
+ "rewards/accuracies": 0.9515625238418579,
2064
+ "rewards/chosen": 1.418304443359375,
2065
+ "rewards/margins": 2.327260971069336,
2066
+ "rewards/rejected": -0.9089563488960266,
2067
+ "step": 1460
2068
+ },
2069
+ {
2070
+ "epoch": 1.0,
2071
+ "learning_rate": 0.0,
2072
+ "logits/chosen": -2.75099515914917,
2073
+ "logits/rejected": -2.7724432945251465,
2074
+ "logps/chosen": -255.767578125,
2075
+ "logps/rejected": -360.5829772949219,
2076
+ "loss": 0.2019,
2077
+ "rewards/accuracies": 0.946093738079071,
2078
+ "rewards/chosen": 1.4355896711349487,
2079
+ "rewards/margins": 2.3439955711364746,
2080
+ "rewards/rejected": -0.9084057807922363,
2081
+ "step": 1470
2082
+ },
2083
+ {
2084
+ "epoch": 1.0,
2085
+ "eval_logits/chosen": -2.617767333984375,
2086
+ "eval_logits/rejected": -2.721874952316284,
2087
+ "eval_logps/chosen": -238.54788208007812,
2088
+ "eval_logps/rejected": -388.59033203125,
2089
+ "eval_loss": 0.20815864205360413,
2090
+ "eval_rewards/accuracies": 0.9413930773735046,
2091
+ "eval_rewards/chosen": 1.3856867551803589,
2092
+ "eval_rewards/margins": 2.292266845703125,
2093
+ "eval_rewards/rejected": -0.9065799117088318,
2094
+ "eval_runtime": 2798.4996,
2095
+ "eval_samples_per_second": 3.395,
2096
+ "eval_steps_per_second": 0.425,
2097
+ "step": 1470
2098
+ },
2099
+ {
2100
+ "epoch": 1.0,
2101
+ "step": 1470,
2102
+ "total_flos": 0.0,
2103
+ "train_loss": 0.33413780781687524,
2104
+ "train_runtime": 91396.7242,
2105
+ "train_samples_per_second": 2.06,
2106
+ "train_steps_per_second": 0.016
2107
+ }
2108
+ ],
2109
+ "logging_steps": 10,
2110
+ "max_steps": 1470,
2111
+ "num_train_epochs": 1,
2112
+ "save_steps": 500,
2113
+ "total_flos": 0.0,
2114
+ "trial_name": null,
2115
+ "trial_params": null
2116
+ }
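
Note on the log records above: each entry is written every 10 optimizer steps (`"logging_steps": 10`), and `rewards/margins` is simply `rewards/chosen - rewards/rejected` (for example, in the final evaluation record 1.3857 - (-0.9066) ≈ 2.2923). The sketch below shows one way these curves could be loaded and plotted from the saved `trainer_state.json`; it assumes the records sit under the standard Transformers `Trainer` key `log_history` (not visible in the excerpt above) and that matplotlib is available, so treat it as an illustration rather than part of this repository.

```python
# Sketch: plot the DPO metrics logged in trainer_state.json.
# Assumption: the records above live under the Trainer's "log_history" key.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step training records; evaluation and summary entries
# use "eval_*" / "train_*" keys instead of a plain "loss" key.
train_logs = [r for r in state["log_history"] if "loss" in r and "rewards/margins" in r]

steps = [r["step"] for r in train_logs]
loss = [r["loss"] for r in train_logs]
margins = [r["rewards/margins"] for r in train_logs]

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(steps, loss)
ax1.set_ylabel("DPO loss")
ax2.plot(steps, margins)
ax2.set_ylabel("rewards/margins")
ax2.set_xlabel("step")
fig.tight_layout()
fig.savefig("dpo_training_curves.png")
```
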
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219972c2a6a222c6eefdfc187b05a639de3b1f90c7026b56decc2c8bb45f034c
3
+ size 4728
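
The `training_args.bin` file added here is stored via Git LFS, so only the pointer (oid and size) appears in the diff; the blob itself is the serialized `TrainingArguments` object written by the Transformers `Trainer`. A hedged sketch of how it could be inspected after `git lfs pull` follows; the exact attributes depend on the transformers version that produced the file, and this is not part of the repository itself.

```python
# Sketch: inspect the pickled TrainingArguments in training_args.bin.
# Requires the real LFS object (git lfs pull) plus torch and a transformers
# version compatible with the one that wrote the file.
import torch

# Recent PyTorch defaults to weights_only=True, which rejects pickled
# non-tensor payloads, so pass weights_only=False explicitly.
args = torch.load("training_args.bin", weights_only=False)

print(type(args).__name__)               # typically "TrainingArguments"
print(args.learning_rate)
print(args.per_device_train_batch_size)
print(args.gradient_accumulation_steps)
```
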