Ber Zoidberg committed on
Commit
b445297
1 Parent(s): 1b5f7db

Model save

Browse files
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: NousResearch/Yarn-Mistral-7b-128k
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: unraveled-7b-dpo-lora
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # unraveled-7b-dpo-lora
15
+
16
+ This model is a fine-tuned version of [NousResearch/Yarn-Mistral-7b-128k](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k) on an unspecified dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.5895
19
+ - Rewards/chosen: 0.1439
20
+ - Rewards/rejected: -0.1833
21
+ - Rewards/accuracies: 0.6880
22
+ - Rewards/margins: 0.3272
23
+ - Logps/rejected: -221.8329
24
+ - Logps/chosen: -266.1414
25
+ - Logits/rejected: -1.9675
26
+ - Logits/chosen: -2.0859
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-07
46
+ - train_batch_size: 2
47
+ - eval_batch_size: 4
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 4
51
+ - gradient_accumulation_steps: 32
52
+ - total_train_batch_size: 256
53
+ - total_eval_batch_size: 16
54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
+ - lr_scheduler_type: linear
56
+ - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 3
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6313 | 1.0 | 242 | 0.6318 | 0.1228 | -0.0304 | 0.6600 | 0.1532 | -220.3036 | -266.3521 | -1.9863 | -2.1062 |
64
+ | 0.6013 | 2.0 | 484 | 0.5983 | 0.1484 | -0.1334 | 0.6760 | 0.2819 | -221.3338 | -266.0959 | -1.9723 | -2.0914 |
65
+ | 0.5889 | 3.0 | 726 | 0.5895 | 0.1439 | -0.1833 | 0.6880 | 0.3272 | -221.8329 | -266.1414 | -1.9675 | -2.0859 |
66
+
67
+
68
+ ### Framework versions
69
+
70
+ - Transformers 4.35.0
71
+ - Pytorch 2.1.0+cu118
72
+ - Datasets 2.14.6
73
+ - Tokenizers 0.14.1
adapter_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "NousResearch/Yarn-Mistral-7b-128k",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "lora_alpha": 16,
12
+ "lora_dropout": 0.1,
13
+ "modules_to_save": null,
14
+ "peft_type": "LORA",
15
+ "r": 64,
16
+ "rank_pattern": {},
17
+ "revision": null,
18
+ "target_modules": [
19
+ "o_proj",
20
+ "q_proj",
21
+ "k_proj",
22
+ "v_proj"
23
+ ],
24
+ "task_type": "CAUSAL_LM"
25
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d361ff1dd8577e4bedd4011cfd60a7e4a850379b1c62d1000c06b041e4c858
3
+ size 218138576
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -2.085902214050293,
4
+ "eval_logits/rejected": -1.9675065279006958,
5
+ "eval_logps/chosen": -266.14141845703125,
6
+ "eval_logps/rejected": -221.83285522460938,
7
+ "eval_loss": 0.5895045399665833,
8
+ "eval_rewards/accuracies": 0.6880000233650208,
9
+ "eval_rewards/chosen": 0.14388784766197205,
10
+ "eval_rewards/margins": 0.3272360563278198,
11
+ "eval_rewards/rejected": -0.1833481788635254,
12
+ "eval_runtime": 170.0188,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 11.763,
15
+ "eval_steps_per_second": 0.735,
16
+ "train_loss": 0.622471270236102,
17
+ "train_runtime": 20371.8366,
18
+ "train_samples": 61966,
19
+ "train_samples_per_second": 9.125,
20
+ "train_steps_per_second": 0.036
21
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -2.085902214050293,
4
+ "eval_logits/rejected": -1.9675065279006958,
5
+ "eval_logps/chosen": -266.14141845703125,
6
+ "eval_logps/rejected": -221.83285522460938,
7
+ "eval_loss": 0.5895045399665833,
8
+ "eval_rewards/accuracies": 0.6880000233650208,
9
+ "eval_rewards/chosen": 0.14388784766197205,
10
+ "eval_rewards/margins": 0.3272360563278198,
11
+ "eval_rewards/rejected": -0.1833481788635254,
12
+ "eval_runtime": 170.0188,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 11.763,
15
+ "eval_steps_per_second": 0.735
16
+ }
runs/Nov22_09-29-27_401fa5a8015d/events.out.tfevents.1700645508.401fa5a8015d.9788.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bf84214b62eb5abef7f395c1c7fd693b1b97aee2815078f50035bf105a948f
3
+ size 53448
runs/Nov22_09-29-27_401fa5a8015d/events.out.tfevents.1700666050.401fa5a8015d.9788.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cbcea6296bfe57ddf5605e55aef2891478407c1b3e39ef131fdd971da52de35
3
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<unk>",
4
+ "<s>",
5
+ "</s>"
6
+ ],
7
+ "bos_token": {
8
+ "content": "<s>",
9
+ "lstrip": false,
10
+ "normalized": false,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "eos_token": {
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "pad_token": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "unk_token": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "additional_special_tokens": [
29
+ "<unk>",
30
+ "<s>",
31
+ "</s>"
32
+ ],
33
+ "bos_token": "<s>",
34
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "</s>",
37
+ "legacy": true,
38
+ "model_max_length": 2048,
39
+ "pad_token": "</s>",
40
+ "sp_model_kwargs": {},
41
+ "spaces_between_special_tokens": false,
42
+ "tokenizer_class": "LlamaTokenizer",
43
+ "unk_token": "<unk>",
44
+ "use_default_system_prompt": true
45
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.622471270236102,
4
+ "train_runtime": 20371.8366,
5
+ "train_samples": 61966,
6
+ "train_samples_per_second": 9.125,
7
+ "train_steps_per_second": 0.036
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1098 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9992254066615027,
5
+ "eval_steps": 100,
6
+ "global_step": 726,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 6.84931506849315e-09,
14
+ "logits/chosen": -2.3569769859313965,
15
+ "logits/rejected": -2.397932529449463,
16
+ "logps/chosen": -275.3341369628906,
17
+ "logps/rejected": -209.60626220703125,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.04,
27
+ "learning_rate": 6.84931506849315e-08,
28
+ "logits/chosen": -2.4118523597717285,
29
+ "logits/rejected": -2.3359429836273193,
30
+ "logps/chosen": -296.8043212890625,
31
+ "logps/rejected": -226.9486541748047,
32
+ "loss": 0.6936,
33
+ "rewards/accuracies": 0.4618055522441864,
34
+ "rewards/chosen": -0.0005276877782307565,
35
+ "rewards/margins": -0.0006244900869205594,
36
+ "rewards/rejected": 9.680193034000695e-05,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.08,
41
+ "learning_rate": 1.36986301369863e-07,
42
+ "logits/chosen": -2.4295315742492676,
43
+ "logits/rejected": -2.379143714904785,
44
+ "logps/chosen": -281.6752014160156,
45
+ "logps/rejected": -217.3535919189453,
46
+ "loss": 0.6938,
47
+ "rewards/accuracies": 0.4906249940395355,
48
+ "rewards/chosen": 0.0024631484411656857,
49
+ "rewards/margins": 5.021132437832421e-06,
50
+ "rewards/rejected": 0.0024581279139965773,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "learning_rate": 2.054794520547945e-07,
56
+ "logits/chosen": -2.385784149169922,
57
+ "logits/rejected": -2.3432505130767822,
58
+ "logps/chosen": -255.5972442626953,
59
+ "logps/rejected": -208.0276336669922,
60
+ "loss": 0.6924,
61
+ "rewards/accuracies": 0.5406249761581421,
62
+ "rewards/chosen": 0.0008739754557609558,
63
+ "rewards/margins": 0.002002383815124631,
64
+ "rewards/rejected": -0.0011284081265330315,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.17,
69
+ "learning_rate": 2.73972602739726e-07,
70
+ "logits/chosen": -2.449314832687378,
71
+ "logits/rejected": -2.3985402584075928,
72
+ "logps/chosen": -286.9411926269531,
73
+ "logps/rejected": -217.51119995117188,
74
+ "loss": 0.6923,
75
+ "rewards/accuracies": 0.5218750238418579,
76
+ "rewards/chosen": 0.0011719572357833385,
77
+ "rewards/margins": 0.002339401515200734,
78
+ "rewards/rejected": -0.0011674443958327174,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.21,
83
+ "learning_rate": 3.424657534246575e-07,
84
+ "logits/chosen": -2.4232876300811768,
85
+ "logits/rejected": -2.3819985389709473,
86
+ "logps/chosen": -270.04620361328125,
87
+ "logps/rejected": -224.44857788085938,
88
+ "loss": 0.6904,
89
+ "rewards/accuracies": 0.53125,
90
+ "rewards/chosen": 0.0045755826868116856,
91
+ "rewards/margins": 0.007746423594653606,
92
+ "rewards/rejected": -0.003170841606333852,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.25,
97
+ "learning_rate": 4.10958904109589e-07,
98
+ "logits/chosen": -2.406489133834839,
99
+ "logits/rejected": -2.376723289489746,
100
+ "logps/chosen": -269.7476501464844,
101
+ "logps/rejected": -216.1673583984375,
102
+ "loss": 0.6893,
103
+ "rewards/accuracies": 0.5609375238418579,
104
+ "rewards/chosen": 0.00798078440129757,
105
+ "rewards/margins": 0.009231673553586006,
106
+ "rewards/rejected": -0.0012508893851190805,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.29,
111
+ "learning_rate": 4.794520547945205e-07,
112
+ "logits/chosen": -2.3951592445373535,
113
+ "logits/rejected": -2.3819260597229004,
114
+ "logps/chosen": -257.29620361328125,
115
+ "logps/rejected": -215.2325897216797,
116
+ "loss": 0.6874,
117
+ "rewards/accuracies": 0.5609375238418579,
118
+ "rewards/chosen": 0.012629570439457893,
119
+ "rewards/margins": 0.013305542059242725,
120
+ "rewards/rejected": -0.0006759737734682858,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.33,
125
+ "learning_rate": 4.946401225114854e-07,
126
+ "logits/chosen": -2.4296867847442627,
127
+ "logits/rejected": -2.368792772293091,
128
+ "logps/chosen": -267.8879699707031,
129
+ "logps/rejected": -219.56350708007812,
130
+ "loss": 0.6842,
131
+ "rewards/accuracies": 0.621874988079071,
132
+ "rewards/chosen": 0.019007008522748947,
133
+ "rewards/margins": 0.0214321780949831,
134
+ "rewards/rejected": -0.002425167942419648,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.37,
139
+ "learning_rate": 4.869831546707504e-07,
140
+ "logits/chosen": -2.463676929473877,
141
+ "logits/rejected": -2.3972015380859375,
142
+ "logps/chosen": -274.6397705078125,
143
+ "logps/rejected": -225.1620635986328,
144
+ "loss": 0.683,
145
+ "rewards/accuracies": 0.604687511920929,
146
+ "rewards/chosen": 0.024952612817287445,
147
+ "rewards/margins": 0.02081022970378399,
148
+ "rewards/rejected": 0.004142382647842169,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.41,
153
+ "learning_rate": 4.793261868300153e-07,
154
+ "logits/chosen": -2.4068779945373535,
155
+ "logits/rejected": -2.375128746032715,
156
+ "logps/chosen": -276.1687927246094,
157
+ "logps/rejected": -228.1005096435547,
158
+ "loss": 0.6783,
159
+ "rewards/accuracies": 0.660937488079071,
160
+ "rewards/chosen": 0.03607472777366638,
161
+ "rewards/margins": 0.034400396049022675,
162
+ "rewards/rejected": 0.001674329163506627,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.45,
167
+ "learning_rate": 4.7166921898928023e-07,
168
+ "logits/chosen": -2.445211887359619,
169
+ "logits/rejected": -2.3825695514678955,
170
+ "logps/chosen": -254.85311889648438,
171
+ "logps/rejected": -222.86117553710938,
172
+ "loss": 0.6761,
173
+ "rewards/accuracies": 0.6390625238418579,
174
+ "rewards/chosen": 0.034680236130952835,
175
+ "rewards/margins": 0.035091597586870193,
176
+ "rewards/rejected": -0.00041136034997180104,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "learning_rate": 4.640122511485451e-07,
182
+ "logits/chosen": -2.412264585494995,
183
+ "logits/rejected": -2.3916800022125244,
184
+ "logps/chosen": -259.5788879394531,
185
+ "logps/rejected": -220.30831909179688,
186
+ "loss": 0.6715,
187
+ "rewards/accuracies": 0.6421874761581421,
188
+ "rewards/chosen": 0.04985843971371651,
189
+ "rewards/margins": 0.050119031220674515,
190
+ "rewards/rejected": -0.00026059610536322,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "learning_rate": 4.563552833078101e-07,
196
+ "logits/chosen": -2.4411113262176514,
197
+ "logits/rejected": -2.376892566680908,
198
+ "logps/chosen": -268.37335205078125,
199
+ "logps/rejected": -226.0688934326172,
200
+ "loss": 0.666,
201
+ "rewards/accuracies": 0.675000011920929,
202
+ "rewards/chosen": 0.05422825738787651,
203
+ "rewards/margins": 0.05518989637494087,
204
+ "rewards/rejected": -0.0009616309544071555,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "learning_rate": 4.4869831546707505e-07,
210
+ "logits/chosen": -2.448073387145996,
211
+ "logits/rejected": -2.3861663341522217,
212
+ "logps/chosen": -273.23272705078125,
213
+ "logps/rejected": -230.17776489257812,
214
+ "loss": 0.6681,
215
+ "rewards/accuracies": 0.692187488079071,
216
+ "rewards/chosen": 0.06362518668174744,
217
+ "rewards/margins": 0.06625331938266754,
218
+ "rewards/rejected": -0.002628129906952381,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "learning_rate": 4.4104134762633994e-07,
224
+ "logits/chosen": -2.480207920074463,
225
+ "logits/rejected": -2.4179327487945557,
226
+ "logps/chosen": -268.4297790527344,
227
+ "logps/rejected": -226.82815551757812,
228
+ "loss": 0.6596,
229
+ "rewards/accuracies": 0.7250000238418579,
230
+ "rewards/chosen": 0.07131167501211166,
231
+ "rewards/margins": 0.0752154216170311,
232
+ "rewards/rejected": -0.0039037547539919615,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "learning_rate": 4.333843797856049e-07,
238
+ "logits/chosen": -2.438366651535034,
239
+ "logits/rejected": -2.3843159675598145,
240
+ "logps/chosen": -281.81756591796875,
241
+ "logps/rejected": -239.0724334716797,
242
+ "loss": 0.6555,
243
+ "rewards/accuracies": 0.7015625238418579,
244
+ "rewards/chosen": 0.08467759937047958,
245
+ "rewards/margins": 0.08220230042934418,
246
+ "rewards/rejected": 0.002475299406796694,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "learning_rate": 4.257274119448698e-07,
252
+ "logits/chosen": -2.4684793949127197,
253
+ "logits/rejected": -2.433481216430664,
254
+ "logps/chosen": -272.9942932128906,
255
+ "logps/rejected": -239.0500946044922,
256
+ "loss": 0.6542,
257
+ "rewards/accuracies": 0.6859375238418579,
258
+ "rewards/chosen": 0.08272445946931839,
259
+ "rewards/margins": 0.08306626230478287,
260
+ "rewards/rejected": -0.00034180469810962677,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "learning_rate": 4.180704441041347e-07,
266
+ "logits/chosen": -2.429438829421997,
267
+ "logits/rejected": -2.378554105758667,
268
+ "logps/chosen": -259.08294677734375,
269
+ "logps/rejected": -225.7974853515625,
270
+ "loss": 0.6565,
271
+ "rewards/accuracies": 0.6499999761581421,
272
+ "rewards/chosen": 0.08147875219583511,
273
+ "rewards/margins": 0.07715155184268951,
274
+ "rewards/rejected": 0.004327205941081047,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "learning_rate": 4.1041347626339966e-07,
280
+ "logits/chosen": -2.4421346187591553,
281
+ "logits/rejected": -2.3960843086242676,
282
+ "logps/chosen": -263.78216552734375,
283
+ "logps/rejected": -212.2428436279297,
284
+ "loss": 0.6485,
285
+ "rewards/accuracies": 0.684374988079071,
286
+ "rewards/chosen": 0.09069393575191498,
287
+ "rewards/margins": 0.1026170402765274,
288
+ "rewards/rejected": -0.011923106387257576,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.83,
293
+ "learning_rate": 4.027565084226646e-07,
294
+ "logits/chosen": -2.463709831237793,
295
+ "logits/rejected": -2.404499053955078,
296
+ "logps/chosen": -264.03118896484375,
297
+ "logps/rejected": -217.3473358154297,
298
+ "loss": 0.6415,
299
+ "rewards/accuracies": 0.7093750238418579,
300
+ "rewards/chosen": 0.10673630237579346,
301
+ "rewards/margins": 0.1264755129814148,
302
+ "rewards/rejected": -0.019739216193556786,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.87,
307
+ "learning_rate": 3.9509954058192954e-07,
308
+ "logits/chosen": -2.458402633666992,
309
+ "logits/rejected": -2.4066162109375,
310
+ "logps/chosen": -286.2770690917969,
311
+ "logps/rejected": -230.6191864013672,
312
+ "loss": 0.6401,
313
+ "rewards/accuracies": 0.6875,
314
+ "rewards/chosen": 0.10997898876667023,
315
+ "rewards/margins": 0.12333294004201889,
316
+ "rewards/rejected": -0.013353955931961536,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.91,
321
+ "learning_rate": 3.874425727411945e-07,
322
+ "logits/chosen": -2.419983386993408,
323
+ "logits/rejected": -2.386007308959961,
324
+ "logps/chosen": -278.9669494628906,
325
+ "logps/rejected": -227.8882293701172,
326
+ "loss": 0.6399,
327
+ "rewards/accuracies": 0.692187488079071,
328
+ "rewards/chosen": 0.11453185975551605,
329
+ "rewards/margins": 0.13373538851737976,
330
+ "rewards/rejected": -0.01920352131128311,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.95,
335
+ "learning_rate": 3.797856049004594e-07,
336
+ "logits/chosen": -2.4336562156677246,
337
+ "logits/rejected": -2.36842679977417,
338
+ "logps/chosen": -266.24053955078125,
339
+ "logps/rejected": -227.7322235107422,
340
+ "loss": 0.6365,
341
+ "rewards/accuracies": 0.6656249761581421,
342
+ "rewards/chosen": 0.12494595348834991,
343
+ "rewards/margins": 0.14494453370571136,
344
+ "rewards/rejected": -0.019998596981167793,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.99,
349
+ "learning_rate": 3.7212863705972436e-07,
350
+ "logits/chosen": -2.45171856880188,
351
+ "logits/rejected": -2.4028737545013428,
352
+ "logps/chosen": -273.0599365234375,
353
+ "logps/rejected": -221.7194366455078,
354
+ "loss": 0.6313,
355
+ "rewards/accuracies": 0.6875,
356
+ "rewards/chosen": 0.13546457886695862,
357
+ "rewards/margins": 0.1584644615650177,
358
+ "rewards/rejected": -0.02299986407160759,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "eval_logits/chosen": -2.1062474250793457,
364
+ "eval_logits/rejected": -1.9863277673721313,
365
+ "eval_logps/chosen": -266.3520812988281,
366
+ "eval_logps/rejected": -220.30364990234375,
367
+ "eval_loss": 0.6318375468254089,
368
+ "eval_rewards/accuracies": 0.6600000262260437,
369
+ "eval_rewards/chosen": 0.12281632423400879,
370
+ "eval_rewards/margins": 0.1532445251941681,
371
+ "eval_rewards/rejected": -0.030428189784288406,
372
+ "eval_runtime": 170.6685,
373
+ "eval_samples_per_second": 11.719,
374
+ "eval_steps_per_second": 0.732,
375
+ "step": 242
376
+ },
377
+ {
378
+ "epoch": 1.03,
379
+ "learning_rate": 3.6447166921898925e-07,
380
+ "logits/chosen": -2.3959295749664307,
381
+ "logits/rejected": -2.3386054039001465,
382
+ "logps/chosen": -259.109619140625,
383
+ "logps/rejected": -209.0337677001953,
384
+ "loss": 0.6337,
385
+ "rewards/accuracies": 0.684374988079071,
386
+ "rewards/chosen": 0.12544557452201843,
387
+ "rewards/margins": 0.1500503271818161,
388
+ "rewards/rejected": -0.024604763835668564,
389
+ "step": 250
390
+ },
391
+ {
392
+ "epoch": 1.07,
393
+ "learning_rate": 3.568147013782542e-07,
394
+ "logits/chosen": -2.4076590538024902,
395
+ "logits/rejected": -2.3606529235839844,
396
+ "logps/chosen": -263.9520568847656,
397
+ "logps/rejected": -225.4473876953125,
398
+ "loss": 0.6258,
399
+ "rewards/accuracies": 0.721875011920929,
400
+ "rewards/chosen": 0.1394219994544983,
401
+ "rewards/margins": 0.1773197054862976,
402
+ "rewards/rejected": -0.03789771348237991,
403
+ "step": 260
404
+ },
405
+ {
406
+ "epoch": 1.12,
407
+ "learning_rate": 3.4915773353751913e-07,
408
+ "logits/chosen": -2.417232036590576,
409
+ "logits/rejected": -2.346893072128296,
410
+ "logps/chosen": -280.55291748046875,
411
+ "logps/rejected": -234.11996459960938,
412
+ "loss": 0.6203,
413
+ "rewards/accuracies": 0.729687511920929,
414
+ "rewards/chosen": 0.1420687586069107,
415
+ "rewards/margins": 0.20756664872169495,
416
+ "rewards/rejected": -0.06549788266420364,
417
+ "step": 270
418
+ },
419
+ {
420
+ "epoch": 1.16,
421
+ "learning_rate": 3.41500765696784e-07,
422
+ "logits/chosen": -2.4278924465179443,
423
+ "logits/rejected": -2.3850674629211426,
424
+ "logps/chosen": -253.51803588867188,
425
+ "logps/rejected": -224.95968627929688,
426
+ "loss": 0.6242,
427
+ "rewards/accuracies": 0.6968749761581421,
428
+ "rewards/chosen": 0.13101010024547577,
429
+ "rewards/margins": 0.16972467303276062,
430
+ "rewards/rejected": -0.03871458023786545,
431
+ "step": 280
432
+ },
433
+ {
434
+ "epoch": 1.2,
435
+ "learning_rate": 3.33843797856049e-07,
436
+ "logits/chosen": -2.3946681022644043,
437
+ "logits/rejected": -2.380558967590332,
438
+ "logps/chosen": -284.79608154296875,
439
+ "logps/rejected": -226.66262817382812,
440
+ "loss": 0.6194,
441
+ "rewards/accuracies": 0.71875,
442
+ "rewards/chosen": 0.13935603201389313,
443
+ "rewards/margins": 0.20914654433727264,
444
+ "rewards/rejected": -0.06979051232337952,
445
+ "step": 290
446
+ },
447
+ {
448
+ "epoch": 1.24,
449
+ "learning_rate": 3.2618683001531396e-07,
450
+ "logits/chosen": -2.3978686332702637,
451
+ "logits/rejected": -2.3249762058258057,
452
+ "logps/chosen": -263.2580871582031,
453
+ "logps/rejected": -226.63101196289062,
454
+ "loss": 0.6169,
455
+ "rewards/accuracies": 0.706250011920929,
456
+ "rewards/chosen": 0.14139556884765625,
457
+ "rewards/margins": 0.1872793436050415,
458
+ "rewards/rejected": -0.04588378220796585,
459
+ "step": 300
460
+ },
461
+ {
462
+ "epoch": 1.28,
463
+ "learning_rate": 3.1852986217457885e-07,
464
+ "logits/chosen": -2.4475905895233154,
465
+ "logits/rejected": -2.363696575164795,
466
+ "logps/chosen": -262.8795471191406,
467
+ "logps/rejected": -227.06930541992188,
468
+ "loss": 0.619,
469
+ "rewards/accuracies": 0.7203124761581421,
470
+ "rewards/chosen": 0.15051104128360748,
471
+ "rewards/margins": 0.1968255192041397,
472
+ "rewards/rejected": -0.04631447046995163,
473
+ "step": 310
474
+ },
475
+ {
476
+ "epoch": 1.32,
477
+ "learning_rate": 3.108728943338438e-07,
478
+ "logits/chosen": -2.438253164291382,
479
+ "logits/rejected": -2.421774387359619,
480
+ "logps/chosen": -253.95639038085938,
481
+ "logps/rejected": -236.4005584716797,
482
+ "loss": 0.6208,
483
+ "rewards/accuracies": 0.65625,
484
+ "rewards/chosen": 0.13959024846553802,
485
+ "rewards/margins": 0.1706792563199997,
486
+ "rewards/rejected": -0.031088998541235924,
487
+ "step": 320
488
+ },
489
+ {
490
+ "epoch": 1.36,
491
+ "learning_rate": 3.0321592649310873e-07,
492
+ "logits/chosen": -2.4183051586151123,
493
+ "logits/rejected": -2.339179039001465,
494
+ "logps/chosen": -254.692626953125,
495
+ "logps/rejected": -222.8722381591797,
496
+ "loss": 0.6141,
497
+ "rewards/accuracies": 0.699999988079071,
498
+ "rewards/chosen": 0.1566968411207199,
499
+ "rewards/margins": 0.19593168795108795,
500
+ "rewards/rejected": -0.03923482820391655,
501
+ "step": 330
502
+ },
503
+ {
504
+ "epoch": 1.4,
505
+ "learning_rate": 2.955589586523736e-07,
506
+ "logits/chosen": -2.4377613067626953,
507
+ "logits/rejected": -2.366072177886963,
508
+ "logps/chosen": -267.422607421875,
509
+ "logps/rejected": -224.8284149169922,
510
+ "loss": 0.6188,
511
+ "rewards/accuracies": 0.6890624761581421,
512
+ "rewards/chosen": 0.14487192034721375,
513
+ "rewards/margins": 0.2071552723646164,
514
+ "rewards/rejected": -0.06228336691856384,
515
+ "step": 340
516
+ },
517
+ {
518
+ "epoch": 1.45,
519
+ "learning_rate": 2.8790199081163856e-07,
520
+ "logits/chosen": -2.435045003890991,
521
+ "logits/rejected": -2.3581719398498535,
522
+ "logps/chosen": -267.0567626953125,
523
+ "logps/rejected": -223.5650177001953,
524
+ "loss": 0.6136,
525
+ "rewards/accuracies": 0.6890624761581421,
526
+ "rewards/chosen": 0.1428806185722351,
527
+ "rewards/margins": 0.20400968194007874,
528
+ "rewards/rejected": -0.06112906336784363,
529
+ "step": 350
530
+ },
531
+ {
532
+ "epoch": 1.49,
533
+ "learning_rate": 2.802450229709035e-07,
534
+ "logits/chosen": -2.3934175968170166,
535
+ "logits/rejected": -2.3315463066101074,
536
+ "logps/chosen": -256.1235656738281,
537
+ "logps/rejected": -211.8139190673828,
538
+ "loss": 0.6093,
539
+ "rewards/accuracies": 0.6937500238418579,
540
+ "rewards/chosen": 0.14465923607349396,
541
+ "rewards/margins": 0.20695683360099792,
542
+ "rewards/rejected": -0.06229761987924576,
543
+ "step": 360
544
+ },
545
+ {
546
+ "epoch": 1.53,
547
+ "learning_rate": 2.725880551301684e-07,
548
+ "logits/chosen": -2.4487271308898926,
549
+ "logits/rejected": -2.3933699131011963,
550
+ "logps/chosen": -275.8642578125,
551
+ "logps/rejected": -231.2057342529297,
552
+ "loss": 0.6129,
553
+ "rewards/accuracies": 0.692187488079071,
554
+ "rewards/chosen": 0.14190760254859924,
555
+ "rewards/margins": 0.22471928596496582,
556
+ "rewards/rejected": -0.08281168341636658,
557
+ "step": 370
558
+ },
559
+ {
560
+ "epoch": 1.57,
561
+ "learning_rate": 2.649310872894334e-07,
562
+ "logits/chosen": -2.4377894401550293,
563
+ "logits/rejected": -2.406348466873169,
564
+ "logps/chosen": -276.6672058105469,
565
+ "logps/rejected": -221.1224365234375,
566
+ "loss": 0.603,
567
+ "rewards/accuracies": 0.7093750238418579,
568
+ "rewards/chosen": 0.15706932544708252,
569
+ "rewards/margins": 0.24053001403808594,
570
+ "rewards/rejected": -0.08346069604158401,
571
+ "step": 380
572
+ },
573
+ {
574
+ "epoch": 1.61,
575
+ "learning_rate": 2.572741194486983e-07,
576
+ "logits/chosen": -2.4386019706726074,
577
+ "logits/rejected": -2.3754172325134277,
578
+ "logps/chosen": -274.6893005371094,
579
+ "logps/rejected": -206.1323699951172,
580
+ "loss": 0.6004,
581
+ "rewards/accuracies": 0.715624988079071,
582
+ "rewards/chosen": 0.167318657040596,
583
+ "rewards/margins": 0.25797349214553833,
584
+ "rewards/rejected": -0.09065485745668411,
585
+ "step": 390
586
+ },
587
+ {
588
+ "epoch": 1.65,
589
+ "learning_rate": 2.496171516079632e-07,
590
+ "logits/chosen": -2.4488420486450195,
591
+ "logits/rejected": -2.3803772926330566,
592
+ "logps/chosen": -294.0111389160156,
593
+ "logps/rejected": -237.24282836914062,
594
+ "loss": 0.6033,
595
+ "rewards/accuracies": 0.714062511920929,
596
+ "rewards/chosen": 0.17194847762584686,
597
+ "rewards/margins": 0.2693827748298645,
598
+ "rewards/rejected": -0.09743430465459824,
599
+ "step": 400
600
+ },
601
+ {
602
+ "epoch": 1.69,
603
+ "learning_rate": 2.4196018376722816e-07,
604
+ "logits/chosen": -2.413655996322632,
605
+ "logits/rejected": -2.38279390335083,
606
+ "logps/chosen": -279.74090576171875,
607
+ "logps/rejected": -226.3095245361328,
608
+ "loss": 0.6081,
609
+ "rewards/accuracies": 0.6953125,
610
+ "rewards/chosen": 0.16821043193340302,
611
+ "rewards/margins": 0.26299187541007996,
612
+ "rewards/rejected": -0.09478144347667694,
613
+ "step": 410
614
+ },
615
+ {
616
+ "epoch": 1.74,
617
+ "learning_rate": 2.343032159264931e-07,
618
+ "logits/chosen": -2.390028476715088,
619
+ "logits/rejected": -2.3162546157836914,
620
+ "logps/chosen": -270.34320068359375,
621
+ "logps/rejected": -221.6541290283203,
622
+ "loss": 0.6019,
623
+ "rewards/accuracies": 0.721875011920929,
624
+ "rewards/chosen": 0.17296920716762543,
625
+ "rewards/margins": 0.2894430458545685,
626
+ "rewards/rejected": -0.11647380888462067,
627
+ "step": 420
628
+ },
629
+ {
630
+ "epoch": 1.78,
631
+ "learning_rate": 2.26646248085758e-07,
632
+ "logits/chosen": -2.3680365085601807,
633
+ "logits/rejected": -2.3160338401794434,
634
+ "logps/chosen": -260.14593505859375,
635
+ "logps/rejected": -214.6702423095703,
636
+ "loss": 0.5951,
637
+ "rewards/accuracies": 0.721875011920929,
638
+ "rewards/chosen": 0.16743981838226318,
639
+ "rewards/margins": 0.2915259897708893,
640
+ "rewards/rejected": -0.1240861564874649,
641
+ "step": 430
642
+ },
643
+ {
644
+ "epoch": 1.82,
645
+ "learning_rate": 2.1898928024502298e-07,
646
+ "logits/chosen": -2.443164825439453,
647
+ "logits/rejected": -2.3816096782684326,
648
+ "logps/chosen": -264.55914306640625,
649
+ "logps/rejected": -219.47341918945312,
650
+ "loss": 0.6012,
651
+ "rewards/accuracies": 0.675000011920929,
652
+ "rewards/chosen": 0.15291035175323486,
653
+ "rewards/margins": 0.2545214593410492,
654
+ "rewards/rejected": -0.10161112248897552,
655
+ "step": 440
656
+ },
657
+ {
658
+ "epoch": 1.86,
659
+ "learning_rate": 2.113323124042879e-07,
660
+ "logits/chosen": -2.416532039642334,
661
+ "logits/rejected": -2.3757832050323486,
662
+ "logps/chosen": -272.0744934082031,
663
+ "logps/rejected": -228.9375,
664
+ "loss": 0.5995,
665
+ "rewards/accuracies": 0.698437511920929,
666
+ "rewards/chosen": 0.17259187996387482,
667
+ "rewards/margins": 0.2831020951271057,
668
+ "rewards/rejected": -0.1105102151632309,
669
+ "step": 450
670
+ },
671
+ {
672
+ "epoch": 1.9,
673
+ "learning_rate": 2.036753445635528e-07,
674
+ "logits/chosen": -2.4263906478881836,
675
+ "logits/rejected": -2.4025325775146484,
676
+ "logps/chosen": -284.97576904296875,
677
+ "logps/rejected": -235.3527069091797,
678
+ "loss": 0.5904,
679
+ "rewards/accuracies": 0.706250011920929,
680
+ "rewards/chosen": 0.16448193788528442,
681
+ "rewards/margins": 0.32010418176651,
682
+ "rewards/rejected": -0.1556222140789032,
683
+ "step": 460
684
+ },
685
+ {
686
+ "epoch": 1.94,
687
+ "learning_rate": 1.9601837672281775e-07,
688
+ "logits/chosen": -2.402890920639038,
689
+ "logits/rejected": -2.3698153495788574,
690
+ "logps/chosen": -271.0093688964844,
691
+ "logps/rejected": -234.02481079101562,
692
+ "loss": 0.5976,
693
+ "rewards/accuracies": 0.7250000238418579,
694
+ "rewards/chosen": 0.14684638381004333,
695
+ "rewards/margins": 0.2707447409629822,
696
+ "rewards/rejected": -0.12389836460351944,
697
+ "step": 470
698
+ },
699
+ {
700
+ "epoch": 1.98,
701
+ "learning_rate": 1.883614088820827e-07,
702
+ "logits/chosen": -2.425776958465576,
703
+ "logits/rejected": -2.371450901031494,
704
+ "logps/chosen": -269.1894226074219,
705
+ "logps/rejected": -222.6921844482422,
706
+ "loss": 0.6013,
707
+ "rewards/accuracies": 0.6968749761581421,
708
+ "rewards/chosen": 0.1555326133966446,
709
+ "rewards/margins": 0.28726813197135925,
710
+ "rewards/rejected": -0.13173556327819824,
711
+ "step": 480
712
+ },
713
+ {
714
+ "epoch": 2.0,
715
+ "eval_logits/chosen": -2.09140682220459,
716
+ "eval_logits/rejected": -1.9723255634307861,
717
+ "eval_logps/chosen": -266.0958557128906,
718
+ "eval_logps/rejected": -221.3338165283203,
719
+ "eval_loss": 0.598257839679718,
720
+ "eval_rewards/accuracies": 0.6759999990463257,
721
+ "eval_rewards/chosen": 0.14844101667404175,
722
+ "eval_rewards/margins": 0.28188496828079224,
723
+ "eval_rewards/rejected": -0.1334439367055893,
724
+ "eval_runtime": 171.7034,
725
+ "eval_samples_per_second": 11.648,
726
+ "eval_steps_per_second": 0.728,
727
+ "step": 484
728
+ },
729
+ {
730
+ "epoch": 2.02,
731
+ "learning_rate": 1.807044410413476e-07,
732
+ "logits/chosen": -2.3953592777252197,
733
+ "logits/rejected": -2.363999843597412,
734
+ "logps/chosen": -258.8542175292969,
735
+ "logps/rejected": -233.6983642578125,
736
+ "loss": 0.5992,
737
+ "rewards/accuracies": 0.7171875238418579,
738
+ "rewards/chosen": 0.1408676654100418,
739
+ "rewards/margins": 0.27908438444137573,
740
+ "rewards/rejected": -0.13821670413017273,
741
+ "step": 490
742
+ },
743
+ {
744
+ "epoch": 2.07,
745
+ "learning_rate": 1.7304747320061255e-07,
746
+ "logits/chosen": -2.3902556896209717,
747
+ "logits/rejected": -2.3884072303771973,
748
+ "logps/chosen": -265.0683288574219,
749
+ "logps/rejected": -228.423828125,
750
+ "loss": 0.5955,
751
+ "rewards/accuracies": 0.684374988079071,
752
+ "rewards/chosen": 0.17755301296710968,
753
+ "rewards/margins": 0.3005513548851013,
754
+ "rewards/rejected": -0.12299831956624985,
755
+ "step": 500
756
+ },
757
+ {
758
+ "epoch": 2.11,
759
+ "learning_rate": 1.6539050535987747e-07,
760
+ "logits/chosen": -2.473872661590576,
761
+ "logits/rejected": -2.3617444038391113,
762
+ "logps/chosen": -270.9250793457031,
763
+ "logps/rejected": -222.18264770507812,
764
+ "loss": 0.5912,
765
+ "rewards/accuracies": 0.714062511920929,
766
+ "rewards/chosen": 0.16197429597377777,
767
+ "rewards/margins": 0.3233007788658142,
768
+ "rewards/rejected": -0.16132643818855286,
769
+ "step": 510
770
+ },
771
+ {
772
+ "epoch": 2.15,
773
+ "learning_rate": 1.5773353751914243e-07,
774
+ "logits/chosen": -2.4702870845794678,
775
+ "logits/rejected": -2.4053397178649902,
776
+ "logps/chosen": -285.6563720703125,
777
+ "logps/rejected": -227.668212890625,
778
+ "loss": 0.5944,
779
+ "rewards/accuracies": 0.7046874761581421,
780
+ "rewards/chosen": 0.1673307716846466,
781
+ "rewards/margins": 0.3238461911678314,
782
+ "rewards/rejected": -0.15651538968086243,
783
+ "step": 520
784
+ },
785
+ {
786
+ "epoch": 2.19,
787
+ "learning_rate": 1.5007656967840735e-07,
788
+ "logits/chosen": -2.381838798522949,
789
+ "logits/rejected": -2.3299927711486816,
790
+ "logps/chosen": -254.9144287109375,
791
+ "logps/rejected": -228.2997589111328,
792
+ "loss": 0.5935,
793
+ "rewards/accuracies": 0.721875011920929,
794
+ "rewards/chosen": 0.16740316152572632,
795
+ "rewards/margins": 0.29852622747421265,
796
+ "rewards/rejected": -0.13112305104732513,
797
+ "step": 530
798
+ },
799
+ {
800
+ "epoch": 2.23,
801
+ "learning_rate": 1.4241960183767226e-07,
802
+ "logits/chosen": -2.4169716835021973,
803
+ "logits/rejected": -2.3592681884765625,
804
+ "logps/chosen": -283.3446350097656,
805
+ "logps/rejected": -233.24679565429688,
806
+ "loss": 0.5918,
807
+ "rewards/accuracies": 0.684374988079071,
808
+ "rewards/chosen": 0.1610698103904724,
809
+ "rewards/margins": 0.3080851435661316,
810
+ "rewards/rejected": -0.1470153033733368,
811
+ "step": 540
812
+ },
813
+ {
814
+ "epoch": 2.27,
815
+ "learning_rate": 1.347626339969372e-07,
816
+ "logits/chosen": -2.408930540084839,
817
+ "logits/rejected": -2.37349009513855,
818
+ "logps/chosen": -271.91973876953125,
819
+ "logps/rejected": -240.18679809570312,
820
+ "loss": 0.5949,
821
+ "rewards/accuracies": 0.692187488079071,
822
+ "rewards/chosen": 0.17847296595573425,
823
+ "rewards/margins": 0.29651308059692383,
824
+ "rewards/rejected": -0.118040069937706,
825
+ "step": 550
826
+ },
827
+ {
828
+ "epoch": 2.31,
829
+ "learning_rate": 1.2710566615620215e-07,
830
+ "logits/chosen": -2.367671489715576,
831
+ "logits/rejected": -2.320443868637085,
832
+ "logps/chosen": -275.58294677734375,
833
+ "logps/rejected": -222.78842163085938,
834
+ "loss": 0.5862,
835
+ "rewards/accuracies": 0.7109375,
836
+ "rewards/chosen": 0.17732994258403778,
837
+ "rewards/margins": 0.3325561583042145,
838
+ "rewards/rejected": -0.1552262306213379,
839
+ "step": 560
840
+ },
841
+ {
842
+ "epoch": 2.35,
843
+ "learning_rate": 1.1944869831546706e-07,
844
+ "logits/chosen": -2.388644218444824,
845
+ "logits/rejected": -2.3782949447631836,
846
+ "logps/chosen": -268.08172607421875,
847
+ "logps/rejected": -236.77163696289062,
848
+ "loss": 0.5938,
849
+ "rewards/accuracies": 0.668749988079071,
850
+ "rewards/chosen": 0.1551700383424759,
851
+ "rewards/margins": 0.3083476424217224,
852
+ "rewards/rejected": -0.15317757427692413,
853
+ "step": 570
854
+ },
855
+ {
856
+ "epoch": 2.4,
857
+ "learning_rate": 1.11791730474732e-07,
858
+ "logits/chosen": -2.398094654083252,
859
+ "logits/rejected": -2.3552727699279785,
860
+ "logps/chosen": -263.4698181152344,
861
+ "logps/rejected": -223.05538940429688,
862
+ "loss": 0.5874,
863
+ "rewards/accuracies": 0.715624988079071,
864
+ "rewards/chosen": 0.16049396991729736,
865
+ "rewards/margins": 0.35638219118118286,
866
+ "rewards/rejected": -0.1958882361650467,
867
+ "step": 580
868
+ },
869
+ {
870
+ "epoch": 2.44,
871
+ "learning_rate": 1.0413476263399694e-07,
872
+ "logits/chosen": -2.4469425678253174,
873
+ "logits/rejected": -2.392503261566162,
874
+ "logps/chosen": -270.57635498046875,
875
+ "logps/rejected": -216.8152618408203,
876
+ "loss": 0.5878,
877
+ "rewards/accuracies": 0.7109375,
878
+ "rewards/chosen": 0.15162338316440582,
879
+ "rewards/margins": 0.3167043924331665,
880
+ "rewards/rejected": -0.1650809943675995,
881
+ "step": 590
882
+ },
883
+ {
884
+ "epoch": 2.48,
885
+ "learning_rate": 9.647779479326186e-08,
886
+ "logits/chosen": -2.3848066329956055,
887
+ "logits/rejected": -2.3738114833831787,
888
+ "logps/chosen": -251.4600372314453,
889
+ "logps/rejected": -222.6741180419922,
890
+ "loss": 0.5932,
891
+ "rewards/accuracies": 0.690625011920929,
892
+ "rewards/chosen": 0.16963472962379456,
893
+ "rewards/margins": 0.2706124782562256,
894
+ "rewards/rejected": -0.10097774118185043,
895
+ "step": 600
896
+ },
897
+ {
898
+ "epoch": 2.52,
899
+ "learning_rate": 8.88208269525268e-08,
900
+ "logits/chosen": -2.377875804901123,
901
+ "logits/rejected": -2.374070644378662,
902
+ "logps/chosen": -261.51788330078125,
903
+ "logps/rejected": -221.13034057617188,
904
+ "loss": 0.5909,
905
+ "rewards/accuracies": 0.690625011920929,
906
+ "rewards/chosen": 0.1429089605808258,
907
+ "rewards/margins": 0.29002851247787476,
908
+ "rewards/rejected": -0.14711955189704895,
909
+ "step": 610
910
+ },
911
+ {
912
+ "epoch": 2.56,
913
+ "learning_rate": 8.116385911179173e-08,
914
+ "logits/chosen": -2.3656888008117676,
915
+ "logits/rejected": -2.3523547649383545,
916
+ "logps/chosen": -281.01617431640625,
917
+ "logps/rejected": -217.3480682373047,
918
+ "loss": 0.5924,
919
+ "rewards/accuracies": 0.7109375,
920
+ "rewards/chosen": 0.17781397700309753,
921
+ "rewards/margins": 0.3453444540500641,
922
+ "rewards/rejected": -0.16753047704696655,
923
+ "step": 620
924
+ },
925
+ {
926
+ "epoch": 2.6,
927
+ "learning_rate": 7.350689127105667e-08,
928
+ "logits/chosen": -2.4372496604919434,
929
+ "logits/rejected": -2.3591084480285645,
930
+ "logps/chosen": -273.20965576171875,
931
+ "logps/rejected": -229.5889434814453,
932
+ "loss": 0.5833,
933
+ "rewards/accuracies": 0.6890624761581421,
934
+ "rewards/chosen": 0.17219647765159607,
935
+ "rewards/margins": 0.35740959644317627,
936
+ "rewards/rejected": -0.185213103890419,
937
+ "step": 630
938
+ },
939
+ {
940
+ "epoch": 2.64,
941
+ "learning_rate": 6.584992343032159e-08,
942
+ "logits/chosen": -2.351811647415161,
943
+ "logits/rejected": -2.3350961208343506,
944
+ "logps/chosen": -263.6435241699219,
945
+ "logps/rejected": -226.4728240966797,
946
+ "loss": 0.5973,
947
+ "rewards/accuracies": 0.682812511920929,
948
+ "rewards/chosen": 0.15416522324085236,
949
+ "rewards/margins": 0.29506421089172363,
950
+ "rewards/rejected": -0.14089898765087128,
951
+ "step": 640
952
+ },
953
+ {
954
+ "epoch": 2.69,
955
+ "learning_rate": 5.819295558958652e-08,
956
+ "logits/chosen": -2.390491008758545,
957
+ "logits/rejected": -2.3410990238189697,
958
+ "logps/chosen": -289.5431823730469,
959
+ "logps/rejected": -219.86788940429688,
960
+ "loss": 0.5824,
961
+ "rewards/accuracies": 0.7328125238418579,
962
+ "rewards/chosen": 0.18201851844787598,
963
+ "rewards/margins": 0.38538652658462524,
964
+ "rewards/rejected": -0.20336803793907166,
965
+ "step": 650
966
+ },
967
+ {
968
+ "epoch": 2.73,
969
+ "learning_rate": 5.0535987748851455e-08,
970
+ "logits/chosen": -2.449439525604248,
971
+ "logits/rejected": -2.3970067501068115,
972
+ "logps/chosen": -276.04620361328125,
973
+ "logps/rejected": -229.76620483398438,
974
+ "loss": 0.5751,
975
+ "rewards/accuracies": 0.7093750238418579,
976
+ "rewards/chosen": 0.15049387514591217,
977
+ "rewards/margins": 0.32289570569992065,
978
+ "rewards/rejected": -0.17240183055400848,
979
+ "step": 660
980
+ },
981
+ {
982
+ "epoch": 2.77,
983
+ "learning_rate": 4.287901990811638e-08,
984
+ "logits/chosen": -2.436511993408203,
985
+ "logits/rejected": -2.341296672821045,
986
+ "logps/chosen": -269.1939392089844,
987
+ "logps/rejected": -242.32968139648438,
988
+ "loss": 0.583,
989
+ "rewards/accuracies": 0.699999988079071,
990
+ "rewards/chosen": 0.16247320175170898,
991
+ "rewards/margins": 0.3472925126552582,
992
+ "rewards/rejected": -0.1848193258047104,
993
+ "step": 670
994
+ },
995
+ {
996
+ "epoch": 2.81,
997
+ "learning_rate": 3.522205206738132e-08,
998
+ "logits/chosen": -2.3986496925354004,
999
+ "logits/rejected": -2.3604884147644043,
1000
+ "logps/chosen": -276.03436279296875,
1001
+ "logps/rejected": -227.72573852539062,
1002
+ "loss": 0.5863,
1003
+ "rewards/accuracies": 0.7124999761581421,
1004
+ "rewards/chosen": 0.16009405255317688,
1005
+ "rewards/margins": 0.34014397859573364,
1006
+ "rewards/rejected": -0.18004995584487915,
1007
+ "step": 680
1008
+ },
1009
+ {
1010
+ "epoch": 2.85,
1011
+ "learning_rate": 2.7565084226646246e-08,
1012
+ "logits/chosen": -2.375347137451172,
1013
+ "logits/rejected": -2.3558340072631836,
1014
+ "logps/chosen": -265.17486572265625,
1015
+ "logps/rejected": -234.3466796875,
1016
+ "loss": 0.5896,
1017
+ "rewards/accuracies": 0.721875011920929,
1018
+ "rewards/chosen": 0.16287991404533386,
1019
+ "rewards/margins": 0.35990676283836365,
1020
+ "rewards/rejected": -0.19702686369419098,
1021
+ "step": 690
1022
+ },
1023
+ {
1024
+ "epoch": 2.89,
1025
+ "learning_rate": 1.9908116385911178e-08,
1026
+ "logits/chosen": -2.4065492153167725,
1027
+ "logits/rejected": -2.36779522895813,
1028
+ "logps/chosen": -261.1010437011719,
1029
+ "logps/rejected": -224.9178009033203,
1030
+ "loss": 0.5892,
1031
+ "rewards/accuracies": 0.6546875238418579,
1032
+ "rewards/chosen": 0.15390679240226746,
1033
+ "rewards/margins": 0.2883544862270355,
1034
+ "rewards/rejected": -0.13444769382476807,
1035
+ "step": 700
1036
+ },
1037
+ {
1038
+ "epoch": 2.93,
1039
+ "learning_rate": 1.225114854517611e-08,
1040
+ "logits/chosen": -2.433964252471924,
1041
+ "logits/rejected": -2.3655142784118652,
1042
+ "logps/chosen": -281.37139892578125,
1043
+ "logps/rejected": -219.88296508789062,
1044
+ "loss": 0.5838,
1045
+ "rewards/accuracies": 0.7093750238418579,
1046
+ "rewards/chosen": 0.1487404853105545,
1047
+ "rewards/margins": 0.35009315609931946,
1048
+ "rewards/rejected": -0.20135268568992615,
1049
+ "step": 710
1050
+ },
1051
+ {
1052
+ "epoch": 2.97,
1053
+ "learning_rate": 4.594180704441042e-09,
1054
+ "logits/chosen": -2.3966562747955322,
1055
+ "logits/rejected": -2.368652820587158,
1056
+ "logps/chosen": -258.764892578125,
1057
+ "logps/rejected": -223.00405883789062,
1058
+ "loss": 0.5889,
1059
+ "rewards/accuracies": 0.707812488079071,
1060
+ "rewards/chosen": 0.14300301671028137,
1061
+ "rewards/margins": 0.3152904212474823,
1062
+ "rewards/rejected": -0.17228738963603973,
1063
+ "step": 720
1064
+ },
1065
+ {
1066
+ "epoch": 3.0,
1067
+ "eval_logits/chosen": -2.085902214050293,
1068
+ "eval_logits/rejected": -1.9675065279006958,
1069
+ "eval_logps/chosen": -266.14141845703125,
1070
+ "eval_logps/rejected": -221.83285522460938,
1071
+ "eval_loss": 0.5895045399665833,
1072
+ "eval_rewards/accuracies": 0.6880000233650208,
1073
+ "eval_rewards/chosen": 0.14388784766197205,
1074
+ "eval_rewards/margins": 0.3272360563278198,
1075
+ "eval_rewards/rejected": -0.1833481788635254,
1076
+ "eval_runtime": 170.0,
1077
+ "eval_samples_per_second": 11.765,
1078
+ "eval_steps_per_second": 0.735,
1079
+ "step": 726
1080
+ },
1081
+ {
1082
+ "epoch": 3.0,
1083
+ "step": 726,
1084
+ "total_flos": 0.0,
1085
+ "train_loss": 0.622471270236102,
1086
+ "train_runtime": 20371.8366,
1087
+ "train_samples_per_second": 9.125,
1088
+ "train_steps_per_second": 0.036
1089
+ }
1090
+ ],
1091
+ "logging_steps": 10,
1092
+ "max_steps": 726,
1093
+ "num_train_epochs": 3,
1094
+ "save_steps": 500,
1095
+ "total_flos": 0.0,
1096
+ "trial_name": null,
1097
+ "trial_params": null
1098
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3755299ac3d7fee80db1f1444dda220c2fca58cd01a89e4b8cf67c82f95bb3d6
3
+ size 4728