sambar committed
Commit 3d5dc29 · 1 Parent(s): a7b7cbd

Model save
README.md ADDED
@@ -0,0 +1,73 @@
+ ---
+ license: apache-2.0
+ base_model: mistralai/Mistral-7B-v0.1
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: sambar-7b-dpo-lora
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # sambar-7b-dpo-lora
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.5747
+ - Rewards/chosen: -0.0141
+ - Rewards/rejected: -0.4147
+ - Rewards/accuracies: 0.7060
+ - Rewards/margins: 0.4006
+ - Logps/rejected: -221.3069
+ - Logps/chosen: -263.0773
+ - Logits/rejected: -2.1478
+ - Logits/chosen: -2.2594
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 2
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 4
+ - gradient_accumulation_steps: 32
+ - total_train_batch_size: 256
+ - total_eval_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.6213 | 1.0 | 242 | 0.6182 | 0.0426 | -0.1569 | 0.6860 | 0.1995 | -218.7293 | -262.5110 | -2.1605 | -2.2727 |
+ | 0.5903 | 2.0 | 484 | 0.5826 | 0.0046 | -0.3500 | 0.6940 | 0.3546 | -220.6603 | -262.8906 | -2.1517 | -2.2634 |
+ | 0.5743 | 3.0 | 726 | 0.5747 | -0.0141 | -0.4147 | 0.7060 | 0.4006 | -221.3069 | -263.0773 | -2.1478 | -2.2594 |
+
+
+ ### Framework versions
+
+ - Transformers 4.35.0
+ - Pytorch 2.1.2+cu121
+ - Datasets 2.14.6
+ - Tokenizers 0.14.1
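
For reference, the adapter published in this commit is meant to be loaded on top of the base model with PEFT. The snippet below is a minimal sketch rather than part of the commit; the adapter id is a placeholder to replace with this repository's actual path.

```python
# Minimal loading sketch (not part of this commit). Replace adapter_id with the
# actual local path or hub id of this repository.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "mistralai/Mistral-7B-v0.1"
adapter_id = "sambar/sambar-7b-dpo-lora"  # placeholder repo id

tokenizer = AutoTokenizer.from_pretrained(adapter_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)  # attach the LoRA weights
# model = model.merge_and_unload()  # optional: fold the adapter into the base weights

# Prompt formatted to match the chat template shipped in this repo's tokenizer_config.json.
prompt = "<|user|>\nSummarize what DPO training does.</s>\n<|assistant|>\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```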
adapter_config.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "o_proj",
+ "v_proj",
+ "k_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
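
The adapter configuration above corresponds to the following PEFT LoraConfig. This is a reconstruction for readability, not the original training code. With r=64 on the four attention projections of all 32 layers this comes to roughly 55M trainable parameters, which stored in float32 is consistent with the ~218 MB adapter_model.safetensors below.

```python
# Reconstruction of adapter_config.json as a peft.LoraConfig (not the original script).
from peft import LoraConfig

peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "o_proj", "v_proj", "k_proj"],
    task_type="CAUSAL_LM",
)
```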
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8213aa18221d9621f8acf465fae81941547afc8f863ba01eb0c3b90f44c7a05
+ size 218138576
all_results.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "epoch": 3.0,
+ "eval_logits/chosen": -2.259361505508423,
+ "eval_logits/rejected": -2.14780330657959,
+ "eval_logps/chosen": -263.0772705078125,
+ "eval_logps/rejected": -221.30685424804688,
+ "eval_loss": 0.5746620893478394,
+ "eval_rewards/accuracies": 0.7059999704360962,
+ "eval_rewards/chosen": -0.014065464027225971,
+ "eval_rewards/margins": 0.4006173312664032,
+ "eval_rewards/rejected": -0.41468286514282227,
+ "eval_runtime": 237.4099,
+ "eval_samples": 2000,
+ "eval_samples_per_second": 8.424,
+ "eval_steps_per_second": 0.527,
+ "train_loss": 0.6110922341996973,
+ "train_runtime": 33040.6759,
+ "train_samples": 61966,
+ "train_samples_per_second": 5.626,
+ "train_steps_per_second": 0.022
+ }
config.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "_name_or_path": "mistralai/Mistral-7B-v0.1",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000.0,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.35.0",
+ "use_cache": true,
+ "vocab_size": 32000
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "epoch": 3.0,
+ "eval_logits/chosen": -2.259361505508423,
+ "eval_logits/rejected": -2.14780330657959,
+ "eval_logps/chosen": -263.0772705078125,
+ "eval_logps/rejected": -221.30685424804688,
+ "eval_loss": 0.5746620893478394,
+ "eval_rewards/accuracies": 0.7059999704360962,
+ "eval_rewards/chosen": -0.014065464027225971,
+ "eval_rewards/margins": 0.4006173312664032,
+ "eval_rewards/rejected": -0.41468286514282227,
+ "eval_runtime": 237.4099,
+ "eval_samples": 2000,
+ "eval_samples_per_second": 8.424,
+ "eval_steps_per_second": 0.527
+ }
runs/Jan02_22-07-01_node-0/events.out.tfevents.1704262093.node-0.91323.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d52f905ed0ff120b4f4826276a1830bd910e302635f52a86a19a3d94e4e483a
+ size 53035
runs/Jan02_22-07-01_node-0/events.out.tfevents.1704295373.node-0.91323.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74a2baa2923367618c442e4eaf1575001facd4f31cdff37bdef496979b24fd87
+ size 828
runs/Jan03_09-50-50_node-0/events.out.tfevents.1704304326.node-0.177510.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1fe5aff3b80f7b55c3e54be8fa5588a36a71264ef0c1ce8ad9090daa2c016ea1
+ size 53035
runs/Jan03_09-50-50_node-0/events.out.tfevents.1704337604.node-0.177510.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c90567cbfcdfa37c1ab8a67d8a7f63c072d58f918c1efedce700e1f1ed7cb62e
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 2048,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
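
The chat_template above formats conversations as <|system|>/<|user|>/<|assistant|> turns, each terminated by the EOS token, with a trailing <|assistant|> tag when a completion is requested. A small usage sketch (the path and messages are illustrative, not part of this commit):

```python
# Render a conversation with the chat template defined in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sambar-7b-dpo-lora")  # local checkpoint dir or hub id

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What does rewards/margins measure?"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with '<|assistant|>' so generation continues as the assistant
```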
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 3.0,
+ "train_loss": 0.6110922341996973,
+ "train_runtime": 33040.6759,
+ "train_samples": 61966,
+ "train_samples_per_second": 5.626,
+ "train_steps_per_second": 0.022
+ }
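
train_results.json and the README hyperparameters describe a DPO run over 61,966 preference pairs for 3 epochs at an effective batch size of 256, which works out to the 726 optimizer steps reported (about 242 per epoch). The actual training script is not included in this commit; the sketch below shows how such a run is typically set up with TRL's DPOTrainer (0.7.x-era API, matching the Transformers 4.35 timeframe) and is an assumption, not the author's code.

```python
# Hypothetical reconstruction of the training setup from the reported hyperparameters.
# Assumes TRL's DPOTrainer; the dataset here is a toy stand-in for the unspecified one.
import torch
from datasets import Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer

base_id = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(base_id)
tokenizer.pad_token = tokenizer.eos_token  # matches pad_token "</s>" in this repo
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)

peft_config = LoraConfig(
    r=64, lora_alpha=16, lora_dropout=0.1, bias="none",
    target_modules=["q_proj", "o_proj", "v_proj", "k_proj"], task_type="CAUSAL_LM",
)

# Toy preference data: DPOTrainer expects prompt/chosen/rejected string columns.
train_dataset = Dataset.from_dict({
    "prompt": ["<|user|>\nWhat is 2 + 2?</s>\n<|assistant|>\n"],
    "chosen": ["2 + 2 = 4.</s>"],
    "rejected": ["2 + 2 = 5.</s>"],
})

training_args = TrainingArguments(
    output_dir="sambar-7b-dpo-lora",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=32,
    learning_rate=5e-7,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    seed=42,
    bf16=True,
    logging_steps=10,
    remove_unused_columns=False,  # keep the raw preference columns for the DPO collator
)

trainer = DPOTrainer(
    model,
    ref_model=None,            # with a PEFT adapter, the frozen base model serves as the reference
    args=training_args,
    beta=0.1,                  # assumed; the DPO beta is not recorded in this commit
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    max_length=1024,           # assumed sequence limits
    max_prompt_length=512,
)
trainer.train()
```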
trainer_state.json ADDED
@@ -0,0 +1,1098 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9992254066615027,
5
+ "eval_steps": 100,
6
+ "global_step": 726,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 6.84931506849315e-09,
14
+ "logits/chosen": -2.445258378982544,
15
+ "logits/rejected": -2.482508659362793,
16
+ "logps/chosen": -270.6954040527344,
17
+ "logps/rejected": -206.76272583007812,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.04,
27
+ "learning_rate": 6.84931506849315e-08,
28
+ "logits/chosen": -2.507073163986206,
29
+ "logits/rejected": -2.4275779724121094,
30
+ "logps/chosen": -291.61688232421875,
31
+ "logps/rejected": -224.1806182861328,
32
+ "loss": 0.6936,
33
+ "rewards/accuracies": 0.4322916567325592,
34
+ "rewards/chosen": 0.0008433780749328434,
35
+ "rewards/margins": -0.0009758697124198079,
36
+ "rewards/rejected": 0.0018192478455603123,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.08,
41
+ "learning_rate": 1.36986301369863e-07,
42
+ "logits/chosen": -2.517071008682251,
43
+ "logits/rejected": -2.4758286476135254,
44
+ "logps/chosen": -276.9480285644531,
45
+ "logps/rejected": -214.7642822265625,
46
+ "loss": 0.6931,
47
+ "rewards/accuracies": 0.49687498807907104,
48
+ "rewards/chosen": -0.00105036492459476,
49
+ "rewards/margins": 0.001187985180877149,
50
+ "rewards/rejected": -0.0022383497562259436,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "learning_rate": 2.054794520547945e-07,
56
+ "logits/chosen": -2.481997013092041,
57
+ "logits/rejected": -2.4269680976867676,
58
+ "logps/chosen": -251.0941162109375,
59
+ "logps/rejected": -205.1868438720703,
60
+ "loss": 0.6928,
61
+ "rewards/accuracies": 0.4781250059604645,
62
+ "rewards/chosen": 0.0005288544343784451,
63
+ "rewards/margins": 0.0002331261057406664,
64
+ "rewards/rejected": 0.0002957289107143879,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.17,
69
+ "learning_rate": 2.73972602739726e-07,
70
+ "logits/chosen": -2.548166036605835,
71
+ "logits/rejected": -2.4914188385009766,
72
+ "logps/chosen": -281.9214782714844,
73
+ "logps/rejected": -214.442626953125,
74
+ "loss": 0.6916,
75
+ "rewards/accuracies": 0.520312488079071,
76
+ "rewards/chosen": 0.002253689104691148,
77
+ "rewards/margins": 0.003704611212015152,
78
+ "rewards/rejected": -0.0014509217580780387,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.21,
83
+ "learning_rate": 3.424657534246575e-07,
84
+ "logits/chosen": -2.5232255458831787,
85
+ "logits/rejected": -2.47584867477417,
86
+ "logps/chosen": -265.45989990234375,
87
+ "logps/rejected": -221.6902313232422,
88
+ "loss": 0.6911,
89
+ "rewards/accuracies": 0.5218750238418579,
90
+ "rewards/chosen": -0.0004125732812099159,
91
+ "rewards/margins": 0.001857149414718151,
92
+ "rewards/rejected": -0.0022697225213050842,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.25,
97
+ "learning_rate": 4.10958904109589e-07,
98
+ "logits/chosen": -2.503446578979492,
99
+ "logits/rejected": -2.4641902446746826,
100
+ "logps/chosen": -265.10125732421875,
101
+ "logps/rejected": -212.9536590576172,
102
+ "loss": 0.6885,
103
+ "rewards/accuracies": 0.5765625238418579,
104
+ "rewards/chosen": 0.0057233949191868305,
105
+ "rewards/margins": 0.011815843172371387,
106
+ "rewards/rejected": -0.006092446856200695,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.29,
111
+ "learning_rate": 4.794520547945205e-07,
112
+ "logits/chosen": -2.4819159507751465,
113
+ "logits/rejected": -2.4763991832733154,
114
+ "logps/chosen": -252.2348175048828,
115
+ "logps/rejected": -212.4091339111328,
116
+ "loss": 0.6854,
117
+ "rewards/accuracies": 0.582812488079071,
118
+ "rewards/chosen": 0.007895523682236671,
119
+ "rewards/margins": 0.018240801990032196,
120
+ "rewards/rejected": -0.01034527737647295,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.33,
125
+ "learning_rate": 4.946401225114854e-07,
126
+ "logits/chosen": -2.5163493156433105,
127
+ "logits/rejected": -2.4587182998657227,
128
+ "logps/chosen": -263.52581787109375,
129
+ "logps/rejected": -216.7571258544922,
130
+ "loss": 0.6829,
131
+ "rewards/accuracies": 0.606249988079071,
132
+ "rewards/chosen": 0.01336698792874813,
133
+ "rewards/margins": 0.022466376423835754,
134
+ "rewards/rejected": -0.009099386632442474,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.37,
139
+ "learning_rate": 4.869831546707504e-07,
140
+ "logits/chosen": -2.559368848800659,
141
+ "logits/rejected": -2.5,
142
+ "logps/chosen": -269.92730712890625,
143
+ "logps/rejected": -222.235107421875,
144
+ "loss": 0.6783,
145
+ "rewards/accuracies": 0.6390625238418579,
146
+ "rewards/chosen": 0.019632169976830482,
147
+ "rewards/margins": 0.03590407967567444,
148
+ "rewards/rejected": -0.016271911561489105,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.41,
153
+ "learning_rate": 4.793261868300153e-07,
154
+ "logits/chosen": -2.5028889179229736,
155
+ "logits/rejected": -2.4726669788360596,
156
+ "logps/chosen": -272.01385498046875,
157
+ "logps/rejected": -225.379150390625,
158
+ "loss": 0.6748,
159
+ "rewards/accuracies": 0.651562511920929,
160
+ "rewards/chosen": 0.024208087474107742,
161
+ "rewards/margins": 0.0442068949341774,
162
+ "rewards/rejected": -0.019998803734779358,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.45,
167
+ "learning_rate": 4.7166921898928023e-07,
168
+ "logits/chosen": -2.539785623550415,
169
+ "logits/rejected": -2.4715189933776855,
170
+ "logps/chosen": -250.6864776611328,
171
+ "logps/rejected": -220.31857299804688,
172
+ "loss": 0.6705,
173
+ "rewards/accuracies": 0.651562511920929,
174
+ "rewards/chosen": 0.018719878047704697,
175
+ "rewards/margins": 0.04420315474271774,
176
+ "rewards/rejected": -0.025483276695013046,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.5,
181
+ "learning_rate": 4.640122511485451e-07,
182
+ "logits/chosen": -2.5032382011413574,
183
+ "logits/rejected": -2.4787399768829346,
184
+ "logps/chosen": -255.07101440429688,
185
+ "logps/rejected": -217.56332397460938,
186
+ "loss": 0.6669,
187
+ "rewards/accuracies": 0.651562511920929,
188
+ "rewards/chosen": 0.02690967358648777,
189
+ "rewards/margins": 0.0577348992228508,
190
+ "rewards/rejected": -0.030825233086943626,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.54,
195
+ "learning_rate": 4.563552833078101e-07,
196
+ "logits/chosen": -2.5442593097686768,
197
+ "logits/rejected": -2.4793853759765625,
198
+ "logps/chosen": -264.15234375,
199
+ "logps/rejected": -223.4934844970703,
200
+ "loss": 0.6589,
201
+ "rewards/accuracies": 0.690625011920929,
202
+ "rewards/chosen": 0.029823919758200645,
203
+ "rewards/margins": 0.07180732488632202,
204
+ "rewards/rejected": -0.041983410716056824,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.58,
209
+ "learning_rate": 4.4869831546707505e-07,
210
+ "logits/chosen": -2.5460541248321533,
211
+ "logits/rejected": -2.4870338439941406,
212
+ "logps/chosen": -268.638427734375,
213
+ "logps/rejected": -227.9025421142578,
214
+ "loss": 0.6579,
215
+ "rewards/accuracies": 0.6734374761581421,
216
+ "rewards/chosen": 0.041604138910770416,
217
+ "rewards/margins": 0.09325676411390305,
218
+ "rewards/rejected": -0.05165262892842293,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.62,
223
+ "learning_rate": 4.4104134762633994e-07,
224
+ "logits/chosen": -2.5670578479766846,
225
+ "logits/rejected": -2.5104963779449463,
226
+ "logps/chosen": -264.26611328125,
227
+ "logps/rejected": -224.1782684326172,
228
+ "loss": 0.6536,
229
+ "rewards/accuracies": 0.692187488079071,
230
+ "rewards/chosen": 0.03908708691596985,
231
+ "rewards/margins": 0.09105747193098068,
232
+ "rewards/rejected": -0.05197037383913994,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.66,
237
+ "learning_rate": 4.333843797856049e-07,
238
+ "logits/chosen": -2.5320677757263184,
239
+ "logits/rejected": -2.4734749794006348,
240
+ "logps/chosen": -277.51861572265625,
241
+ "logps/rejected": -236.5709991455078,
242
+ "loss": 0.6459,
243
+ "rewards/accuracies": 0.6875,
244
+ "rewards/chosen": 0.04750001057982445,
245
+ "rewards/margins": 0.1084158793091774,
246
+ "rewards/rejected": -0.06091586500406265,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.7,
251
+ "learning_rate": 4.257274119448698e-07,
252
+ "logits/chosen": -2.565639019012451,
253
+ "logits/rejected": -2.530691146850586,
254
+ "logps/chosen": -268.9267578125,
255
+ "logps/rejected": -236.61929321289062,
256
+ "loss": 0.6453,
257
+ "rewards/accuracies": 0.6703125238418579,
258
+ "rewards/chosen": 0.03782298043370247,
259
+ "rewards/margins": 0.10148061811923981,
260
+ "rewards/rejected": -0.06365764141082764,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.74,
265
+ "learning_rate": 4.180704441041347e-07,
266
+ "logits/chosen": -2.522090196609497,
267
+ "logits/rejected": -2.470818519592285,
268
+ "logps/chosen": -254.6784210205078,
269
+ "logps/rejected": -223.4536590576172,
270
+ "loss": 0.6455,
271
+ "rewards/accuracies": 0.668749988079071,
272
+ "rewards/chosen": 0.03428062051534653,
273
+ "rewards/margins": 0.1071067601442337,
274
+ "rewards/rejected": -0.07282613217830658,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.78,
279
+ "learning_rate": 4.1041347626339966e-07,
280
+ "logits/chosen": -2.5339770317077637,
281
+ "logits/rejected": -2.485583543777466,
282
+ "logps/chosen": -259.6706237792969,
283
+ "logps/rejected": -210.58071899414062,
284
+ "loss": 0.6367,
285
+ "rewards/accuracies": 0.6703125238418579,
286
+ "rewards/chosen": 0.04104261472821236,
287
+ "rewards/margins": 0.13799390196800232,
288
+ "rewards/rejected": -0.09695132076740265,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.83,
293
+ "learning_rate": 4.027565084226646e-07,
294
+ "logits/chosen": -2.559802770614624,
295
+ "logits/rejected": -2.4930777549743652,
296
+ "logps/chosen": -260.19580078125,
297
+ "logps/rejected": -215.35610961914062,
298
+ "loss": 0.6291,
299
+ "rewards/accuracies": 0.7093750238418579,
300
+ "rewards/chosen": 0.04894023388624191,
301
+ "rewards/margins": 0.15646891295909882,
302
+ "rewards/rejected": -0.1075286716222763,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.87,
307
+ "learning_rate": 3.9509954058192954e-07,
308
+ "logits/chosen": -2.5613999366760254,
309
+ "logits/rejected": -2.517932653427124,
310
+ "logps/chosen": -281.9465026855469,
311
+ "logps/rejected": -228.8275909423828,
312
+ "loss": 0.6252,
313
+ "rewards/accuracies": 0.6796875,
314
+ "rewards/chosen": 0.05419199541211128,
315
+ "rewards/margins": 0.1751035749912262,
316
+ "rewards/rejected": -0.12091157585382462,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.91,
321
+ "learning_rate": 3.874425727411945e-07,
322
+ "logits/chosen": -2.5146591663360596,
323
+ "logits/rejected": -2.488956928253174,
324
+ "logps/chosen": -274.73712158203125,
325
+ "logps/rejected": -226.3198699951172,
326
+ "loss": 0.6242,
327
+ "rewards/accuracies": 0.6859375238418579,
328
+ "rewards/chosen": 0.04880619794130325,
329
+ "rewards/margins": 0.178890660405159,
330
+ "rewards/rejected": -0.13008446991443634,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.95,
335
+ "learning_rate": 3.797856049004594e-07,
336
+ "logits/chosen": -2.5332489013671875,
337
+ "logits/rejected": -2.4790148735046387,
338
+ "logps/chosen": -262.056884765625,
339
+ "logps/rejected": -225.40811157226562,
340
+ "loss": 0.6235,
341
+ "rewards/accuracies": 0.675000011920929,
342
+ "rewards/chosen": 0.052485816180706024,
343
+ "rewards/margins": 0.17873048782348633,
344
+ "rewards/rejected": -0.1262446939945221,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.99,
349
+ "learning_rate": 3.7212863705972436e-07,
350
+ "logits/chosen": -2.5549776554107666,
351
+ "logits/rejected": -2.506992816925049,
352
+ "logps/chosen": -269.25286865234375,
353
+ "logps/rejected": -219.7020721435547,
354
+ "loss": 0.6213,
355
+ "rewards/accuracies": 0.706250011920929,
356
+ "rewards/chosen": 0.053668104112148285,
357
+ "rewards/margins": 0.18163269758224487,
358
+ "rewards/rejected": -0.1279645711183548,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 1.0,
363
+ "eval_logits/chosen": -2.2727341651916504,
364
+ "eval_logits/rejected": -2.160538911819458,
365
+ "eval_logps/chosen": -262.5110168457031,
366
+ "eval_logps/rejected": -218.72930908203125,
367
+ "eval_loss": 0.618248462677002,
368
+ "eval_rewards/accuracies": 0.6859999895095825,
369
+ "eval_rewards/chosen": 0.04256003722548485,
370
+ "eval_rewards/margins": 0.1994897723197937,
371
+ "eval_rewards/rejected": -0.15692974627017975,
372
+ "eval_runtime": 239.4377,
373
+ "eval_samples_per_second": 8.353,
374
+ "eval_steps_per_second": 0.522,
375
+ "step": 242
376
+ },
377
+ {
378
+ "epoch": 1.03,
379
+ "learning_rate": 3.6447166921898925e-07,
380
+ "logits/chosen": -2.489406108856201,
381
+ "logits/rejected": -2.4407076835632324,
382
+ "logps/chosen": -255.3499298095703,
383
+ "logps/rejected": -207.3684539794922,
384
+ "loss": 0.6244,
385
+ "rewards/accuracies": 0.6796875,
386
+ "rewards/chosen": 0.039003290235996246,
387
+ "rewards/margins": 0.18538300693035126,
388
+ "rewards/rejected": -0.1463797241449356,
389
+ "step": 250
390
+ },
391
+ {
392
+ "epoch": 1.07,
393
+ "learning_rate": 3.568147013782542e-07,
394
+ "logits/chosen": -2.511629581451416,
395
+ "logits/rejected": -2.4620814323425293,
396
+ "logps/chosen": -260.0677490234375,
397
+ "logps/rejected": -223.49169921875,
398
+ "loss": 0.6101,
399
+ "rewards/accuracies": 0.7281249761581421,
400
+ "rewards/chosen": 0.04925510287284851,
401
+ "rewards/margins": 0.21666565537452698,
402
+ "rewards/rejected": -0.16741053760051727,
403
+ "step": 260
404
+ },
405
+ {
406
+ "epoch": 1.12,
407
+ "learning_rate": 3.4915773353751913e-07,
408
+ "logits/chosen": -2.5199971199035645,
409
+ "logits/rejected": -2.4499754905700684,
410
+ "logps/chosen": -276.2830505371094,
411
+ "logps/rejected": -233.03201293945312,
412
+ "loss": 0.6031,
413
+ "rewards/accuracies": 0.765625,
414
+ "rewards/chosen": 0.06275217235088348,
415
+ "rewards/margins": 0.2755245864391327,
416
+ "rewards/rejected": -0.2127724140882492,
417
+ "step": 270
418
+ },
419
+ {
420
+ "epoch": 1.16,
421
+ "learning_rate": 3.41500765696784e-07,
422
+ "logits/chosen": -2.527909517288208,
423
+ "logits/rejected": -2.4822728633880615,
424
+ "logps/chosen": -249.1805419921875,
425
+ "logps/rejected": -222.91024780273438,
426
+ "loss": 0.6109,
427
+ "rewards/accuracies": 0.6937500238418579,
428
+ "rewards/chosen": 0.04066196084022522,
429
+ "rewards/margins": 0.20734688639640808,
430
+ "rewards/rejected": -0.16668489575386047,
431
+ "step": 280
432
+ },
433
+ {
434
+ "epoch": 1.2,
435
+ "learning_rate": 3.33843797856049e-07,
436
+ "logits/chosen": -2.506227731704712,
437
+ "logits/rejected": -2.483896493911743,
438
+ "logps/chosen": -281.0603332519531,
439
+ "logps/rejected": -225.3281707763672,
440
+ "loss": 0.6033,
441
+ "rewards/accuracies": 0.729687511920929,
442
+ "rewards/chosen": 0.04976457357406616,
443
+ "rewards/margins": 0.2599830627441406,
444
+ "rewards/rejected": -0.21021847426891327,
445
+ "step": 290
446
+ },
447
+ {
448
+ "epoch": 1.24,
449
+ "learning_rate": 3.2618683001531396e-07,
450
+ "logits/chosen": -2.4969980716705322,
451
+ "logits/rejected": -2.4238204956054688,
452
+ "logps/chosen": -259.1755065917969,
453
+ "logps/rejected": -224.92581176757812,
454
+ "loss": 0.5988,
455
+ "rewards/accuracies": 0.6875,
456
+ "rewards/chosen": 0.0504862442612648,
457
+ "rewards/margins": 0.24086256325244904,
458
+ "rewards/rejected": -0.19037629663944244,
459
+ "step": 300
460
+ },
461
+ {
462
+ "epoch": 1.28,
463
+ "learning_rate": 3.1852986217457885e-07,
464
+ "logits/chosen": -2.5485026836395264,
465
+ "logits/rejected": -2.4636754989624023,
466
+ "logps/chosen": -259.2119140625,
467
+ "logps/rejected": -225.5343017578125,
468
+ "loss": 0.6058,
469
+ "rewards/accuracies": 0.690625011920929,
470
+ "rewards/chosen": 0.04171649366617203,
471
+ "rewards/margins": 0.2371227741241455,
472
+ "rewards/rejected": -0.19540627300739288,
473
+ "step": 310
474
+ },
475
+ {
476
+ "epoch": 1.32,
477
+ "learning_rate": 3.108728943338438e-07,
478
+ "logits/chosen": -2.5411689281463623,
479
+ "logits/rejected": -2.5233051776885986,
480
+ "logps/chosen": -250.4091796875,
481
+ "logps/rejected": -234.99911499023438,
482
+ "loss": 0.6045,
483
+ "rewards/accuracies": 0.6656249761581421,
484
+ "rewards/chosen": 0.029940366744995117,
485
+ "rewards/margins": 0.20695392787456512,
486
+ "rewards/rejected": -0.17701356112957,
487
+ "step": 320
488
+ },
489
+ {
490
+ "epoch": 1.36,
491
+ "learning_rate": 3.0321592649310873e-07,
492
+ "logits/chosen": -2.518437385559082,
493
+ "logits/rejected": -2.4455406665802,
494
+ "logps/chosen": -251.207275390625,
495
+ "logps/rejected": -221.4502410888672,
496
+ "loss": 0.5953,
497
+ "rewards/accuracies": 0.703125,
498
+ "rewards/chosen": 0.05027127265930176,
499
+ "rewards/margins": 0.2602222263813019,
500
+ "rewards/rejected": -0.20995095372200012,
501
+ "step": 330
502
+ },
503
+ {
504
+ "epoch": 1.4,
505
+ "learning_rate": 2.955589586523736e-07,
506
+ "logits/chosen": -2.536722183227539,
507
+ "logits/rejected": -2.4708170890808105,
508
+ "logps/chosen": -263.5827941894531,
509
+ "logps/rejected": -223.34375,
510
+ "loss": 0.6006,
511
+ "rewards/accuracies": 0.7124999761581421,
512
+ "rewards/chosen": 0.0343378521502018,
513
+ "rewards/margins": 0.2729324400424957,
514
+ "rewards/rejected": -0.23859457671642303,
515
+ "step": 340
516
+ },
517
+ {
518
+ "epoch": 1.45,
519
+ "learning_rate": 2.8790199081163856e-07,
520
+ "logits/chosen": -2.5344901084899902,
521
+ "logits/rejected": -2.457329511642456,
522
+ "logps/chosen": -263.54827880859375,
523
+ "logps/rejected": -222.186767578125,
524
+ "loss": 0.6012,
525
+ "rewards/accuracies": 0.692187488079071,
526
+ "rewards/chosen": 0.02179412916302681,
527
+ "rewards/margins": 0.24006810784339905,
528
+ "rewards/rejected": -0.21827396750450134,
529
+ "step": 350
530
+ },
531
+ {
532
+ "epoch": 1.49,
533
+ "learning_rate": 2.802450229709035e-07,
534
+ "logits/chosen": -2.499368190765381,
535
+ "logits/rejected": -2.4326324462890625,
536
+ "logps/chosen": -252.73818969726562,
537
+ "logps/rejected": -210.35055541992188,
538
+ "loss": 0.5947,
539
+ "rewards/accuracies": 0.6890624761581421,
540
+ "rewards/chosen": 0.017355820164084435,
541
+ "rewards/margins": 0.2538844645023346,
542
+ "rewards/rejected": -0.236528679728508,
543
+ "step": 360
544
+ },
545
+ {
546
+ "epoch": 1.53,
547
+ "learning_rate": 2.725880551301684e-07,
548
+ "logits/chosen": -2.5512120723724365,
549
+ "logits/rejected": -2.491529941558838,
550
+ "logps/chosen": -272.677978515625,
551
+ "logps/rejected": -230.0671844482422,
552
+ "loss": 0.6009,
553
+ "rewards/accuracies": 0.6812499761581421,
554
+ "rewards/chosen": 0.02877696417272091,
555
+ "rewards/margins": 0.27019625902175903,
556
+ "rewards/rejected": -0.24141927063465118,
557
+ "step": 370
558
+ },
559
+ {
560
+ "epoch": 1.57,
561
+ "learning_rate": 2.649310872894334e-07,
562
+ "logits/chosen": -2.5440518856048584,
563
+ "logits/rejected": -2.498530149459839,
564
+ "logps/chosen": -272.7989196777344,
565
+ "logps/rejected": -219.9661865234375,
566
+ "loss": 0.5936,
567
+ "rewards/accuracies": 0.6937500238418579,
568
+ "rewards/chosen": 0.0328015573322773,
569
+ "rewards/margins": 0.30769142508506775,
570
+ "rewards/rejected": -0.27488988637924194,
571
+ "step": 380
572
+ },
573
+ {
574
+ "epoch": 1.61,
575
+ "learning_rate": 2.572741194486983e-07,
576
+ "logits/chosen": -2.5445454120635986,
577
+ "logits/rejected": -2.478445529937744,
578
+ "logps/chosen": -271.42205810546875,
579
+ "logps/rejected": -205.12210083007812,
580
+ "loss": 0.5876,
581
+ "rewards/accuracies": 0.703125,
582
+ "rewards/chosen": 0.02636413648724556,
583
+ "rewards/margins": 0.3068538010120392,
584
+ "rewards/rejected": -0.28048965334892273,
585
+ "step": 390
586
+ },
587
+ {
588
+ "epoch": 1.65,
589
+ "learning_rate": 2.496171516079632e-07,
590
+ "logits/chosen": -2.565645217895508,
591
+ "logits/rejected": -2.490952491760254,
592
+ "logps/chosen": -290.6482849121094,
593
+ "logps/rejected": -236.2904510498047,
594
+ "loss": 0.5884,
595
+ "rewards/accuracies": 0.721875011920929,
596
+ "rewards/chosen": 0.05599096417427063,
597
+ "rewards/margins": 0.34153956174850464,
598
+ "rewards/rejected": -0.2855486273765564,
599
+ "step": 400
600
+ },
601
+ {
602
+ "epoch": 1.69,
603
+ "learning_rate": 2.4196018376722816e-07,
604
+ "logits/chosen": -2.52652645111084,
605
+ "logits/rejected": -2.4913227558135986,
606
+ "logps/chosen": -276.25018310546875,
607
+ "logps/rejected": -225.22946166992188,
608
+ "loss": 0.5948,
609
+ "rewards/accuracies": 0.682812511920929,
610
+ "rewards/chosen": 0.0320248007774353,
611
+ "rewards/margins": 0.31556203961372375,
612
+ "rewards/rejected": -0.28353720903396606,
613
+ "step": 410
614
+ },
615
+ {
616
+ "epoch": 1.74,
617
+ "learning_rate": 2.343032159264931e-07,
618
+ "logits/chosen": -2.500330686569214,
619
+ "logits/rejected": -2.426058530807495,
620
+ "logps/chosen": -266.7002868652344,
621
+ "logps/rejected": -220.94320678710938,
622
+ "loss": 0.5856,
623
+ "rewards/accuracies": 0.723437488079071,
624
+ "rewards/chosen": 0.03841843083500862,
625
+ "rewards/margins": 0.3689562678337097,
626
+ "rewards/rejected": -0.3305378556251526,
627
+ "step": 420
628
+ },
629
+ {
630
+ "epoch": 1.78,
631
+ "learning_rate": 2.26646248085758e-07,
632
+ "logits/chosen": -2.489319324493408,
633
+ "logits/rejected": -2.429903745651245,
634
+ "logps/chosen": -257.18353271484375,
635
+ "logps/rejected": -214.43701171875,
636
+ "loss": 0.5798,
637
+ "rewards/accuracies": 0.731249988079071,
638
+ "rewards/chosen": 0.027863550931215286,
639
+ "rewards/margins": 0.3603667616844177,
640
+ "rewards/rejected": -0.3325032591819763,
641
+ "step": 430
642
+ },
643
+ {
644
+ "epoch": 1.82,
645
+ "learning_rate": 2.1898928024502298e-07,
646
+ "logits/chosen": -2.546457290649414,
647
+ "logits/rejected": -2.479651927947998,
648
+ "logps/chosen": -261.133544921875,
649
+ "logps/rejected": -218.47610473632812,
650
+ "loss": 0.5824,
651
+ "rewards/accuracies": 0.698437511920929,
652
+ "rewards/chosen": 0.03679219260811806,
653
+ "rewards/margins": 0.3415088653564453,
654
+ "rewards/rejected": -0.30471667647361755,
655
+ "step": 440
656
+ },
657
+ {
658
+ "epoch": 1.86,
659
+ "learning_rate": 2.113323124042879e-07,
660
+ "logits/chosen": -2.5231552124023438,
661
+ "logits/rejected": -2.479872465133667,
662
+ "logps/chosen": -268.78851318359375,
663
+ "logps/rejected": -228.0364990234375,
664
+ "loss": 0.5827,
665
+ "rewards/accuracies": 0.7203124761581421,
666
+ "rewards/chosen": 0.044063158333301544,
667
+ "rewards/margins": 0.36794179677963257,
668
+ "rewards/rejected": -0.32387861609458923,
669
+ "step": 450
670
+ },
671
+ {
672
+ "epoch": 1.9,
673
+ "learning_rate": 2.036753445635528e-07,
674
+ "logits/chosen": -2.530987024307251,
675
+ "logits/rejected": -2.5036771297454834,
676
+ "logps/chosen": -281.70513916015625,
677
+ "logps/rejected": -234.73934936523438,
678
+ "loss": 0.5788,
679
+ "rewards/accuracies": 0.6968749761581421,
680
+ "rewards/chosen": 0.032568901777267456,
681
+ "rewards/margins": 0.3853657841682434,
682
+ "rewards/rejected": -0.35279688239097595,
683
+ "step": 460
684
+ },
685
+ {
686
+ "epoch": 1.94,
687
+ "learning_rate": 1.9601837672281775e-07,
688
+ "logits/chosen": -2.5145978927612305,
689
+ "logits/rejected": -2.474416971206665,
690
+ "logps/chosen": -267.7714538574219,
691
+ "logps/rejected": -233.2850341796875,
692
+ "loss": 0.5832,
693
+ "rewards/accuracies": 0.7328125238418579,
694
+ "rewards/chosen": 0.012133514508605003,
695
+ "rewards/margins": 0.3285755217075348,
696
+ "rewards/rejected": -0.31644195318222046,
697
+ "step": 470
698
+ },
699
+ {
700
+ "epoch": 1.98,
701
+ "learning_rate": 1.883614088820827e-07,
702
+ "logits/chosen": -2.534013032913208,
703
+ "logits/rejected": -2.479665517807007,
704
+ "logps/chosen": -266.11285400390625,
705
+ "logps/rejected": -221.65478515625,
706
+ "loss": 0.5903,
707
+ "rewards/accuracies": 0.721875011920929,
708
+ "rewards/chosen": 0.015116107650101185,
709
+ "rewards/margins": 0.34223589301109314,
710
+ "rewards/rejected": -0.32711976766586304,
711
+ "step": 480
712
+ },
713
+ {
714
+ "epoch": 2.0,
715
+ "eval_logits/chosen": -2.2634127140045166,
716
+ "eval_logits/rejected": -2.151700496673584,
717
+ "eval_logps/chosen": -262.890625,
718
+ "eval_logps/rejected": -220.66033935546875,
719
+ "eval_loss": 0.5826324224472046,
720
+ "eval_rewards/accuracies": 0.6940000057220459,
721
+ "eval_rewards/chosen": 0.004602876491844654,
722
+ "eval_rewards/margins": 0.35463690757751465,
723
+ "eval_rewards/rejected": -0.3500339984893799,
724
+ "eval_runtime": 238.0856,
725
+ "eval_samples_per_second": 8.4,
726
+ "eval_steps_per_second": 0.525,
727
+ "step": 484
728
+ },
729
+ {
730
+ "epoch": 2.02,
731
+ "learning_rate": 1.807044410413476e-07,
732
+ "logits/chosen": -2.509183645248413,
733
+ "logits/rejected": -2.474013566970825,
734
+ "logps/chosen": -255.69241333007812,
735
+ "logps/rejected": -233.1560516357422,
736
+ "loss": 0.586,
737
+ "rewards/accuracies": 0.6968749761581421,
738
+ "rewards/chosen": 0.010258705355226994,
739
+ "rewards/margins": 0.35224205255508423,
740
+ "rewards/rejected": -0.3419833481311798,
741
+ "step": 490
742
+ },
743
+ {
744
+ "epoch": 2.07,
745
+ "learning_rate": 1.7304747320061255e-07,
746
+ "logits/chosen": -2.5044076442718506,
747
+ "logits/rejected": -2.4872841835021973,
748
+ "logps/chosen": -261.4200134277344,
749
+ "logps/rejected": -227.45669555664062,
750
+ "loss": 0.5865,
751
+ "rewards/accuracies": 0.703125,
752
+ "rewards/chosen": 0.03717802092432976,
753
+ "rewards/margins": 0.35018208622932434,
754
+ "rewards/rejected": -0.3130040466785431,
755
+ "step": 500
756
+ },
757
+ {
758
+ "epoch": 2.11,
759
+ "learning_rate": 1.6539050535987747e-07,
760
+ "logits/chosen": -2.573312759399414,
761
+ "logits/rejected": -2.4606995582580566,
762
+ "logps/chosen": -267.6290588378906,
763
+ "logps/rejected": -221.90316772460938,
764
+ "loss": 0.5773,
765
+ "rewards/accuracies": 0.721875011920929,
766
+ "rewards/chosen": 0.02148844487965107,
767
+ "rewards/margins": 0.38462623953819275,
768
+ "rewards/rejected": -0.36313778162002563,
769
+ "step": 510
770
+ },
771
+ {
772
+ "epoch": 2.15,
773
+ "learning_rate": 1.5773353751914243e-07,
774
+ "logits/chosen": -2.583714723587036,
775
+ "logits/rejected": -2.502439022064209,
776
+ "logps/chosen": -282.7274475097656,
777
+ "logps/rejected": -226.8433074951172,
778
+ "loss": 0.5783,
779
+ "rewards/accuracies": 0.731249988079071,
780
+ "rewards/chosen": 0.0283358097076416,
781
+ "rewards/margins": 0.40725621581077576,
782
+ "rewards/rejected": -0.37892037630081177,
783
+ "step": 520
784
+ },
785
+ {
786
+ "epoch": 2.19,
787
+ "learning_rate": 1.5007656967840735e-07,
788
+ "logits/chosen": -2.4836316108703613,
789
+ "logits/rejected": -2.4393324851989746,
790
+ "logps/chosen": -251.7774200439453,
791
+ "logps/rejected": -227.4025421142578,
792
+ "loss": 0.5791,
793
+ "rewards/accuracies": 0.715624988079071,
794
+ "rewards/chosen": 0.011759527027606964,
795
+ "rewards/margins": 0.36558085680007935,
796
+ "rewards/rejected": -0.353821337223053,
797
+ "step": 530
798
+ },
799
+ {
800
+ "epoch": 2.23,
801
+ "learning_rate": 1.4241960183767226e-07,
802
+ "logits/chosen": -2.5330307483673096,
803
+ "logits/rejected": -2.468540668487549,
804
+ "logps/chosen": -280.36077880859375,
805
+ "logps/rejected": -232.63845825195312,
806
+ "loss": 0.5811,
807
+ "rewards/accuracies": 0.690625011920929,
808
+ "rewards/chosen": 0.0011851644376292825,
809
+ "rewards/margins": 0.37196025252342224,
810
+ "rewards/rejected": -0.37077510356903076,
811
+ "step": 540
812
+ },
813
+ {
814
+ "epoch": 2.27,
815
+ "learning_rate": 1.347626339969372e-07,
816
+ "logits/chosen": -2.519770383834839,
817
+ "logits/rejected": -2.4823544025421143,
818
+ "logps/chosen": -268.5936584472656,
819
+ "logps/rejected": -239.03897094726562,
820
+ "loss": 0.5826,
821
+ "rewards/accuracies": 0.703125,
822
+ "rewards/chosen": 0.04637747257947922,
823
+ "rewards/margins": 0.3646572530269623,
824
+ "rewards/rejected": -0.31827980279922485,
825
+ "step": 550
826
+ },
827
+ {
828
+ "epoch": 2.31,
829
+ "learning_rate": 1.2710566615620215e-07,
830
+ "logits/chosen": -2.47658109664917,
831
+ "logits/rejected": -2.4247078895568848,
832
+ "logps/chosen": -272.190673828125,
833
+ "logps/rejected": -221.92849731445312,
834
+ "loss": 0.5687,
835
+ "rewards/accuracies": 0.7109375,
836
+ "rewards/chosen": 0.01268075406551361,
837
+ "rewards/margins": 0.39336925745010376,
838
+ "rewards/rejected": -0.38068851828575134,
839
+ "step": 560
840
+ },
841
+ {
842
+ "epoch": 2.35,
843
+ "learning_rate": 1.1944869831546706e-07,
844
+ "logits/chosen": -2.49946928024292,
845
+ "logits/rejected": -2.470533847808838,
846
+ "logps/chosen": -264.8629455566406,
847
+ "logps/rejected": -235.60354614257812,
848
+ "loss": 0.5808,
849
+ "rewards/accuracies": 0.692187488079071,
850
+ "rewards/chosen": -0.01222451962530613,
851
+ "rewards/margins": 0.35588544607162476,
852
+ "rewards/rejected": -0.36810994148254395,
853
+ "step": 570
854
+ },
855
+ {
856
+ "epoch": 2.4,
857
+ "learning_rate": 1.11791730474732e-07,
858
+ "logits/chosen": -2.512359619140625,
859
+ "logits/rejected": -2.4669761657714844,
860
+ "logps/chosen": -260.2524108886719,
861
+ "logps/rejected": -223.0822296142578,
862
+ "loss": 0.5741,
863
+ "rewards/accuracies": 0.7484375238418579,
864
+ "rewards/chosen": -6.574243161594495e-05,
865
+ "rewards/margins": 0.42497771978378296,
866
+ "rewards/rejected": -0.4250434935092926,
867
+ "step": 580
868
+ },
869
+ {
870
+ "epoch": 2.44,
871
+ "learning_rate": 1.0413476263399694e-07,
872
+ "logits/chosen": -2.5507304668426514,
873
+ "logits/rejected": -2.498631715774536,
874
+ "logps/chosen": -267.2727966308594,
875
+ "logps/rejected": -215.9676055908203,
876
+ "loss": 0.5743,
877
+ "rewards/accuracies": 0.7171875238418579,
878
+ "rewards/chosen": -0.014520371332764626,
879
+ "rewards/margins": 0.3767298758029938,
880
+ "rewards/rejected": -0.39125025272369385,
881
+ "step": 590
882
+ },
883
+ {
884
+ "epoch": 2.48,
885
+ "learning_rate": 9.647779479326186e-08,
886
+ "logits/chosen": -2.4919803142547607,
887
+ "logits/rejected": -2.4784021377563477,
888
+ "logps/chosen": -248.50048828125,
889
+ "logps/rejected": -221.4516143798828,
890
+ "loss": 0.5823,
891
+ "rewards/accuracies": 0.676562488079071,
892
+ "rewards/chosen": -0.003242678241804242,
893
+ "rewards/margins": 0.31586432456970215,
894
+ "rewards/rejected": -0.3191069960594177,
895
+ "step": 600
896
+ },
897
+ {
898
+ "epoch": 2.52,
899
+ "learning_rate": 8.88208269525268e-08,
900
+ "logits/chosen": -2.4886717796325684,
901
+ "logits/rejected": -2.4773054122924805,
902
+ "logps/chosen": -258.47509765625,
903
+ "logps/rejected": -220.34323120117188,
904
+ "loss": 0.5751,
905
+ "rewards/accuracies": 0.699999988079071,
906
+ "rewards/chosen": -0.01594376005232334,
907
+ "rewards/margins": 0.3604745864868164,
908
+ "rewards/rejected": -0.3764183521270752,
909
+ "step": 610
910
+ },
911
+ {
912
+ "epoch": 2.56,
913
+ "learning_rate": 8.116385911179173e-08,
914
+ "logits/chosen": -2.474463939666748,
915
+ "logits/rejected": -2.4515841007232666,
916
+ "logps/chosen": -277.49713134765625,
917
+ "logps/rejected": -216.4416046142578,
918
+ "loss": 0.5761,
919
+ "rewards/accuracies": 0.737500011920929,
920
+ "rewards/chosen": 0.03217107802629471,
921
+ "rewards/margins": 0.41897639632225037,
922
+ "rewards/rejected": -0.38680535554885864,
923
+ "step": 620
924
+ },
925
+ {
926
+ "epoch": 2.6,
927
+ "learning_rate": 7.350689127105667e-08,
928
+ "logits/chosen": -2.5482378005981445,
929
+ "logits/rejected": -2.4707536697387695,
930
+ "logps/chosen": -270.44488525390625,
931
+ "logps/rejected": -229.28286743164062,
932
+ "loss": 0.5673,
933
+ "rewards/accuracies": 0.698437511920929,
934
+ "rewards/chosen": 0.0038725235499441624,
935
+ "rewards/margins": 0.41428858041763306,
936
+ "rewards/rejected": -0.41041603684425354,
937
+ "step": 630
938
+ },
939
+ {
940
+ "epoch": 2.64,
941
+ "learning_rate": 6.584992343032159e-08,
942
+ "logits/chosen": -2.45513916015625,
943
+ "logits/rejected": -2.437708854675293,
944
+ "logps/chosen": -260.6726989746094,
945
+ "logps/rejected": -225.84298706054688,
946
+ "loss": 0.5806,
947
+ "rewards/accuracies": 0.682812511920929,
948
+ "rewards/chosen": 0.012735338881611824,
949
+ "rewards/margins": 0.38611000776290894,
950
+ "rewards/rejected": -0.3733746409416199,
951
+ "step": 640
952
+ },
953
+ {
954
+ "epoch": 2.69,
955
+ "learning_rate": 5.819295558958652e-08,
956
+ "logits/chosen": -2.4959685802459717,
957
+ "logits/rejected": -2.43827486038208,
958
+ "logps/chosen": -286.122802734375,
959
+ "logps/rejected": -219.0840606689453,
960
+ "loss": 0.5722,
961
+ "rewards/accuracies": 0.737500011920929,
962
+ "rewards/chosen": 0.03723993897438049,
963
+ "rewards/margins": 0.4596535563468933,
964
+ "rewards/rejected": -0.4224136471748352,
965
+ "step": 650
966
+ },
967
+ {
968
+ "epoch": 2.73,
969
+ "learning_rate": 5.0535987748851455e-08,
970
+ "logits/chosen": -2.55987811088562,
971
+ "logits/rejected": -2.5002965927124023,
972
+ "logps/chosen": -273.2501220703125,
973
+ "logps/rejected": -228.8253631591797,
974
+ "loss": 0.5661,
975
+ "rewards/accuracies": 0.7093750238418579,
976
+ "rewards/chosen": -0.019126426428556442,
977
+ "rewards/margins": 0.3617474436759949,
978
+ "rewards/rejected": -0.3808739185333252,
979
+ "step": 660
980
+ },
981
+ {
982
+ "epoch": 2.77,
983
+ "learning_rate": 4.287901990811638e-08,
984
+ "logits/chosen": -2.5399422645568848,
985
+ "logits/rejected": -2.4543564319610596,
986
+ "logps/chosen": -266.138427734375,
987
+ "logps/rejected": -241.64926147460938,
988
+ "loss": 0.5723,
989
+ "rewards/accuracies": 0.7281249761581421,
990
+ "rewards/chosen": 0.01595628634095192,
991
+ "rewards/margins": 0.42130351066589355,
992
+ "rewards/rejected": -0.40534719824790955,
993
+ "step": 670
994
+ },
995
+ {
996
+ "epoch": 2.81,
997
+ "learning_rate": 3.522205206738132e-08,
998
+ "logits/chosen": -2.5083065032958984,
999
+ "logits/rejected": -2.465390682220459,
1000
+ "logps/chosen": -272.95794677734375,
1001
+ "logps/rejected": -227.048828125,
1002
+ "loss": 0.5761,
1003
+ "rewards/accuracies": 0.6968749761581421,
1004
+ "rewards/chosen": -0.012082843109965324,
1005
+ "rewards/margins": 0.39171308279037476,
1006
+ "rewards/rejected": -0.40379589796066284,
1007
+ "step": 680
1008
+ },
1009
+ {
1010
+ "epoch": 2.85,
1011
+ "learning_rate": 2.7565084226646246e-08,
1012
+ "logits/chosen": -2.487924337387085,
1013
+ "logits/rejected": -2.4710183143615723,
1014
+ "logps/chosen": -262.0213317871094,
1015
+ "logps/rejected": -233.55136108398438,
1016
+ "loss": 0.5782,
1017
+ "rewards/accuracies": 0.7265625,
1018
+ "rewards/chosen": 0.0015483855968341231,
1019
+ "rewards/margins": 0.4110836088657379,
1020
+ "rewards/rejected": -0.4095352292060852,
1021
+ "step": 690
1022
+ },
1023
+ {
1024
+ "epoch": 2.89,
1025
+ "learning_rate": 1.9908116385911178e-08,
1026
+ "logits/chosen": -2.507427453994751,
1027
+ "logits/rejected": -2.48207688331604,
1028
+ "logps/chosen": -258.07647705078125,
1029
+ "logps/rejected": -224.3450927734375,
1030
+ "loss": 0.5765,
1031
+ "rewards/accuracies": 0.6703125238418579,
1032
+ "rewards/chosen": -0.017956208437681198,
1033
+ "rewards/margins": 0.3418615460395813,
1034
+ "rewards/rejected": -0.359817773103714,
1035
+ "step": 700
1036
+ },
1037
+ {
1038
+ "epoch": 2.93,
1039
+ "learning_rate": 1.225114854517611e-08,
1040
+ "logits/chosen": -2.5354719161987305,
1041
+ "logits/rejected": -2.4671473503112793,
1042
+ "logps/chosen": -278.4368896484375,
1043
+ "logps/rejected": -218.9778594970703,
1044
+ "loss": 0.5764,
1045
+ "rewards/accuracies": 0.715624988079071,
1046
+ "rewards/chosen": -0.004084877669811249,
1047
+ "rewards/margins": 0.3954611122608185,
1048
+ "rewards/rejected": -0.39954596757888794,
1049
+ "step": 710
1050
+ },
1051
+ {
1052
+ "epoch": 2.97,
1053
+ "learning_rate": 4.594180704441042e-09,
1054
+ "logits/chosen": -2.500247001647949,
1055
+ "logits/rejected": -2.4750142097473145,
1056
+ "logps/chosen": -255.7807159423828,
1057
+ "logps/rejected": -222.0048370361328,
1058
+ "loss": 0.5743,
1059
+ "rewards/accuracies": 0.703125,
1060
+ "rewards/chosen": -0.0047062961384654045,
1061
+ "rewards/margins": 0.3754323422908783,
1062
+ "rewards/rejected": -0.380138635635376,
1063
+ "step": 720
1064
+ },
1065
+ {
1066
+ "epoch": 3.0,
1067
+ "eval_logits/chosen": -2.259361505508423,
1068
+ "eval_logits/rejected": -2.14780330657959,
1069
+ "eval_logps/chosen": -263.0772705078125,
1070
+ "eval_logps/rejected": -221.30685424804688,
1071
+ "eval_loss": 0.5746620893478394,
1072
+ "eval_rewards/accuracies": 0.7059999704360962,
1073
+ "eval_rewards/chosen": -0.014065464027225971,
1074
+ "eval_rewards/margins": 0.4006173312664032,
1075
+ "eval_rewards/rejected": -0.41468286514282227,
1076
+ "eval_runtime": 237.7658,
1077
+ "eval_samples_per_second": 8.412,
1078
+ "eval_steps_per_second": 0.526,
1079
+ "step": 726
1080
+ },
1081
+ {
1082
+ "epoch": 3.0,
1083
+ "step": 726,
1084
+ "total_flos": 0.0,
1085
+ "train_loss": 0.6110922341996973,
1086
+ "train_runtime": 33040.6759,
1087
+ "train_samples_per_second": 5.626,
1088
+ "train_steps_per_second": 0.022
1089
+ }
1090
+ ],
1091
+ "logging_steps": 10,
1092
+ "max_steps": 726,
1093
+ "num_train_epochs": 3,
1094
+ "save_steps": 500,
1095
+ "total_flos": 0.0,
1096
+ "trial_name": null,
1097
+ "trial_params": null
1098
+ }
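
The log_history above records per-step training metrics and per-epoch eval metrics, so curves such as rewards/margins can be re-plotted offline without the TensorBoard event files. A minimal sketch (assumes matplotlib is installed):

```python
# Plot the training reward margin recorded in trainer_state.json's log_history.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step training entries (eval entries use "eval_"-prefixed keys).
train_log = [e for e in state["log_history"] if "rewards/margins" in e]
steps = [e["step"] for e in train_log]
margins = [e["rewards/margins"] for e in train_log]

plt.plot(steps, margins)
plt.xlabel("step")
plt.ylabel("rewards/margins")
plt.title("DPO reward margin during training")
plt.savefig("reward_margins.png")
```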
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d66eafa67fbcd51a04a2d80677de1f28016eb42571ccc6a98b6cb6997b977e32
+ size 4728