just1nseo committed on
Commit
79ce6ab
·
verified ·
1 Parent(s): 215ab56

Model save

Browse files
README.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ library_name: peft
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ base_model: openbmb/Eurus-7b-sft
9
+ model-index:
10
+ - name: eurus-dpo-qlora-uf-ours-5e-7
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # eurus-dpo-qlora-uf-ours-5e-7
18
+
19
+ This model is a fine-tuned version of [openbmb/Eurus-7b-sft](https://huggingface.co/openbmb/Eurus-7b-sft) on an unspecified dataset (no dataset name was recorded by the Trainer).
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.8251
22
+ - Rewards/chosen: -2.6509
23
+ - Rewards/rejected: -3.1193
24
+ - Rewards/accuracies: 0.5930
25
+ - Rewards/margins: 0.4684
26
+ - Rewards/margins Max: 3.5001
27
+ - Rewards/margins Min: -2.2741
28
+ - Rewards/margins Std: 1.9114
29
+ - Logps/rejected: -569.4471
30
+ - Logps/chosen: -539.9697
31
+ - Logits/rejected: -1.8277
32
+ - Logits/chosen: -1.9148
33
+
34
+ ## Model description
35
+
36
+ More information needed
37
+
38
+ ## Intended uses & limitations
39
+
40
+ More information needed
41
+
42
+ ## Training and evaluation data
43
+
44
+ More information needed
45
+
46
+ ## Training procedure
47
+
48
+ ### Training hyperparameters
49
+
50
+ The following hyperparameters were used during training:
51
+ - learning_rate: 5e-07
52
+ - train_batch_size: 4
53
+ - eval_batch_size: 8
54
+ - seed: 42
55
+ - distributed_type: multi-GPU
56
+ - num_devices: 2
57
+ - gradient_accumulation_steps: 2
58
+ - total_train_batch_size: 16
59
+ - total_eval_batch_size: 16
60
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
+ - lr_scheduler_type: cosine
62
+ - lr_scheduler_warmup_ratio: 0.1
63
+ - num_epochs: 3
64
+
65
+ ### Training results
66
+
67
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Rewards/margins Max | Rewards/margins Min | Rewards/margins Std | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
68
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:-------------------:|:-------------------:|:-------------------:|:--------------:|:------------:|:---------------:|:-------------:|
69
+ | 0.6787 | 0.28 | 100 | 0.6902 | -0.0196 | -0.0275 | 0.6050 | 0.0078 | 0.0658 | -0.0439 | 0.0352 | -260.2682 | -276.8446 | -2.1835 | -2.3057 |
70
+ | 0.6038 | 0.56 | 200 | 0.6829 | -0.2121 | -0.2562 | 0.5930 | 0.0440 | 0.4186 | -0.2883 | 0.2265 | -283.1364 | -296.0924 | -2.1563 | -2.2736 |
71
+ | 0.4746 | 0.85 | 300 | 0.7105 | -0.7773 | -0.8546 | 0.5660 | 0.0773 | 1.0401 | -0.8434 | 0.6093 | -342.9795 | -352.6140 | -2.0904 | -2.1991 |
72
+ | 0.4288 | 1.13 | 400 | 0.7566 | -1.3505 | -1.4749 | 0.5700 | 0.1245 | 1.6613 | -1.3515 | 0.9884 | -405.0142 | -409.9261 | -2.0237 | -2.1254 |
73
+ | 0.3807 | 1.41 | 500 | 0.7770 | -1.7690 | -1.9759 | 0.5760 | 0.2069 | 2.1466 | -1.6287 | 1.2537 | -455.1077 | -451.7817 | -1.9637 | -2.0584 |
74
+ | 0.3449 | 1.69 | 600 | 0.8093 | -2.3053 | -2.6236 | 0.5730 | 0.3183 | 2.7910 | -1.9845 | 1.5908 | -519.8788 | -505.4114 | -1.8829 | -1.9707 |
75
+ | 0.3253 | 1.97 | 700 | 0.8022 | -2.3688 | -2.7622 | 0.5900 | 0.3934 | 3.0600 | -2.0479 | 1.6969 | -533.7401 | -511.7566 | -1.8637 | -1.9524 |
76
+ | 0.2445 | 2.25 | 800 | 0.8262 | -2.6179 | -3.0584 | 0.5880 | 0.4405 | 3.3852 | -2.2378 | 1.8658 | -563.3621 | -536.6691 | -1.8329 | -1.9194 |
77
+ | 0.3015 | 2.54 | 900 | 0.8293 | -2.6774 | -3.1416 | 0.5930 | 0.4642 | 3.5043 | -2.2912 | 1.9184 | -571.6796 | -542.6185 | -1.8281 | -1.9147 |
78
+ | 0.2725 | 2.82 | 1000 | 0.8251 | -2.6509 | -3.1193 | 0.5930 | 0.4684 | 3.5001 | -2.2741 | 1.9114 | -569.4471 | -539.9697 | -1.8277 | -1.9148 |
79
+
80
+
81
+ ### Framework versions
82
+
83
+ - PEFT 0.7.1
84
+ - Transformers 4.39.0.dev0
85
+ - PyTorch 2.1.2+cu121
86
+ - Datasets 2.14.6
87
+ - Tokenizers 0.15.2
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed6bd3baef1520dab71bab42d90dc717b4a43f675825e4eb8369788a99749cca
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de3069743f4f68e5e156c56059cf3ab89aa69b467d1185ff1a06b4c4fb5e5f2f
3
  size 671150064
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.4186206150502666,
4
+ "train_runtime": 20980.8825,
5
+ "train_samples": 5678,
6
+ "train_samples_per_second": 0.812,
7
+ "train_steps_per_second": 0.051
8
+ }
runs/Jul16_15-00-29_node25/events.out.tfevents.1721109642.node25.1416743.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b1ff8069a3fcb284d3978b5dbfcde3f5b5acd6d858d2885f84dbbedaafa6cf
3
- size 103256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2218acff401ab1091f48ff643166113dfad1733afd8009e8b374a7fb7049fbc2
3
+ size 108890
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.4186206150502666,
4
+ "train_runtime": 20980.8825,
5
+ "train_samples": 5678,
6
+ "train_samples_per_second": 0.812,
7
+ "train_steps_per_second": 0.051
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 100,
6
+ "global_step": 1065,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "grad_norm": 2.389887278304759,
14
+ "learning_rate": 4.6728971962616815e-09,
15
+ "logits/chosen": -2.4213736057281494,
16
+ "logits/rejected": -2.1724228858947754,
17
+ "logps/chosen": -311.7572021484375,
18
+ "logps/rejected": -242.86618041992188,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/margins_max": 0.0,
24
+ "rewards/margins_min": 0.0,
25
+ "rewards/margins_std": 0.0,
26
+ "rewards/rejected": 0.0,
27
+ "step": 1
28
+ },
29
+ {
30
+ "epoch": 0.03,
31
+ "grad_norm": 2.342772419823143,
32
+ "learning_rate": 4.672897196261682e-08,
33
+ "logits/chosen": -2.4386391639709473,
34
+ "logits/rejected": -2.2656712532043457,
35
+ "logps/chosen": -307.246826171875,
36
+ "logps/rejected": -312.6949157714844,
37
+ "loss": 0.6929,
38
+ "rewards/accuracies": 0.3333333432674408,
39
+ "rewards/chosen": 0.0003690579324029386,
40
+ "rewards/margins": 0.0005327555700205266,
41
+ "rewards/margins_max": 0.0024954795371741056,
42
+ "rewards/margins_min": -0.001210648100823164,
43
+ "rewards/margins_std": 0.001644498435780406,
44
+ "rewards/rejected": -0.0001636976667214185,
45
+ "step": 10
46
+ },
47
+ {
48
+ "epoch": 0.06,
49
+ "grad_norm": 2.708891346495448,
50
+ "learning_rate": 9.345794392523364e-08,
51
+ "logits/chosen": -2.376209259033203,
52
+ "logits/rejected": -2.2393646240234375,
53
+ "logps/chosen": -266.4408874511719,
54
+ "logps/rejected": -249.82864379882812,
55
+ "loss": 0.6932,
56
+ "rewards/accuracies": 0.4625000059604645,
57
+ "rewards/chosen": -4.360840466688387e-05,
58
+ "rewards/margins": -0.0001249962078873068,
59
+ "rewards/margins_max": 0.0032942264806479216,
60
+ "rewards/margins_min": -0.0032514198683202267,
61
+ "rewards/margins_std": 0.0029180804267525673,
62
+ "rewards/rejected": 8.138775592669845e-05,
63
+ "step": 20
64
+ },
65
+ {
66
+ "epoch": 0.08,
67
+ "grad_norm": 3.3723295030980913,
68
+ "learning_rate": 1.4018691588785045e-07,
69
+ "logits/chosen": -2.4367473125457764,
70
+ "logits/rejected": -2.2316088676452637,
71
+ "logps/chosen": -277.2991638183594,
72
+ "logps/rejected": -269.08251953125,
73
+ "loss": 0.6927,
74
+ "rewards/accuracies": 0.6625000238418579,
75
+ "rewards/chosen": 0.0005795572651550174,
76
+ "rewards/margins": 0.001557953073643148,
77
+ "rewards/margins_max": 0.005679330788552761,
78
+ "rewards/margins_min": -0.0020338702015578747,
79
+ "rewards/margins_std": 0.003446707036346197,
80
+ "rewards/rejected": -0.0009783958084881306,
81
+ "step": 30
82
+ },
83
+ {
84
+ "epoch": 0.11,
85
+ "grad_norm": 2.483665988923904,
86
+ "learning_rate": 1.8691588785046729e-07,
87
+ "logits/chosen": -2.5130770206451416,
88
+ "logits/rejected": -2.2882275581359863,
89
+ "logps/chosen": -264.2230529785156,
90
+ "logps/rejected": -255.9278106689453,
91
+ "loss": 0.6922,
92
+ "rewards/accuracies": 0.7124999761581421,
93
+ "rewards/chosen": 6.876837142044678e-05,
94
+ "rewards/margins": 0.0020052469335496426,
95
+ "rewards/margins_max": 0.006453676614910364,
96
+ "rewards/margins_min": -0.0016247869934886694,
97
+ "rewards/margins_std": 0.003689850913360715,
98
+ "rewards/rejected": -0.0019364787731319666,
99
+ "step": 40
100
+ },
101
+ {
102
+ "epoch": 0.14,
103
+ "grad_norm": 3.411713863829653,
104
+ "learning_rate": 2.336448598130841e-07,
105
+ "logits/chosen": -2.450375556945801,
106
+ "logits/rejected": -2.2387261390686035,
107
+ "logps/chosen": -256.00738525390625,
108
+ "logps/rejected": -272.00823974609375,
109
+ "loss": 0.691,
110
+ "rewards/accuracies": 0.762499988079071,
111
+ "rewards/chosen": 0.0013086812105029821,
112
+ "rewards/margins": 0.004717600531876087,
113
+ "rewards/margins_max": 0.0112897465005517,
114
+ "rewards/margins_min": -0.0005666814395226538,
115
+ "rewards/margins_std": 0.005381508264690638,
116
+ "rewards/rejected": -0.0034089195542037487,
117
+ "step": 50
118
+ },
119
+ {
120
+ "epoch": 0.17,
121
+ "grad_norm": 3.239077774946647,
122
+ "learning_rate": 2.803738317757009e-07,
123
+ "logits/chosen": -2.4118704795837402,
124
+ "logits/rejected": -2.242102861404419,
125
+ "logps/chosen": -288.9608459472656,
126
+ "logps/rejected": -306.24578857421875,
127
+ "loss": 0.6897,
128
+ "rewards/accuracies": 0.8999999761581421,
129
+ "rewards/chosen": 0.0009396459208801389,
130
+ "rewards/margins": 0.007233618758618832,
131
+ "rewards/margins_max": 0.01480179838836193,
132
+ "rewards/margins_min": 0.00047727013588882983,
133
+ "rewards/margins_std": 0.006457436829805374,
134
+ "rewards/rejected": -0.006293973419815302,
135
+ "step": 60
136
+ },
137
+ {
138
+ "epoch": 0.2,
139
+ "grad_norm": 2.4710687298974907,
140
+ "learning_rate": 3.271028037383177e-07,
141
+ "logits/chosen": -2.4623990058898926,
142
+ "logits/rejected": -2.2512192726135254,
143
+ "logps/chosen": -285.9200134277344,
144
+ "logps/rejected": -274.8953552246094,
145
+ "loss": 0.6884,
146
+ "rewards/accuracies": 0.875,
147
+ "rewards/chosen": 0.0010064051020890474,
148
+ "rewards/margins": 0.008187984116375446,
149
+ "rewards/margins_max": 0.018993791192770004,
150
+ "rewards/margins_min": -0.0002981324214488268,
151
+ "rewards/margins_std": 0.008526557125151157,
152
+ "rewards/rejected": -0.007181578781455755,
153
+ "step": 70
154
+ },
155
+ {
156
+ "epoch": 0.23,
157
+ "grad_norm": 2.72632149666877,
158
+ "learning_rate": 3.7383177570093457e-07,
159
+ "logits/chosen": -2.520550012588501,
160
+ "logits/rejected": -2.3816895484924316,
161
+ "logps/chosen": -301.37335205078125,
162
+ "logps/rejected": -331.9542236328125,
163
+ "loss": 0.6859,
164
+ "rewards/accuracies": 0.9624999761581421,
165
+ "rewards/chosen": 0.002128913998603821,
166
+ "rewards/margins": 0.015372690744698048,
167
+ "rewards/margins_max": 0.03150422126054764,
168
+ "rewards/margins_min": 0.002726194215938449,
169
+ "rewards/margins_std": 0.013175850734114647,
170
+ "rewards/rejected": -0.013243774883449078,
171
+ "step": 80
172
+ },
173
+ {
174
+ "epoch": 0.25,
175
+ "grad_norm": 3.532149748827834,
176
+ "learning_rate": 4.205607476635514e-07,
177
+ "logits/chosen": -2.3819632530212402,
178
+ "logits/rejected": -2.2091143131256104,
179
+ "logps/chosen": -247.6768035888672,
180
+ "logps/rejected": -231.6964111328125,
181
+ "loss": 0.6825,
182
+ "rewards/accuracies": 0.949999988079071,
183
+ "rewards/chosen": 0.003576954361051321,
184
+ "rewards/margins": 0.019739918410778046,
185
+ "rewards/margins_max": 0.043638236820697784,
186
+ "rewards/margins_min": 0.004001608118414879,
187
+ "rewards/margins_std": 0.017934972420334816,
188
+ "rewards/rejected": -0.016162965446710587,
189
+ "step": 90
190
+ },
191
+ {
192
+ "epoch": 0.28,
193
+ "grad_norm": 3.2302225335055166,
194
+ "learning_rate": 4.672897196261682e-07,
195
+ "logits/chosen": -2.382767915725708,
196
+ "logits/rejected": -2.2391555309295654,
197
+ "logps/chosen": -264.28216552734375,
198
+ "logps/rejected": -304.63079833984375,
199
+ "loss": 0.6787,
200
+ "rewards/accuracies": 0.9624999761581421,
201
+ "rewards/chosen": 0.0004217555688228458,
202
+ "rewards/margins": 0.025600453838706017,
203
+ "rewards/margins_max": 0.054918091744184494,
204
+ "rewards/margins_min": 0.0039414153434336185,
205
+ "rewards/margins_std": 0.02377716824412346,
206
+ "rewards/rejected": -0.0251787006855011,
207
+ "step": 100
208
+ },
209
+ {
210
+ "epoch": 0.28,
211
+ "eval_logits/chosen": -2.3056893348693848,
212
+ "eval_logits/rejected": -2.1834657192230225,
213
+ "eval_logps/chosen": -276.8445739746094,
214
+ "eval_logps/rejected": -260.2681884765625,
215
+ "eval_loss": 0.6901603937149048,
216
+ "eval_rewards/accuracies": 0.6050000190734863,
217
+ "eval_rewards/chosen": -0.019638627767562866,
218
+ "eval_rewards/margins": 0.007838762365281582,
219
+ "eval_rewards/margins_max": 0.06576983630657196,
220
+ "eval_rewards/margins_min": -0.04385747015476227,
221
+ "eval_rewards/margins_std": 0.03519851341843605,
222
+ "eval_rewards/rejected": -0.027477389201521873,
223
+ "eval_runtime": 739.5177,
224
+ "eval_samples_per_second": 2.704,
225
+ "eval_steps_per_second": 0.169,
226
+ "step": 100
227
+ },
228
+ {
229
+ "epoch": 0.31,
230
+ "grad_norm": 3.365787507138989,
231
+ "learning_rate": 4.999879018839287e-07,
232
+ "logits/chosen": -2.63311505317688,
233
+ "logits/rejected": -2.3202548027038574,
234
+ "logps/chosen": -347.92962646484375,
235
+ "logps/rejected": -333.94036865234375,
236
+ "loss": 0.671,
237
+ "rewards/accuracies": 0.9624999761581421,
238
+ "rewards/chosen": 0.005012608133256435,
239
+ "rewards/margins": 0.04701961204409599,
240
+ "rewards/margins_max": 0.09378845244646072,
241
+ "rewards/margins_min": 0.00973365269601345,
242
+ "rewards/margins_std": 0.03826902061700821,
243
+ "rewards/rejected": -0.04200700670480728,
244
+ "step": 110
245
+ },
246
+ {
247
+ "epoch": 0.34,
248
+ "grad_norm": 2.879990268796799,
249
+ "learning_rate": 4.997728568369408e-07,
250
+ "logits/chosen": -2.5022997856140137,
251
+ "logits/rejected": -2.3478662967681885,
252
+ "logps/chosen": -323.26910400390625,
253
+ "logps/rejected": -313.161865234375,
254
+ "loss": 0.6677,
255
+ "rewards/accuracies": 0.925000011920929,
256
+ "rewards/chosen": 0.006730080582201481,
257
+ "rewards/margins": 0.05950545519590378,
258
+ "rewards/margins_max": 0.12900672852993011,
259
+ "rewards/margins_min": 0.014946443028748035,
260
+ "rewards/margins_std": 0.05207683518528938,
261
+ "rewards/rejected": -0.05277537554502487,
262
+ "step": 120
263
+ },
264
+ {
265
+ "epoch": 0.37,
266
+ "grad_norm": 2.7102746510422207,
267
+ "learning_rate": 4.992892309373227e-07,
268
+ "logits/chosen": -2.4983277320861816,
269
+ "logits/rejected": -2.272451877593994,
270
+ "logps/chosen": -352.60589599609375,
271
+ "logps/rejected": -349.2779541015625,
272
+ "loss": 0.6585,
273
+ "rewards/accuracies": 1.0,
274
+ "rewards/chosen": 0.008741674944758415,
275
+ "rewards/margins": 0.07447224110364914,
276
+ "rewards/margins_max": 0.14192989468574524,
277
+ "rewards/margins_min": 0.014298012480139732,
278
+ "rewards/margins_std": 0.057725705206394196,
279
+ "rewards/rejected": -0.06573057919740677,
280
+ "step": 130
281
+ },
282
+ {
283
+ "epoch": 0.39,
284
+ "grad_norm": 2.2127681203334464,
285
+ "learning_rate": 4.985375442281968e-07,
286
+ "logits/chosen": -2.4224705696105957,
287
+ "logits/rejected": -2.2639002799987793,
288
+ "logps/chosen": -293.6229248046875,
289
+ "logps/rejected": -315.5215759277344,
290
+ "loss": 0.6574,
291
+ "rewards/accuracies": 0.925000011920929,
292
+ "rewards/chosen": 0.003200127277523279,
293
+ "rewards/margins": 0.08671971410512924,
294
+ "rewards/margins_max": 0.17399398982524872,
295
+ "rewards/margins_min": 0.014883764088153839,
296
+ "rewards/margins_std": 0.0733809694647789,
297
+ "rewards/rejected": -0.0835195854306221,
298
+ "step": 140
299
+ },
300
+ {
301
+ "epoch": 0.42,
302
+ "grad_norm": 2.9890965365572253,
303
+ "learning_rate": 4.975186049985817e-07,
304
+ "logits/chosen": -2.349522829055786,
305
+ "logits/rejected": -2.1850757598876953,
306
+ "logps/chosen": -268.02728271484375,
307
+ "logps/rejected": -252.3187713623047,
308
+ "loss": 0.6457,
309
+ "rewards/accuracies": 0.9624999761581421,
310
+ "rewards/chosen": 0.00209013931453228,
311
+ "rewards/margins": 0.09418609738349915,
312
+ "rewards/margins_max": 0.189139723777771,
313
+ "rewards/margins_min": 0.019670698791742325,
314
+ "rewards/margins_std": 0.07843033969402313,
315
+ "rewards/rejected": -0.09209596365690231,
316
+ "step": 150
317
+ },
318
+ {
319
+ "epoch": 0.45,
320
+ "grad_norm": 2.5940129709794473,
321
+ "learning_rate": 4.962335089142375e-07,
322
+ "logits/chosen": -2.3253729343414307,
323
+ "logits/rejected": -2.1711182594299316,
324
+ "logps/chosen": -270.85052490234375,
325
+ "logps/rejected": -279.93963623046875,
326
+ "loss": 0.639,
327
+ "rewards/accuracies": 0.9750000238418579,
328
+ "rewards/chosen": -0.0029645890463143587,
329
+ "rewards/margins": 0.11249279975891113,
330
+ "rewards/margins_max": 0.23242349922657013,
331
+ "rewards/margins_min": 0.026638973504304886,
332
+ "rewards/margins_std": 0.09201127290725708,
333
+ "rewards/rejected": -0.11545737832784653,
334
+ "step": 160
335
+ },
336
+ {
337
+ "epoch": 0.48,
338
+ "grad_norm": 2.9651475431583085,
339
+ "learning_rate": 4.946836378394966e-07,
340
+ "logits/chosen": -2.4186384677886963,
341
+ "logits/rejected": -2.2767791748046875,
342
+ "logps/chosen": -299.94921875,
343
+ "logps/rejected": -348.6862487792969,
344
+ "loss": 0.63,
345
+ "rewards/accuracies": 0.9750000238418579,
346
+ "rewards/chosen": -0.002911825431510806,
347
+ "rewards/margins": 0.1475684940814972,
348
+ "rewards/margins_max": 0.2743604779243469,
349
+ "rewards/margins_min": 0.03711001202464104,
350
+ "rewards/margins_std": 0.10615728050470352,
351
+ "rewards/rejected": -0.15048031508922577,
352
+ "step": 170
353
+ },
354
+ {
355
+ "epoch": 0.51,
356
+ "grad_norm": 2.8561721958106068,
357
+ "learning_rate": 4.92870658351344e-07,
358
+ "logits/chosen": -2.456223964691162,
359
+ "logits/rejected": -2.226595401763916,
360
+ "logps/chosen": -309.79547119140625,
361
+ "logps/rejected": -350.2745666503906,
362
+ "loss": 0.6219,
363
+ "rewards/accuracies": 0.9750000238418579,
364
+ "rewards/chosen": -0.02767277881503105,
365
+ "rewards/margins": 0.15511782467365265,
366
+ "rewards/margins_max": 0.3061321973800659,
367
+ "rewards/margins_min": 0.04783701151609421,
368
+ "rewards/margins_std": 0.11824627220630646,
369
+ "rewards/rejected": -0.182790607213974,
370
+ "step": 180
371
+ },
372
+ {
373
+ "epoch": 0.54,
374
+ "grad_norm": 3.036470774150159,
375
+ "learning_rate": 4.90796519947347e-07,
376
+ "logits/chosen": -2.3368537425994873,
377
+ "logits/rejected": -2.083339214324951,
378
+ "logps/chosen": -293.1517639160156,
379
+ "logps/rejected": -294.11480712890625,
380
+ "loss": 0.6092,
381
+ "rewards/accuracies": 0.9624999761581421,
382
+ "rewards/chosen": -0.01847015507519245,
383
+ "rewards/margins": 0.18039044737815857,
384
+ "rewards/margins_max": 0.36516961455345154,
385
+ "rewards/margins_min": 0.041585661470890045,
386
+ "rewards/margins_std": 0.14767181873321533,
387
+ "rewards/rejected": -0.19886058568954468,
388
+ "step": 190
389
+ },
390
+ {
391
+ "epoch": 0.56,
392
+ "grad_norm": 3.196285034512815,
393
+ "learning_rate": 4.88463452949359e-07,
394
+ "logits/chosen": -2.472463607788086,
395
+ "logits/rejected": -2.262190103530884,
396
+ "logps/chosen": -323.2473449707031,
397
+ "logps/rejected": -331.390625,
398
+ "loss": 0.6038,
399
+ "rewards/accuracies": 0.9750000238418579,
400
+ "rewards/chosen": -0.0690370723605156,
401
+ "rewards/margins": 0.21737924218177795,
402
+ "rewards/margins_max": 0.4769272804260254,
403
+ "rewards/margins_min": 0.030699292197823524,
404
+ "rewards/margins_std": 0.19959601759910583,
405
+ "rewards/rejected": -0.28641632199287415,
406
+ "step": 200
407
+ },
408
+ {
409
+ "epoch": 0.56,
410
+ "eval_logits/chosen": -2.273627519607544,
411
+ "eval_logits/rejected": -2.1563339233398438,
412
+ "eval_logps/chosen": -296.0924072265625,
413
+ "eval_logps/rejected": -283.1363525390625,
414
+ "eval_loss": 0.6828743815422058,
415
+ "eval_rewards/accuracies": 0.5929999947547913,
416
+ "eval_rewards/chosen": -0.21211715042591095,
417
+ "eval_rewards/margins": 0.04404139891266823,
418
+ "eval_rewards/margins_max": 0.41857707500457764,
419
+ "eval_rewards/margins_min": -0.28833016753196716,
420
+ "eval_rewards/margins_std": 0.2264755815267563,
421
+ "eval_rewards/rejected": -0.2561585307121277,
422
+ "eval_runtime": 740.6676,
423
+ "eval_samples_per_second": 2.7,
424
+ "eval_steps_per_second": 0.169,
425
+ "step": 200
426
+ },
427
+ {
428
+ "epoch": 0.59,
429
+ "grad_norm": 3.02540953835276,
430
+ "learning_rate": 4.858739661052539e-07,
431
+ "logits/chosen": -2.3480424880981445,
432
+ "logits/rejected": -2.227659225463867,
433
+ "logps/chosen": -289.94549560546875,
434
+ "logps/rejected": -328.08551025390625,
435
+ "loss": 0.5879,
436
+ "rewards/accuracies": 0.9125000238418579,
437
+ "rewards/chosen": -0.05972708389163017,
438
+ "rewards/margins": 0.23827257752418518,
439
+ "rewards/margins_max": 0.48445525765419006,
440
+ "rewards/margins_min": 0.0391400083899498,
441
+ "rewards/margins_std": 0.2039383351802826,
442
+ "rewards/rejected": -0.29799962043762207,
443
+ "step": 210
444
+ },
445
+ {
446
+ "epoch": 0.62,
447
+ "grad_norm": 2.7276849485746,
448
+ "learning_rate": 4.830308438912687e-07,
449
+ "logits/chosen": -2.473395824432373,
450
+ "logits/rejected": -2.196845531463623,
451
+ "logps/chosen": -333.0096435546875,
452
+ "logps/rejected": -351.48321533203125,
453
+ "loss": 0.5788,
454
+ "rewards/accuracies": 0.9624999761581421,
455
+ "rewards/chosen": -0.09599296748638153,
456
+ "rewards/margins": 0.2686784863471985,
457
+ "rewards/margins_max": 0.5082738995552063,
458
+ "rewards/margins_min": 0.044809646904468536,
459
+ "rewards/margins_std": 0.2064463347196579,
460
+ "rewards/rejected": -0.3646714687347412,
461
+ "step": 220
462
+ },
463
+ {
464
+ "epoch": 0.65,
465
+ "grad_norm": 2.8196760394643383,
466
+ "learning_rate": 4.799371435178545e-07,
467
+ "logits/chosen": -2.5183844566345215,
468
+ "logits/rejected": -2.2554872035980225,
469
+ "logps/chosen": -356.23358154296875,
470
+ "logps/rejected": -340.7899169921875,
471
+ "loss": 0.5631,
472
+ "rewards/accuracies": 0.949999988079071,
473
+ "rewards/chosen": -0.11530466377735138,
474
+ "rewards/margins": 0.29081040620803833,
475
+ "rewards/margins_max": 0.5872799754142761,
476
+ "rewards/margins_min": 0.0643739253282547,
477
+ "rewards/margins_std": 0.23660854995250702,
478
+ "rewards/rejected": -0.4061151146888733,
479
+ "step": 230
480
+ },
481
+ {
482
+ "epoch": 0.68,
483
+ "grad_norm": 2.402815708848366,
484
+ "learning_rate": 4.765961916422574e-07,
485
+ "logits/chosen": -2.409052610397339,
486
+ "logits/rejected": -2.2241270542144775,
487
+ "logps/chosen": -325.99786376953125,
488
+ "logps/rejected": -373.8414611816406,
489
+ "loss": 0.5776,
490
+ "rewards/accuracies": 0.949999988079071,
491
+ "rewards/chosen": -0.1929856687784195,
492
+ "rewards/margins": 0.2687362730503082,
493
+ "rewards/margins_max": 0.5277136564254761,
494
+ "rewards/margins_min": 0.040412187576293945,
495
+ "rewards/margins_std": 0.22582125663757324,
496
+ "rewards/rejected": -0.46172189712524414,
497
+ "step": 240
498
+ },
499
+ {
500
+ "epoch": 0.7,
501
+ "grad_norm": 2.9799913096461217,
502
+ "learning_rate": 4.730115807913626e-07,
503
+ "logits/chosen": -2.446721076965332,
504
+ "logits/rejected": -2.2028250694274902,
505
+ "logps/chosen": -303.15399169921875,
506
+ "logps/rejected": -335.6115417480469,
507
+ "loss": 0.5469,
508
+ "rewards/accuracies": 0.925000011920929,
509
+ "rewards/chosen": -0.16406993567943573,
510
+ "rewards/margins": 0.3364391326904297,
511
+ "rewards/margins_max": 0.7914501428604126,
512
+ "rewards/margins_min": 0.05388214439153671,
513
+ "rewards/margins_std": 0.33590734004974365,
514
+ "rewards/rejected": -0.5005090832710266,
515
+ "step": 250
516
+ },
517
+ {
518
+ "epoch": 0.73,
519
+ "grad_norm": 2.770663569324122,
520
+ "learning_rate": 4.691871654986485e-07,
521
+ "logits/chosen": -2.3937630653381348,
522
+ "logits/rejected": -2.21606183052063,
523
+ "logps/chosen": -288.9862060546875,
524
+ "logps/rejected": -352.5678405761719,
525
+ "loss": 0.536,
526
+ "rewards/accuracies": 0.925000011920929,
527
+ "rewards/chosen": -0.2037801295518875,
528
+ "rewards/margins": 0.34631386399269104,
529
+ "rewards/margins_max": 0.792448103427887,
530
+ "rewards/margins_min": 0.02828853949904442,
531
+ "rewards/margins_std": 0.3575611412525177,
532
+ "rewards/rejected": -0.5500940084457397,
533
+ "step": 260
534
+ },
535
+ {
536
+ "epoch": 0.76,
537
+ "grad_norm": 3.098378120195792,
538
+ "learning_rate": 4.6512705815940536e-07,
539
+ "logits/chosen": -2.3696112632751465,
540
+ "logits/rejected": -2.18962025642395,
541
+ "logps/chosen": -306.8760070800781,
542
+ "logps/rejected": -372.0642395019531,
543
+ "loss": 0.5394,
544
+ "rewards/accuracies": 0.887499988079071,
545
+ "rewards/chosen": -0.2417019158601761,
546
+ "rewards/margins": 0.41768041253089905,
547
+ "rewards/margins_max": 0.8514153361320496,
548
+ "rewards/margins_min": 0.05087714642286301,
549
+ "rewards/margins_std": 0.3643653392791748,
550
+ "rewards/rejected": -0.6593823432922363,
551
+ "step": 270
552
+ },
553
+ {
554
+ "epoch": 0.79,
555
+ "grad_norm": 2.966546455548068,
556
+ "learning_rate": 4.6083562460867544e-07,
557
+ "logits/chosen": -2.3208565711975098,
558
+ "logits/rejected": -2.2000744342803955,
559
+ "logps/chosen": -313.9183349609375,
560
+ "logps/rejected": -385.6065368652344,
561
+ "loss": 0.5172,
562
+ "rewards/accuracies": 0.925000011920929,
563
+ "rewards/chosen": -0.2830614447593689,
564
+ "rewards/margins": 0.4400337338447571,
565
+ "rewards/margins_max": 0.9521900415420532,
566
+ "rewards/margins_min": 0.09125441312789917,
567
+ "rewards/margins_std": 0.40178269147872925,
568
+ "rewards/rejected": -0.7230951189994812,
569
+ "step": 280
570
+ },
571
+ {
572
+ "epoch": 0.82,
573
+ "grad_norm": 3.1108063896750258,
574
+ "learning_rate": 4.563174794266683e-07,
575
+ "logits/chosen": -2.305835008621216,
576
+ "logits/rejected": -2.194869041442871,
577
+ "logps/chosen": -294.03326416015625,
578
+ "logps/rejected": -368.85650634765625,
579
+ "loss": 0.5334,
580
+ "rewards/accuracies": 0.875,
581
+ "rewards/chosen": -0.28016966581344604,
582
+ "rewards/margins": 0.4169536232948303,
583
+ "rewards/margins_max": 0.9842053651809692,
584
+ "rewards/margins_min": 0.05335196107625961,
585
+ "rewards/margins_std": 0.42577672004699707,
586
+ "rewards/rejected": -0.6971233487129211,
587
+ "step": 290
588
+ },
589
+ {
590
+ "epoch": 0.85,
591
+ "grad_norm": 3.4198160465582594,
592
+ "learning_rate": 4.515774809767012e-07,
593
+ "logits/chosen": -2.4141030311584473,
594
+ "logits/rejected": -2.219494581222534,
595
+ "logps/chosen": -346.39154052734375,
596
+ "logps/rejected": -413.7733459472656,
597
+ "loss": 0.4746,
598
+ "rewards/accuracies": 0.949999988079071,
599
+ "rewards/chosen": -0.3215632438659668,
600
+ "rewards/margins": 0.559494137763977,
601
+ "rewards/margins_max": 1.2075681686401367,
602
+ "rewards/margins_min": 0.1119498759508133,
603
+ "rewards/margins_std": 0.4947145879268646,
604
+ "rewards/rejected": -0.8810573816299438,
605
+ "step": 300
606
+ },
607
+ {
608
+ "epoch": 0.85,
609
+ "eval_logits/chosen": -2.1990644931793213,
610
+ "eval_logits/rejected": -2.090372085571289,
611
+ "eval_logps/chosen": -352.614013671875,
612
+ "eval_logps/rejected": -342.9794921875,
613
+ "eval_loss": 0.7105380892753601,
614
+ "eval_rewards/accuracies": 0.5659999847412109,
615
+ "eval_rewards/chosen": -0.7773330211639404,
616
+ "eval_rewards/margins": 0.07725735753774643,
617
+ "eval_rewards/margins_max": 1.040091872215271,
618
+ "eval_rewards/margins_min": -0.8433572053909302,
619
+ "eval_rewards/margins_std": 0.6093021035194397,
620
+ "eval_rewards/rejected": -0.8545902967453003,
621
+ "eval_runtime": 741.0262,
622
+ "eval_samples_per_second": 2.699,
623
+ "eval_steps_per_second": 0.169,
624
+ "step": 300
625
+ },
626
+ {
627
+ "epoch": 0.87,
628
+ "grad_norm": 2.5169015808444857,
629
+ "learning_rate": 4.4662072618099887e-07,
630
+ "logits/chosen": -2.445680856704712,
631
+ "logits/rejected": -2.1884543895721436,
632
+ "logps/chosen": -313.87322998046875,
633
+ "logps/rejected": -332.14422607421875,
634
+ "loss": 0.5121,
635
+ "rewards/accuracies": 0.887499988079071,
636
+ "rewards/chosen": -0.4068140387535095,
637
+ "rewards/margins": 0.4162236154079437,
638
+ "rewards/margins_max": 0.9356614947319031,
639
+ "rewards/margins_min": 0.056422315537929535,
640
+ "rewards/margins_std": 0.3940203785896301,
641
+ "rewards/rejected": -0.8230376243591309,
642
+ "step": 310
643
+ },
644
+ {
645
+ "epoch": 0.9,
646
+ "grad_norm": 3.3837390362012707,
647
+ "learning_rate": 4.414525450399712e-07,
648
+ "logits/chosen": -2.325852632522583,
649
+ "logits/rejected": -2.133544683456421,
650
+ "logps/chosen": -339.6539611816406,
651
+ "logps/rejected": -396.7124328613281,
652
+ "loss": 0.4964,
653
+ "rewards/accuracies": 0.875,
654
+ "rewards/chosen": -0.48702773451805115,
655
+ "rewards/margins": 0.5068559050559998,
656
+ "rewards/margins_max": 1.1567133665084839,
657
+ "rewards/margins_min": -0.029241245239973068,
658
+ "rewards/margins_std": 0.5536248683929443,
659
+ "rewards/rejected": -0.9938837289810181,
660
+ "step": 320
661
+ },
662
+ {
663
+ "epoch": 0.93,
664
+ "grad_norm": 3.325411841334806,
665
+ "learning_rate": 4.360784949008615e-07,
666
+ "logits/chosen": -2.4802184104919434,
667
+ "logits/rejected": -2.291903018951416,
668
+ "logps/chosen": -379.33331298828125,
669
+ "logps/rejected": -424.1192932128906,
670
+ "loss": 0.4802,
671
+ "rewards/accuracies": 0.987500011920929,
672
+ "rewards/chosen": -0.49472981691360474,
673
+ "rewards/margins": 0.692683756351471,
674
+ "rewards/margins_max": 1.5269715785980225,
675
+ "rewards/margins_min": 0.13377800583839417,
676
+ "rewards/margins_std": 0.6406728029251099,
677
+ "rewards/rejected": -1.1874135732650757,
678
+ "step": 330
679
+ },
680
+ {
681
+ "epoch": 0.96,
682
+ "grad_norm": 3.1486731504941217,
683
+ "learning_rate": 4.305043544819289e-07,
684
+ "logits/chosen": -2.2821261882781982,
685
+ "logits/rejected": -2.1466071605682373,
686
+ "logps/chosen": -322.30230712890625,
687
+ "logps/rejected": -388.6499328613281,
688
+ "loss": 0.4944,
689
+ "rewards/accuracies": 0.8999999761581421,
690
+ "rewards/chosen": -0.4555594027042389,
691
+ "rewards/margins": 0.5605371594429016,
692
+ "rewards/margins_max": 1.1739370822906494,
693
+ "rewards/margins_min": 0.10414503514766693,
694
+ "rewards/margins_std": 0.4933851659297943,
695
+ "rewards/rejected": -1.016096591949463,
696
+ "step": 340
697
+ },
698
+ {
699
+ "epoch": 0.99,
700
+ "grad_norm": 3.4872610687117387,
701
+ "learning_rate": 4.247361176585903e-07,
702
+ "logits/chosen": -2.2431674003601074,
703
+ "logits/rejected": -2.1132471561431885,
704
+ "logps/chosen": -330.2970886230469,
705
+ "logps/rejected": -435.4002990722656,
706
+ "loss": 0.4735,
707
+ "rewards/accuracies": 0.8999999761581421,
708
+ "rewards/chosen": -0.48258066177368164,
709
+ "rewards/margins": 0.6382626295089722,
710
+ "rewards/margins_max": 1.3624789714813232,
711
+ "rewards/margins_min": 0.10766267776489258,
712
+ "rewards/margins_std": 0.5704151391983032,
713
+ "rewards/rejected": -1.1208432912826538,
714
+ "step": 350
715
+ },
716
+ {
717
+ "epoch": 1.01,
718
+ "grad_norm": 3.207929181161526,
719
+ "learning_rate": 4.187799870182038e-07,
720
+ "logits/chosen": -2.299013376235962,
721
+ "logits/rejected": -2.159662961959839,
722
+ "logps/chosen": -329.9014892578125,
723
+ "logps/rejected": -411.421875,
724
+ "loss": 0.4418,
725
+ "rewards/accuracies": 0.8999999761581421,
726
+ "rewards/chosen": -0.49954190850257874,
727
+ "rewards/margins": 0.6922783255577087,
728
+ "rewards/margins_max": 1.497651219367981,
729
+ "rewards/margins_min": 0.14925283193588257,
730
+ "rewards/margins_std": 0.6044853925704956,
731
+ "rewards/rejected": -1.1918203830718994,
732
+ "step": 360
733
+ },
734
+ {
735
+ "epoch": 1.04,
736
+ "grad_norm": 4.274237019230171,
737
+ "learning_rate": 4.126423671904236e-07,
738
+ "logits/chosen": -2.34145188331604,
739
+ "logits/rejected": -2.116023063659668,
740
+ "logps/chosen": -384.4696960449219,
741
+ "logps/rejected": -413.1422424316406,
742
+ "loss": 0.4731,
743
+ "rewards/accuracies": 0.887499988079071,
744
+ "rewards/chosen": -0.616447925567627,
745
+ "rewards/margins": 0.7246918678283691,
746
+ "rewards/margins_max": 1.6939563751220703,
747
+ "rewards/margins_min": -0.03321857377886772,
748
+ "rewards/margins_std": 0.7812119722366333,
749
+ "rewards/rejected": -1.341139793395996,
750
+ "step": 370
751
+ },
752
+ {
753
+ "epoch": 1.07,
754
+ "grad_norm": 3.3134994438588827,
755
+ "learning_rate": 4.0632985796030007e-07,
756
+ "logits/chosen": -2.20294451713562,
757
+ "logits/rejected": -2.036015033721924,
758
+ "logps/chosen": -314.45513916015625,
759
+ "logps/rejected": -395.52667236328125,
760
+ "loss": 0.4449,
761
+ "rewards/accuracies": 0.925000011920929,
762
+ "rewards/chosen": -0.549362063407898,
763
+ "rewards/margins": 0.6994296312332153,
764
+ "rewards/margins_max": 1.5058152675628662,
765
+ "rewards/margins_min": 0.12678512930870056,
766
+ "rewards/margins_std": 0.6280149817466736,
767
+ "rewards/rejected": -1.2487916946411133,
768
+ "step": 380
769
+ },
770
+ {
771
+ "epoch": 1.1,
772
+ "grad_norm": 2.772777984001906,
773
+ "learning_rate": 3.9984924717152713e-07,
774
+ "logits/chosen": -2.216956853866577,
775
+ "logits/rejected": -2.116546154022217,
776
+ "logps/chosen": -351.6098937988281,
777
+ "logps/rejected": -451.20562744140625,
778
+ "loss": 0.4402,
779
+ "rewards/accuracies": 0.9125000238418579,
780
+ "rewards/chosen": -0.7173673510551453,
781
+ "rewards/margins": 0.8140894174575806,
782
+ "rewards/margins_max": 1.774478554725647,
783
+ "rewards/margins_min": 0.1617562621831894,
784
+ "rewards/margins_std": 0.7393523454666138,
785
+ "rewards/rejected": -1.531456708908081,
786
+ "step": 390
787
+ },
788
+ {
789
+ "epoch": 1.13,
790
+ "grad_norm": 3.7243228476083776,
791
+ "learning_rate": 3.932075034274723e-07,
792
+ "logits/chosen": -2.2378880977630615,
793
+ "logits/rejected": -2.064462184906006,
794
+ "logps/chosen": -347.496337890625,
795
+ "logps/rejected": -426.29315185546875,
796
+ "loss": 0.4288,
797
+ "rewards/accuracies": 0.875,
798
+ "rewards/chosen": -0.7525204420089722,
799
+ "rewards/margins": 0.7026923894882202,
800
+ "rewards/margins_max": 1.5236282348632812,
801
+ "rewards/margins_min": 0.09952196478843689,
802
+ "rewards/margins_std": 0.647875189781189,
803
+ "rewards/rejected": -1.4552127122879028,
804
+ "step": 400
805
+ },
806
+ {
807
+ "epoch": 1.13,
808
+ "eval_logits/chosen": -2.1253983974456787,
809
+ "eval_logits/rejected": -2.0236704349517822,
810
+ "eval_logps/chosen": -409.9260559082031,
811
+ "eval_logps/rejected": -405.01422119140625,
812
+ "eval_loss": 0.7566000819206238,
813
+ "eval_rewards/accuracies": 0.5699999928474426,
814
+ "eval_rewards/chosen": -1.350453495979309,
815
+ "eval_rewards/margins": 0.12448413670063019,
816
+ "eval_rewards/margins_max": 1.6613168716430664,
817
+ "eval_rewards/margins_min": -1.3515390157699585,
818
+ "eval_rewards/margins_std": 0.9884259700775146,
819
+ "eval_rewards/rejected": -1.4749376773834229,
820
+ "eval_runtime": 740.8805,
821
+ "eval_samples_per_second": 2.699,
822
+ "eval_steps_per_second": 0.169,
823
+ "step": 400
824
+ },
825
+ {
826
+ "epoch": 1.15,
827
+ "grad_norm": 3.709048912929159,
828
+ "learning_rate": 3.8641176859783383e-07,
829
+ "logits/chosen": -2.263033151626587,
830
+ "logits/rejected": -2.110279083251953,
831
+ "logps/chosen": -360.94451904296875,
832
+ "logps/rejected": -447.3453674316406,
833
+ "loss": 0.446,
834
+ "rewards/accuracies": 0.887499988079071,
835
+ "rewards/chosen": -0.8266700506210327,
836
+ "rewards/margins": 0.7980839014053345,
837
+ "rewards/margins_max": 1.9258846044540405,
838
+ "rewards/margins_min": 0.028872152790427208,
839
+ "rewards/margins_std": 0.8716685175895691,
840
+ "rewards/rejected": -1.6247539520263672,
841
+ "step": 410
842
+ },
843
+ {
844
+ "epoch": 1.18,
845
+ "grad_norm": 3.1370524113769718,
846
+ "learning_rate": 3.7946935013898606e-07,
847
+ "logits/chosen": -2.22702693939209,
848
+ "logits/rejected": -2.058694839477539,
849
+ "logps/chosen": -378.8398742675781,
850
+ "logps/rejected": -486.36895751953125,
851
+ "loss": 0.4121,
852
+ "rewards/accuracies": 0.925000011920929,
853
+ "rewards/chosen": -0.867613673210144,
854
+ "rewards/margins": 0.9479442834854126,
855
+ "rewards/margins_max": 1.9218909740447998,
856
+ "rewards/margins_min": 0.206168532371521,
857
+ "rewards/margins_std": 0.7779111862182617,
858
+ "rewards/rejected": -1.815557837486267,
859
+ "step": 420
860
+ },
861
+ {
862
+ "epoch": 1.21,
863
+ "grad_norm": 5.732961537629863,
864
+ "learning_rate": 3.7238771323626817e-07,
865
+ "logits/chosen": -2.271839141845703,
866
+ "logits/rejected": -2.0797672271728516,
867
+ "logps/chosen": -428.86376953125,
868
+ "logps/rejected": -498.2832946777344,
869
+ "loss": 0.4013,
870
+ "rewards/accuracies": 0.824999988079071,
871
+ "rewards/chosen": -1.0997209548950195,
872
+ "rewards/margins": 0.86876380443573,
873
+ "rewards/margins_max": 1.956595778465271,
874
+ "rewards/margins_min": -0.09084595739841461,
875
+ "rewards/margins_std": 0.9087589383125305,
876
+ "rewards/rejected": -1.96848464012146,
877
+ "step": 430
878
+ },
879
+ {
880
+ "epoch": 1.24,
881
+ "grad_norm": 3.2379293031944587,
882
+ "learning_rate": 3.651744727766676e-07,
883
+ "logits/chosen": -2.2412235736846924,
884
+ "logits/rejected": -2.06864333152771,
885
+ "logps/chosen": -367.8034362792969,
886
+ "logps/rejected": -472.5411071777344,
887
+ "loss": 0.4283,
888
+ "rewards/accuracies": 0.875,
889
+ "rewards/chosen": -0.9834961891174316,
890
+ "rewards/margins": 0.9132775068283081,
891
+ "rewards/margins_max": 1.9850295782089233,
892
+ "rewards/margins_min": 0.06963314116001129,
893
+ "rewards/margins_std": 0.8529081344604492,
894
+ "rewards/rejected": -1.8967736959457397,
895
+ "step": 440
896
+ },
897
+ {
898
+ "epoch": 1.27,
899
+ "grad_norm": 3.679847665204543,
900
+ "learning_rate": 3.5783738516052897e-07,
901
+ "logits/chosen": -2.1635823249816895,
902
+ "logits/rejected": -2.008791208267212,
903
+ "logps/chosen": -370.2854919433594,
904
+ "logps/rejected": -473.046630859375,
905
+ "loss": 0.3927,
906
+ "rewards/accuracies": 0.9125000238418579,
907
+ "rewards/chosen": -0.8544079065322876,
908
+ "rewards/margins": 0.9354113340377808,
909
+ "rewards/margins_max": 2.0640413761138916,
910
+ "rewards/margins_min": 0.09445329755544662,
911
+ "rewards/margins_std": 0.8850519061088562,
912
+ "rewards/rejected": -1.789819359779358,
913
+ "step": 450
914
+ },
915
+ {
916
+ "epoch": 1.3,
917
+ "grad_norm": 3.9802612609991503,
918
+ "learning_rate": 3.5038433996109404e-07,
919
+ "logits/chosen": -2.3022890090942383,
920
+ "logits/rejected": -2.148092269897461,
921
+ "logps/chosen": -420.42999267578125,
922
+ "logps/rejected": -540.9712524414062,
923
+ "loss": 0.379,
924
+ "rewards/accuracies": 0.925000011920929,
925
+ "rewards/chosen": -0.9502747654914856,
926
+ "rewards/margins": 1.1226153373718262,
927
+ "rewards/margins_max": 2.0989441871643066,
928
+ "rewards/margins_min": 0.2553909122943878,
929
+ "rewards/margins_std": 0.8486478924751282,
930
+ "rewards/rejected": -2.072890281677246,
931
+ "step": 460
932
+ },
933
+ {
934
+ "epoch": 1.32,
935
+ "grad_norm": 3.153480145327942,
936
+ "learning_rate": 3.428233514408398e-07,
937
+ "logits/chosen": -2.232956647872925,
938
+ "logits/rejected": -2.0882651805877686,
939
+ "logps/chosen": -383.5642395019531,
940
+ "logps/rejected": -496.1742248535156,
941
+ "loss": 0.3923,
942
+ "rewards/accuracies": 0.9375,
943
+ "rewards/chosen": -0.9364080429077148,
944
+ "rewards/margins": 1.1441760063171387,
945
+ "rewards/margins_max": 2.6022720336914062,
946
+ "rewards/margins_min": 0.20014441013336182,
947
+ "rewards/margins_std": 1.0920383930206299,
948
+ "rewards/rejected": -2.0805840492248535,
949
+ "step": 470
950
+ },
951
+ {
952
+ "epoch": 1.35,
953
+ "grad_norm": 3.401868329298301,
954
+ "learning_rate": 3.3516254993373945e-07,
955
+ "logits/chosen": -2.2222964763641357,
956
+ "logits/rejected": -2.030994176864624,
957
+ "logps/chosen": -403.7270812988281,
958
+ "logps/rejected": -489.4800720214844,
959
+ "loss": 0.4117,
960
+ "rewards/accuracies": 0.8999999761581421,
961
+ "rewards/chosen": -1.032002568244934,
962
+ "rewards/margins": 1.0399589538574219,
963
+ "rewards/margins_max": 2.25704026222229,
964
+ "rewards/margins_min": 0.06191538646817207,
965
+ "rewards/margins_std": 0.9929068684577942,
966
+ "rewards/rejected": -2.0719614028930664,
967
+ "step": 480
968
+ },
969
+ {
970
+ "epoch": 1.38,
971
+ "grad_norm": 3.3964950447287845,
972
+ "learning_rate": 3.274101731027105e-07,
973
+ "logits/chosen": -2.2032177448272705,
974
+ "logits/rejected": -2.071004867553711,
975
+ "logps/chosen": -400.43402099609375,
976
+ "logps/rejected": -543.1834106445312,
977
+ "loss": 0.3817,
978
+ "rewards/accuracies": 0.875,
979
+ "rewards/chosen": -0.9758344888687134,
980
+ "rewards/margins": 1.1685841083526611,
981
+ "rewards/margins_max": 2.530055046081543,
982
+ "rewards/margins_min": 0.010178548283874989,
983
+ "rewards/margins_std": 1.1102135181427002,
984
+ "rewards/rejected": -2.144418716430664,
985
+ "step": 490
986
+ },
987
+ {
988
+ "epoch": 1.41,
989
+ "grad_norm": 3.647023353729911,
990
+ "learning_rate": 3.1957455708165314e-07,
991
+ "logits/chosen": -2.1546573638916016,
992
+ "logits/rejected": -2.0752437114715576,
993
+ "logps/chosen": -373.78045654296875,
994
+ "logps/rejected": -493.3614807128906,
995
+ "loss": 0.3807,
996
+ "rewards/accuracies": 0.887499988079071,
997
+ "rewards/chosen": -1.0127661228179932,
998
+ "rewards/margins": 1.0738728046417236,
999
+ "rewards/margins_max": 2.1911089420318604,
1000
+ "rewards/margins_min": 0.05973333120346069,
1001
+ "rewards/margins_std": 0.9444160461425781,
1002
+ "rewards/rejected": -2.086638927459717,
1003
+ "step": 500
1004
+ },
1005
+ {
1006
+ "epoch": 1.41,
1007
+ "eval_logits/chosen": -2.0583739280700684,
1008
+ "eval_logits/rejected": -1.9637471437454224,
1009
+ "eval_logps/chosen": -451.7817077636719,
1010
+ "eval_logps/rejected": -455.107666015625,
1011
+ "eval_loss": 0.7769566178321838,
1012
+ "eval_rewards/accuracies": 0.5759999752044678,
1013
+ "eval_rewards/chosen": -1.769010305404663,
1014
+ "eval_rewards/margins": 0.20686142146587372,
1015
+ "eval_rewards/margins_max": 2.146589994430542,
1016
+ "eval_rewards/margins_min": -1.6287314891815186,
1017
+ "eval_rewards/margins_std": 1.2536591291427612,
1018
+ "eval_rewards/rejected": -1.9758716821670532,
1019
+ "eval_runtime": 741.3629,
1020
+ "eval_samples_per_second": 2.698,
1021
+ "eval_steps_per_second": 0.169,
1022
+ "step": 500
1023
+ },
1024
+ {
1025
+ "epoch": 1.44,
1026
+ "grad_norm": 4.75515226711996,
1027
+ "learning_rate": 3.116641275116018e-07,
1028
+ "logits/chosen": -2.1037933826446533,
1029
+ "logits/rejected": -1.8988920450210571,
1030
+ "logps/chosen": -431.19134521484375,
1031
+ "logps/rejected": -524.6165161132812,
1032
+ "loss": 0.4047,
1033
+ "rewards/accuracies": 0.887499988079071,
1034
+ "rewards/chosen": -1.1405599117279053,
1035
+ "rewards/margins": 1.1359269618988037,
1036
+ "rewards/margins_max": 2.8867573738098145,
1037
+ "rewards/margins_min": 0.03959502652287483,
1038
+ "rewards/margins_std": 1.2942934036254883,
1039
+ "rewards/rejected": -2.276486873626709,
1040
+ "step": 510
1041
+ },
1042
+ {
1043
+ "epoch": 1.46,
1044
+ "grad_norm": 3.8133006271435814,
1045
+ "learning_rate": 3.036873904806295e-07,
1046
+ "logits/chosen": -2.175333261489868,
1047
+ "logits/rejected": -1.971325159072876,
1048
+ "logps/chosen": -386.5411682128906,
1049
+ "logps/rejected": -497.42767333984375,
1050
+ "loss": 0.3576,
1051
+ "rewards/accuracies": 0.9125000238418579,
1052
+ "rewards/chosen": -1.1177667379379272,
1053
+ "rewards/margins": 1.1489231586456299,
1054
+ "rewards/margins_max": 2.5013482570648193,
1055
+ "rewards/margins_min": 0.12063050270080566,
1056
+ "rewards/margins_std": 1.0784401893615723,
1057
+ "rewards/rejected": -2.2666897773742676,
1058
+ "step": 520
1059
+ },
1060
+ {
1061
+ "epoch": 1.49,
1062
+ "grad_norm": 4.402083137874476,
1063
+ "learning_rate": 2.956529233772492e-07,
1064
+ "logits/chosen": -2.164149284362793,
1065
+ "logits/rejected": -2.0270886421203613,
1066
+ "logps/chosen": -445.92962646484375,
1067
+ "logps/rejected": -623.1121826171875,
1068
+ "loss": 0.3375,
1069
+ "rewards/accuracies": 0.9125000238418579,
1070
+ "rewards/chosen": -1.3388593196868896,
1071
+ "rewards/margins": 1.5283788442611694,
1072
+ "rewards/margins_max": 3.114734649658203,
1073
+ "rewards/margins_min": 0.2940402626991272,
1074
+ "rewards/margins_std": 1.2590672969818115,
1075
+ "rewards/rejected": -2.8672380447387695,
1076
+ "step": 530
1077
+ },
1078
+ {
1079
+ "epoch": 1.52,
1080
+ "grad_norm": 5.278491140419052,
1081
+ "learning_rate": 2.875693656671431e-07,
1082
+ "logits/chosen": -2.153266191482544,
1083
+ "logits/rejected": -1.9573100805282593,
1084
+ "logps/chosen": -465.7792053222656,
1085
+ "logps/rejected": -541.7371826171875,
1086
+ "loss": 0.3631,
1087
+ "rewards/accuracies": 0.875,
1088
+ "rewards/chosen": -1.4917871952056885,
1089
+ "rewards/margins": 1.1456191539764404,
1090
+ "rewards/margins_max": 2.534468173980713,
1091
+ "rewards/margins_min": 0.050025321543216705,
1092
+ "rewards/margins_std": 1.114137887954712,
1093
+ "rewards/rejected": -2.637406349182129,
1094
+ "step": 540
1095
+ },
1096
+ {
1097
+ "epoch": 1.55,
1098
+ "grad_norm": 2.831099826627899,
1099
+ "learning_rate": 2.794454096031429e-07,
1100
+ "logits/chosen": -2.1867547035217285,
1101
+ "logits/rejected": -2.0559072494506836,
1102
+ "logps/chosen": -509.72802734375,
1103
+ "logps/rejected": -703.0756225585938,
1104
+ "loss": 0.3108,
1105
+ "rewards/accuracies": 0.9375,
1106
+ "rewards/chosen": -1.6035360097885132,
1107
+ "rewards/margins": 1.853695273399353,
1108
+ "rewards/margins_max": 4.070513725280762,
1109
+ "rewards/margins_min": 0.37196239829063416,
1110
+ "rewards/margins_std": 1.6610209941864014,
1111
+ "rewards/rejected": -3.457231044769287,
1112
+ "step": 550
1113
+ },
1114
+ {
1115
+ "epoch": 1.58,
1116
+ "grad_norm": 4.413758285977071,
1117
+ "learning_rate": 2.7128979087844593e-07,
1118
+ "logits/chosen": -2.15148663520813,
1119
+ "logits/rejected": -1.9961950778961182,
1120
+ "logps/chosen": -422.4507751464844,
1121
+ "logps/rejected": -608.5535888671875,
1122
+ "loss": 0.3074,
1123
+ "rewards/accuracies": 0.8999999761581421,
1124
+ "rewards/chosen": -1.422060251235962,
1125
+ "rewards/margins": 1.6324869394302368,
1126
+ "rewards/margins_max": 3.5064094066619873,
1127
+ "rewards/margins_min": 0.22324328124523163,
1128
+ "rewards/margins_std": 1.5147463083267212,
1129
+ "rewards/rejected": -3.054547071456909,
1130
+ "step": 560
1131
+ },
1132
+ {
1133
+ "epoch": 1.61,
1134
+ "grad_norm": 4.771842727601967,
1135
+ "learning_rate": 2.6311127923312153e-07,
1136
+ "logits/chosen": -2.114218235015869,
1137
+ "logits/rejected": -1.990100622177124,
1138
+ "logps/chosen": -478.4190979003906,
1139
+ "logps/rejected": -668.0441284179688,
1140
+ "loss": 0.3081,
1141
+ "rewards/accuracies": 0.862500011920929,
1142
+ "rewards/chosen": -1.6441261768341064,
1143
+ "rewards/margins": 1.6914678812026978,
1144
+ "rewards/margins_max": 3.585777759552002,
1145
+ "rewards/margins_min": 0.0474582239985466,
1146
+ "rewards/margins_std": 1.607506513595581,
1147
+ "rewards/rejected": -3.3355937004089355,
1148
+ "step": 570
1149
+ },
1150
+ {
1151
+ "epoch": 1.63,
1152
+ "grad_norm": 4.718826798578273,
1153
+ "learning_rate": 2.5491866902400565e-07,
1154
+ "logits/chosen": -2.0991835594177246,
1155
+ "logits/rejected": -2.031989336013794,
1156
+ "logps/chosen": -415.6539611816406,
1157
+ "logps/rejected": -625.79443359375,
1158
+ "loss": 0.3326,
1159
+ "rewards/accuracies": 0.9125000238418579,
1160
+ "rewards/chosen": -1.5325905084609985,
1161
+ "rewards/margins": 1.6610767841339111,
1162
+ "rewards/margins_max": 3.7243950366973877,
1163
+ "rewards/margins_min": 0.19340097904205322,
1164
+ "rewards/margins_std": 1.592491865158081,
1165
+ "rewards/rejected": -3.19366717338562,
1166
+ "step": 580
1167
+ },
1168
+ {
1169
+ "epoch": 1.66,
1170
+ "grad_norm": 6.197013861888915,
1171
+ "learning_rate": 2.4672076976812543e-07,
1172
+ "logits/chosen": -2.07092022895813,
1173
+ "logits/rejected": -1.9514614343643188,
1174
+ "logps/chosen": -456.1803283691406,
1175
+ "logps/rejected": -636.2240600585938,
1176
+ "loss": 0.3376,
1177
+ "rewards/accuracies": 0.8374999761581421,
1178
+ "rewards/chosen": -1.704035758972168,
1179
+ "rewards/margins": 1.6240657567977905,
1180
+ "rewards/margins_max": 3.5553812980651855,
1181
+ "rewards/margins_min": 0.13123703002929688,
1182
+ "rewards/margins_std": 1.570482850074768,
1183
+ "rewards/rejected": -3.328101396560669,
1184
+ "step": 590
1185
+ },
1186
+ {
1187
+ "epoch": 1.69,
1188
+ "grad_norm": 4.544446788394471,
1189
+ "learning_rate": 2.385263966698222e-07,
1190
+ "logits/chosen": -2.005707263946533,
1191
+ "logits/rejected": -1.8865950107574463,
1192
+ "logps/chosen": -431.6078186035156,
1193
+ "logps/rejected": -682.1323852539062,
1194
+ "loss": 0.3449,
1195
+ "rewards/accuracies": 0.887499988079071,
1196
+ "rewards/chosen": -1.613631248474121,
1197
+ "rewards/margins": 1.895965576171875,
1198
+ "rewards/margins_max": 4.0293354988098145,
1199
+ "rewards/margins_min": 0.33173805475234985,
1200
+ "rewards/margins_std": 1.6987276077270508,
1201
+ "rewards/rejected": -3.509597063064575,
1202
+ "step": 600
1203
+ },
1204
+ {
1205
+ "epoch": 1.69,
1206
+ "eval_logits/chosen": -1.9706788063049316,
1207
+ "eval_logits/rejected": -1.882871389389038,
1208
+ "eval_logps/chosen": -505.41143798828125,
1209
+ "eval_logps/rejected": -519.8788452148438,
1210
+ "eval_loss": 0.8093447089195251,
1211
+ "eval_rewards/accuracies": 0.5730000138282776,
1212
+ "eval_rewards/chosen": -2.305307149887085,
1213
+ "eval_rewards/margins": 0.31827661395072937,
1214
+ "eval_rewards/margins_max": 2.791022539138794,
1215
+ "eval_rewards/margins_min": -1.984505295753479,
1216
+ "eval_rewards/margins_std": 1.5908182859420776,
1217
+ "eval_rewards/rejected": -2.6235837936401367,
1218
+ "eval_runtime": 740.929,
1219
+ "eval_samples_per_second": 2.699,
1220
+ "eval_steps_per_second": 0.169,
1221
+ "step": 600
1222
+ },
1223
+ {
1224
+ "epoch": 1.72,
1225
+ "grad_norm": 7.884893832559347,
1226
+ "learning_rate": 2.3034436114175838e-07,
1227
+ "logits/chosen": -2.072605848312378,
1228
+ "logits/rejected": -1.8742202520370483,
1229
+ "logps/chosen": -470.3563537597656,
1230
+ "logps/rejected": -593.04345703125,
1231
+ "loss": 0.3553,
1232
+ "rewards/accuracies": 0.862500011920929,
1233
+ "rewards/chosen": -1.6478776931762695,
1234
+ "rewards/margins": 1.5212364196777344,
1235
+ "rewards/margins_max": 3.331374406814575,
1236
+ "rewards/margins_min": 0.16380740702152252,
1237
+ "rewards/margins_std": 1.4223846197128296,
1238
+ "rewards/rejected": -3.169113874435425,
1239
+ "step": 610
1240
+ },
1241
+ {
1242
+ "epoch": 1.75,
1243
+ "grad_norm": 4.6350166519672475,
1244
+ "learning_rate": 2.2218346133000264e-07,
1245
+ "logits/chosen": -2.145089864730835,
1246
+ "logits/rejected": -1.9883239269256592,
1247
+ "logps/chosen": -495.61676025390625,
1248
+ "logps/rejected": -693.98486328125,
1249
+ "loss": 0.3263,
1250
+ "rewards/accuracies": 0.875,
1251
+ "rewards/chosen": -1.746462106704712,
1252
+ "rewards/margins": 1.9258817434310913,
1253
+ "rewards/margins_max": 4.596188545227051,
1254
+ "rewards/margins_min": 0.21569041907787323,
1255
+ "rewards/margins_std": 2.0187416076660156,
1256
+ "rewards/rejected": -3.672344207763672,
1257
+ "step": 620
1258
+ },
1259
+ {
1260
+ "epoch": 1.77,
1261
+ "grad_norm": 4.707195155665474,
1262
+ "learning_rate": 2.1405247265337917e-07,
1263
+ "logits/chosen": -2.0058627128601074,
1264
+ "logits/rejected": -1.8860944509506226,
1265
+ "logps/chosen": -489.9043884277344,
1266
+ "logps/rejected": -679.2588500976562,
1267
+ "loss": 0.3348,
1268
+ "rewards/accuracies": 0.887499988079071,
1269
+ "rewards/chosen": -1.6168292760849,
1270
+ "rewards/margins": 1.956435203552246,
1271
+ "rewards/margins_max": 4.475503444671631,
1272
+ "rewards/margins_min": 0.15001405775547028,
1273
+ "rewards/margins_std": 1.965258002281189,
1274
+ "rewards/rejected": -3.5732643604278564,
1275
+ "step": 630
1276
+ },
1277
+ {
1278
+ "epoch": 1.8,
1279
+ "grad_norm": 5.859575517402316,
1280
+ "learning_rate": 2.0596013836725657e-07,
1281
+ "logits/chosen": -2.161479949951172,
1282
+ "logits/rejected": -1.9951550960540771,
1283
+ "logps/chosen": -445.10345458984375,
1284
+ "logps/rejected": -593.2735595703125,
1285
+ "loss": 0.3148,
1286
+ "rewards/accuracies": 0.8999999761581421,
1287
+ "rewards/chosen": -1.5793910026550293,
1288
+ "rewards/margins": 1.6846816539764404,
1289
+ "rewards/margins_max": 3.323934555053711,
1290
+ "rewards/margins_min": 0.4667704701423645,
1291
+ "rewards/margins_std": 1.3031413555145264,
1292
+ "rewards/rejected": -3.264072895050049,
1293
+ "step": 640
1294
+ },
1295
+ {
1296
+ "epoch": 1.83,
1297
+ "grad_norm": 7.1672923088518825,
1298
+ "learning_rate": 1.9791516016192213e-07,
1299
+ "logits/chosen": -2.07092547416687,
1300
+ "logits/rejected": -1.956458330154419,
1301
+ "logps/chosen": -485.3031311035156,
1302
+ "logps/rejected": -704.134765625,
1303
+ "loss": 0.3248,
1304
+ "rewards/accuracies": 0.875,
1305
+ "rewards/chosen": -1.7588361501693726,
1306
+ "rewards/margins": 1.9537765979766846,
1307
+ "rewards/margins_max": 4.279685020446777,
1308
+ "rewards/margins_min": 0.22622933983802795,
1309
+ "rewards/margins_std": 1.8460969924926758,
1310
+ "rewards/rejected": -3.7126126289367676,
1311
+ "step": 650
1312
+ },
1313
+ {
1314
+ "epoch": 1.86,
1315
+ "grad_norm": 6.153621209141468,
1316
+ "learning_rate": 1.8992618880565036e-07,
1317
+ "logits/chosen": -2.0097153186798096,
1318
+ "logits/rejected": -1.9442319869995117,
1319
+ "logps/chosen": -366.12994384765625,
1320
+ "logps/rejected": -553.5570068359375,
1321
+ "loss": 0.3183,
1322
+ "rewards/accuracies": 0.875,
1323
+ "rewards/chosen": -1.3965609073638916,
1324
+ "rewards/margins": 1.6747844219207764,
1325
+ "rewards/margins_max": 3.8514389991760254,
1326
+ "rewards/margins_min": 0.07616128027439117,
1327
+ "rewards/margins_std": 1.7168182134628296,
1328
+ "rewards/rejected": -3.071345567703247,
1329
+ "step": 660
1330
+ },
1331
+ {
1332
+ "epoch": 1.89,
1333
+ "grad_norm": 6.622934271289155,
1334
+ "learning_rate": 1.8200181484252885e-07,
1335
+ "logits/chosen": -2.1211202144622803,
1336
+ "logits/rejected": -2.00960373878479,
1337
+ "logps/chosen": -453.65386962890625,
1338
+ "logps/rejected": -652.305908203125,
1339
+ "loss": 0.3179,
1340
+ "rewards/accuracies": 0.875,
1341
+ "rewards/chosen": -1.7312113046646118,
1342
+ "rewards/margins": 2.003723621368408,
1343
+ "rewards/margins_max": 4.150836944580078,
1344
+ "rewards/margins_min": 0.22760996222496033,
1345
+ "rewards/margins_std": 1.777065634727478,
1346
+ "rewards/rejected": -3.7349350452423096,
1347
+ "step": 670
1348
+ },
1349
+ {
1350
+ "epoch": 1.92,
1351
+ "grad_norm": 6.678835951751634,
1352
+ "learning_rate": 1.7415055935504233e-07,
1353
+ "logits/chosen": -2.175750970840454,
1354
+ "logits/rejected": -2.0145211219787598,
1355
+ "logps/chosen": -465.84259033203125,
1356
+ "logps/rejected": -653.8709106445312,
1357
+ "loss": 0.3322,
1358
+ "rewards/accuracies": 0.862500011920929,
1359
+ "rewards/chosen": -1.7737252712249756,
1360
+ "rewards/margins": 1.8737843036651611,
1361
+ "rewards/margins_max": 4.312856197357178,
1362
+ "rewards/margins_min": -0.018575742840766907,
1363
+ "rewards/margins_std": 1.942803978919983,
1364
+ "rewards/rejected": -3.6475093364715576,
1365
+ "step": 680
1366
+ },
1367
+ {
1368
+ "epoch": 1.94,
1369
+ "grad_norm": 5.060285672653984,
1370
+ "learning_rate": 1.6638086480134952e-07,
1371
+ "logits/chosen": -2.0581729412078857,
1372
+ "logits/rejected": -1.945577621459961,
1373
+ "logps/chosen": -358.5215148925781,
1374
+ "logps/rejected": -548.193359375,
1375
+ "loss": 0.3068,
1376
+ "rewards/accuracies": 0.925000011920929,
1377
+ "rewards/chosen": -1.3307039737701416,
1378
+ "rewards/margins": 1.8142309188842773,
1379
+ "rewards/margins_max": 4.50165319442749,
1380
+ "rewards/margins_min": 0.17869335412979126,
1381
+ "rewards/margins_std": 1.988674521446228,
1382
+ "rewards/rejected": -3.144935131072998,
1383
+ "step": 690
1384
+ },
1385
+ {
1386
+ "epoch": 1.97,
1387
+ "grad_norm": 8.639077018605542,
1388
+ "learning_rate": 1.5870108593710471e-07,
1389
+ "logits/chosen": -2.0774855613708496,
1390
+ "logits/rejected": -1.8586009740829468,
1391
+ "logps/chosen": -407.3385314941406,
1392
+ "logps/rejected": -548.2588500976562,
1393
+ "loss": 0.3253,
1394
+ "rewards/accuracies": 0.8500000238418579,
1395
+ "rewards/chosen": -1.3710554838180542,
1396
+ "rewards/margins": 1.697474718093872,
1397
+ "rewards/margins_max": 3.6990180015563965,
1398
+ "rewards/margins_min": 0.1754045933485031,
1399
+ "rewards/margins_std": 1.5944454669952393,
1400
+ "rewards/rejected": -3.0685300827026367,
1401
+ "step": 700
1402
+ },
1403
+ {
1404
+ "epoch": 1.97,
1405
+ "eval_logits/chosen": -1.9524266719818115,
1406
+ "eval_logits/rejected": -1.8636794090270996,
1407
+ "eval_logps/chosen": -511.7565612792969,
1408
+ "eval_logps/rejected": -533.7400512695312,
1409
+ "eval_loss": 0.8021999001502991,
1410
+ "eval_rewards/accuracies": 0.5899999737739563,
1411
+ "eval_rewards/chosen": -2.3687589168548584,
1412
+ "eval_rewards/margins": 0.3934372663497925,
1413
+ "eval_rewards/margins_max": 3.059994697570801,
1414
+ "eval_rewards/margins_min": -2.04789400100708,
1415
+ "eval_rewards/margins_std": 1.6969114542007446,
1416
+ "eval_rewards/rejected": -2.7621960639953613,
1417
+ "eval_runtime": 740.07,
1418
+ "eval_samples_per_second": 2.702,
1419
+ "eval_steps_per_second": 0.169,
1420
+ "step": 700
1421
+ },
1422
+ {
1423
+ "epoch": 2.0,
1424
+ "grad_norm": 4.484030680446832,
1425
+ "learning_rate": 1.5111948083158528e-07,
1426
+ "logits/chosen": -1.9601835012435913,
1427
+ "logits/rejected": -1.8817923069000244,
1428
+ "logps/chosen": -376.6875915527344,
1429
+ "logps/rejected": -597.0089721679688,
1430
+ "loss": 0.2922,
1431
+ "rewards/accuracies": 0.887499988079071,
1432
+ "rewards/chosen": -1.4828770160675049,
1433
+ "rewards/margins": 1.8921600580215454,
1434
+ "rewards/margins_max": 4.028073310852051,
1435
+ "rewards/margins_min": 0.3237634301185608,
1436
+ "rewards/margins_std": 1.6557111740112305,
1437
+ "rewards/rejected": -3.3750369548797607,
1438
+ "step": 710
1439
+ },
1440
+ {
1441
+ "epoch": 2.03,
1442
+ "grad_norm": 5.934607513999122,
1443
+ "learning_rate": 1.4364420198778658e-07,
1444
+ "logits/chosen": -2.168855667114258,
1445
+ "logits/rejected": -1.9871803522109985,
1446
+ "logps/chosen": -517.0801391601562,
1447
+ "logps/rejected": -775.808349609375,
1448
+ "loss": 0.2773,
1449
+ "rewards/accuracies": 0.9375,
1450
+ "rewards/chosen": -1.8118972778320312,
1451
+ "rewards/margins": 2.484999656677246,
1452
+ "rewards/margins_max": 5.045625686645508,
1453
+ "rewards/margins_min": 0.6035622358322144,
1454
+ "rewards/margins_std": 2.0212478637695312,
1455
+ "rewards/rejected": -4.296896457672119,
1456
+ "step": 720
1457
+ },
1458
+ {
1459
+ "epoch": 2.06,
1460
+ "grad_norm": 6.664291467344353,
1461
+ "learning_rate": 1.3628328757603242e-07,
1462
+ "logits/chosen": -2.07096529006958,
1463
+ "logits/rejected": -1.88152277469635,
1464
+ "logps/chosen": -509.58056640625,
1465
+ "logps/rejected": -746.2061767578125,
1466
+ "loss": 0.2493,
1467
+ "rewards/accuracies": 0.9375,
1468
+ "rewards/chosen": -2.0079989433288574,
1469
+ "rewards/margins": 2.2790350914001465,
1470
+ "rewards/margins_max": 4.603484153747559,
1471
+ "rewards/margins_min": 0.4713754653930664,
1472
+ "rewards/margins_std": 1.8801381587982178,
1473
+ "rewards/rejected": -4.287034034729004,
1474
+ "step": 730
1475
+ },
1476
+ {
1477
+ "epoch": 2.08,
1478
+ "grad_norm": 5.3728713322719175,
1479
+ "learning_rate": 1.2904465279052723e-07,
1480
+ "logits/chosen": -1.9976093769073486,
1481
+ "logits/rejected": -1.8471425771713257,
1482
+ "logps/chosen": -497.21484375,
1483
+ "logps/rejected": -705.7311401367188,
1484
+ "loss": 0.3031,
1485
+ "rewards/accuracies": 0.875,
1486
+ "rewards/chosen": -1.9563236236572266,
1487
+ "rewards/margins": 2.0360076427459717,
1488
+ "rewards/margins_max": 4.599600791931152,
1489
+ "rewards/margins_min": 0.06591819226741791,
1490
+ "rewards/margins_std": 2.065493583679199,
1491
+ "rewards/rejected": -3.9923312664031982,
1492
+ "step": 740
1493
+ },
1494
+ {
1495
+ "epoch": 2.11,
1496
+ "grad_norm": 4.3979433368191625,
1497
+ "learning_rate": 1.219360813381446e-07,
1498
+ "logits/chosen": -1.9922767877578735,
1499
+ "logits/rejected": -1.8619979619979858,
1500
+ "logps/chosen": -434.5537109375,
1501
+ "logps/rejected": -657.9417724609375,
1502
+ "loss": 0.288,
1503
+ "rewards/accuracies": 0.8999999761581421,
1504
+ "rewards/chosen": -1.721703290939331,
1505
+ "rewards/margins": 2.172398090362549,
1506
+ "rewards/margins_max": 4.379127025604248,
1507
+ "rewards/margins_min": 0.2665182948112488,
1508
+ "rewards/margins_std": 1.8405358791351318,
1509
+ "rewards/rejected": -3.8941009044647217,
1510
+ "step": 750
1511
+ },
1512
+ {
1513
+ "epoch": 2.14,
1514
+ "grad_norm": 4.311152372020765,
1515
+ "learning_rate": 1.149652170686039e-07,
1516
+ "logits/chosen": -1.910975456237793,
1517
+ "logits/rejected": -1.7928791046142578,
1518
+ "logps/chosen": -441.78240966796875,
1519
+ "logps/rejected": -715.9662475585938,
1520
+ "loss": 0.2896,
1521
+ "rewards/accuracies": 0.9375,
1522
+ "rewards/chosen": -1.777691125869751,
1523
+ "rewards/margins": 2.4173693656921387,
1524
+ "rewards/margins_max": 5.414074897766113,
1525
+ "rewards/margins_min": 0.11628694832324982,
1526
+ "rewards/margins_std": 2.435407876968384,
1527
+ "rewards/rejected": -4.195060729980469,
1528
+ "step": 760
1529
+ },
1530
+ {
1531
+ "epoch": 2.17,
1532
+ "grad_norm": 4.632824439408098,
1533
+ "learning_rate": 1.0813955575503587e-07,
1534
+ "logits/chosen": -2.0070652961730957,
1535
+ "logits/rejected": -1.8251206874847412,
1536
+ "logps/chosen": -446.7857971191406,
1537
+ "logps/rejected": -642.5098876953125,
1538
+ "loss": 0.2992,
1539
+ "rewards/accuracies": 0.9125000238418579,
1540
+ "rewards/chosen": -1.7226359844207764,
1541
+ "rewards/margins": 2.2547175884246826,
1542
+ "rewards/margins_max": 5.0700860023498535,
1543
+ "rewards/margins_min": 0.39999958872795105,
1544
+ "rewards/margins_std": 2.0922274589538574,
1545
+ "rewards/rejected": -3.977353572845459,
1546
+ "step": 770
1547
+ },
1548
+ {
1549
+ "epoch": 2.2,
1550
+ "grad_norm": 7.187846299826648,
1551
+ "learning_rate": 1.0146643703377486e-07,
1552
+ "logits/chosen": -2.025348424911499,
1553
+ "logits/rejected": -1.8247390985488892,
1554
+ "logps/chosen": -516.863525390625,
1555
+ "logps/rejected": -660.7418212890625,
1556
+ "loss": 0.3685,
1557
+ "rewards/accuracies": 0.8125,
1558
+ "rewards/chosen": -2.216492176055908,
1559
+ "rewards/margins": 1.8119462728500366,
1560
+ "rewards/margins_max": 4.643686294555664,
1561
+ "rewards/margins_min": -0.28089627623558044,
1562
+ "rewards/margins_std": 2.2221999168395996,
1563
+ "rewards/rejected": -4.028437614440918,
1564
+ "step": 780
1565
+ },
1566
+ {
1567
+ "epoch": 2.23,
1568
+ "grad_norm": 4.864775866905843,
1569
+ "learning_rate": 9.495303651204494e-08,
1570
+ "logits/chosen": -2.032228946685791,
1571
+ "logits/rejected": -1.8665939569473267,
1572
+ "logps/chosen": -472.88018798828125,
1573
+ "logps/rejected": -657.1442260742188,
1574
+ "loss": 0.2754,
1575
+ "rewards/accuracies": 0.925000011920929,
1576
+ "rewards/chosen": -1.7393814325332642,
1577
+ "rewards/margins": 1.9806731939315796,
1578
+ "rewards/margins_max": 4.059412002563477,
1579
+ "rewards/margins_min": 0.27446281909942627,
1580
+ "rewards/margins_std": 1.732347846031189,
1581
+ "rewards/rejected": -3.7200546264648438,
1582
+ "step": 790
1583
+ },
1584
+ {
1585
+ "epoch": 2.25,
1586
+ "grad_norm": 4.897791088230011,
1587
+ "learning_rate": 8.860635805202615e-08,
1588
+ "logits/chosen": -2.0725150108337402,
1589
+ "logits/rejected": -1.939500093460083,
1590
+ "logps/chosen": -462.3565368652344,
1591
+ "logps/rejected": -782.6009521484375,
1592
+ "loss": 0.2445,
1593
+ "rewards/accuracies": 0.9624999761581421,
1594
+ "rewards/chosen": -1.6571842432022095,
1595
+ "rewards/margins": 2.6175780296325684,
1596
+ "rewards/margins_max": 5.12880802154541,
1597
+ "rewards/margins_min": 0.6253465414047241,
1598
+ "rewards/margins_std": 2.0294041633605957,
1599
+ "rewards/rejected": -4.274762153625488,
1600
+ "step": 800
1601
+ },
1602
+ {
1603
+ "epoch": 2.25,
1604
+ "eval_logits/chosen": -1.919421672821045,
1605
+ "eval_logits/rejected": -1.8329098224639893,
1606
+ "eval_logps/chosen": -536.6691284179688,
1607
+ "eval_logps/rejected": -563.362060546875,
1608
+ "eval_loss": 0.8262488842010498,
1609
+ "eval_rewards/accuracies": 0.5879999995231628,
1610
+ "eval_rewards/chosen": -2.6178839206695557,
1611
+ "eval_rewards/margins": 0.44053173065185547,
1612
+ "eval_rewards/margins_max": 3.385202407836914,
1613
+ "eval_rewards/margins_min": -2.237797260284424,
1614
+ "eval_rewards/margins_std": 1.8657594919204712,
1615
+ "eval_rewards/rejected": -3.0584161281585693,
1616
+ "eval_runtime": 740.1616,
1617
+ "eval_samples_per_second": 2.702,
1618
+ "eval_steps_per_second": 0.169,
1619
+ "step": 800
1620
+ },
1621
+ {
1622
+ "epoch": 2.28,
1623
+ "grad_norm": 3.8140500779127393,
1624
+ "learning_rate": 8.24332262395994e-08,
1625
+ "logits/chosen": -2.062171459197998,
1626
+ "logits/rejected": -1.9682680368423462,
1627
+ "logps/chosen": -468.1681213378906,
1628
+ "logps/rejected": -737.950439453125,
1629
+ "loss": 0.2607,
1630
+ "rewards/accuracies": 0.9125000238418579,
1631
+ "rewards/chosen": -1.9337375164031982,
1632
+ "rewards/margins": 2.346217393875122,
1633
+ "rewards/margins_max": 4.732678413391113,
1634
+ "rewards/margins_min": 0.3489117920398712,
1635
+ "rewards/margins_std": 1.9971859455108643,
1636
+ "rewards/rejected": -4.2799553871154785,
1637
+ "step": 810
1638
+ },
1639
+ {
1640
+ "epoch": 2.31,
1641
+ "grad_norm": 4.503876817512786,
1642
+ "learning_rate": 7.644027904586586e-08,
1643
+ "logits/chosen": -1.9747568368911743,
1644
+ "logits/rejected": -1.861803412437439,
1645
+ "logps/chosen": -466.3984375,
1646
+ "logps/rejected": -755.3158569335938,
1647
+ "loss": 0.26,
1648
+ "rewards/accuracies": 0.8999999761581421,
1649
+ "rewards/chosen": -1.7740318775177002,
1650
+ "rewards/margins": 2.602890968322754,
1651
+ "rewards/margins_max": 5.653742790222168,
1652
+ "rewards/margins_min": 0.4451362192630768,
1653
+ "rewards/margins_std": 2.3605756759643555,
1654
+ "rewards/rejected": -4.376922607421875,
1655
+ "step": 820
1656
+ },
1657
+ {
1658
+ "epoch": 2.34,
1659
+ "grad_norm": 8.38717540675709,
1660
+ "learning_rate": 7.063396068933469e-08,
1661
+ "logits/chosen": -1.9597285985946655,
1662
+ "logits/rejected": -1.7506755590438843,
1663
+ "logps/chosen": -575.2877197265625,
1664
+ "logps/rejected": -752.0540161132812,
1665
+ "loss": 0.289,
1666
+ "rewards/accuracies": 0.8374999761581421,
1667
+ "rewards/chosen": -2.2225418090820312,
1668
+ "rewards/margins": 2.1929917335510254,
1669
+ "rewards/margins_max": 4.804136753082275,
1670
+ "rewards/margins_min": 0.21819576621055603,
1671
+ "rewards/margins_std": 2.0718307495117188,
1672
+ "rewards/rejected": -4.415533542633057,
1673
+ "step": 830
1674
+ },
1675
+ {
1676
+ "epoch": 2.37,
1677
+ "grad_norm": 4.269821171109155,
1678
+ "learning_rate": 6.502051470645148e-08,
1679
+ "logits/chosen": -2.0234594345092773,
1680
+ "logits/rejected": -1.8737064599990845,
1681
+ "logps/chosen": -497.57843017578125,
1682
+ "logps/rejected": -664.8786010742188,
1683
+ "loss": 0.2876,
1684
+ "rewards/accuracies": 0.887499988079071,
1685
+ "rewards/chosen": -1.9449161291122437,
1686
+ "rewards/margins": 1.8489919900894165,
1687
+ "rewards/margins_max": 3.4735729694366455,
1688
+ "rewards/margins_min": 0.39146485924720764,
1689
+ "rewards/margins_std": 1.407354712486267,
1690
+ "rewards/rejected": -3.7939083576202393,
1691
+ "step": 840
1692
+ },
1693
+ {
1694
+ "epoch": 2.39,
1695
+ "grad_norm": 6.881439522360299,
1696
+ "learning_rate": 5.960597723792194e-08,
1697
+ "logits/chosen": -2.020962715148926,
1698
+ "logits/rejected": -1.8502124547958374,
1699
+ "logps/chosen": -445.7184143066406,
1700
+ "logps/rejected": -693.6365356445312,
1701
+ "loss": 0.2767,
1702
+ "rewards/accuracies": 0.925000011920929,
1703
+ "rewards/chosen": -1.7460823059082031,
1704
+ "rewards/margins": 2.4407520294189453,
1705
+ "rewards/margins_max": 4.710750102996826,
1706
+ "rewards/margins_min": 0.3442041873931885,
1707
+ "rewards/margins_std": 1.993112564086914,
1708
+ "rewards/rejected": -4.18683385848999,
1709
+ "step": 850
1710
+ },
1711
+ {
1712
+ "epoch": 2.42,
1713
+ "grad_norm": 7.845678328004129,
1714
+ "learning_rate": 5.4396170538046486e-08,
1715
+ "logits/chosen": -2.072786808013916,
1716
+ "logits/rejected": -1.908163070678711,
1717
+ "logps/chosen": -511.01104736328125,
1718
+ "logps/rejected": -751.96435546875,
1719
+ "loss": 0.2501,
1720
+ "rewards/accuracies": 0.887499988079071,
1721
+ "rewards/chosen": -1.9439871311187744,
1722
+ "rewards/margins": 2.6217305660247803,
1723
+ "rewards/margins_max": 5.660550594329834,
1724
+ "rewards/margins_min": 0.2680825889110565,
1725
+ "rewards/margins_std": 2.4432153701782227,
1726
+ "rewards/rejected": -4.565717697143555,
1727
+ "step": 860
1728
+ },
1729
+ {
1730
+ "epoch": 2.45,
1731
+ "grad_norm": 7.4391577660258195,
1732
+ "learning_rate": 4.93966967140487e-08,
1733
+ "logits/chosen": -1.9789683818817139,
1734
+ "logits/rejected": -1.842271089553833,
1735
+ "logps/chosen": -460.94091796875,
1736
+ "logps/rejected": -734.498779296875,
1737
+ "loss": 0.2516,
1738
+ "rewards/accuracies": 0.949999988079071,
1739
+ "rewards/chosen": -1.7545455694198608,
1740
+ "rewards/margins": 2.402285575866699,
1741
+ "rewards/margins_max": 5.085206985473633,
1742
+ "rewards/margins_min": 0.28181013464927673,
1743
+ "rewards/margins_std": 2.1507792472839355,
1744
+ "rewards/rejected": -4.156831741333008,
1745
+ "step": 870
1746
+ },
1747
+ {
1748
+ "epoch": 2.48,
1749
+ "grad_norm": 5.333353916764276,
1750
+ "learning_rate": 4.4612931702126433e-08,
1751
+ "logits/chosen": -2.1065726280212402,
1752
+ "logits/rejected": -1.9196290969848633,
1753
+ "logps/chosen": -523.8679809570312,
1754
+ "logps/rejected": -738.0938720703125,
1755
+ "loss": 0.2638,
1756
+ "rewards/accuracies": 0.925000011920929,
1757
+ "rewards/chosen": -2.153127908706665,
1758
+ "rewards/margins": 2.2366292476654053,
1759
+ "rewards/margins_max": 4.850363254547119,
1760
+ "rewards/margins_min": 0.37360453605651855,
1761
+ "rewards/margins_std": 2.033721923828125,
1762
+ "rewards/rejected": -4.38975715637207,
1763
+ "step": 880
1764
+ },
1765
+ {
1766
+ "epoch": 2.51,
1767
+ "grad_norm": 5.085291105056715,
1768
+ "learning_rate": 4.005001948670605e-08,
1769
+ "logits/chosen": -2.0524444580078125,
1770
+ "logits/rejected": -1.8745992183685303,
1771
+ "logps/chosen": -531.6593017578125,
1772
+ "logps/rejected": -733.5911865234375,
1773
+ "loss": 0.2719,
1774
+ "rewards/accuracies": 0.925000011920929,
1775
+ "rewards/chosen": -2.118652820587158,
1776
+ "rewards/margins": 2.1900432109832764,
1777
+ "rewards/margins_max": 4.532656669616699,
1778
+ "rewards/margins_min": 0.4483065605163574,
1779
+ "rewards/margins_std": 1.870699167251587,
1780
+ "rewards/rejected": -4.3086957931518555,
1781
+ "step": 890
1782
+ },
1783
+ {
1784
+ "epoch": 2.54,
1785
+ "grad_norm": 6.4536898091014265,
1786
+ "learning_rate": 3.571286656911376e-08,
1787
+ "logits/chosen": -2.048665761947632,
1788
+ "logits/rejected": -1.8375803232192993,
1789
+ "logps/chosen": -514.3804321289062,
1790
+ "logps/rejected": -805.6507568359375,
1791
+ "loss": 0.3015,
1792
+ "rewards/accuracies": 0.9375,
1793
+ "rewards/chosen": -2.077589750289917,
1794
+ "rewards/margins": 2.7033679485321045,
1795
+ "rewards/margins_max": 5.94973087310791,
1796
+ "rewards/margins_min": 0.318843811750412,
1797
+ "rewards/margins_std": 2.5493154525756836,
1798
+ "rewards/rejected": -4.7809576988220215,
1799
+ "step": 900
1800
+ },
1801
+ {
1802
+ "epoch": 2.54,
1803
+ "eval_logits/chosen": -1.9146674871444702,
1804
+ "eval_logits/rejected": -1.8280622959136963,
1805
+ "eval_logps/chosen": -542.6185302734375,
1806
+ "eval_logps/rejected": -571.6796264648438,
1807
+ "eval_loss": 0.8292613625526428,
1808
+ "eval_rewards/accuracies": 0.5929999947547913,
1809
+ "eval_rewards/chosen": -2.6773781776428223,
1810
+ "eval_rewards/margins": 0.46421319246292114,
1811
+ "eval_rewards/margins_max": 3.504265308380127,
1812
+ "eval_rewards/margins_min": -2.2912440299987793,
1813
+ "eval_rewards/margins_std": 1.918405294418335,
1814
+ "eval_rewards/rejected": -3.1415910720825195,
1815
+ "eval_runtime": 740.6009,
1816
+ "eval_samples_per_second": 2.701,
1817
+ "eval_steps_per_second": 0.169,
1818
+ "step": 900
1819
+ },
1820
+ {
1821
+ "epoch": 2.56,
1822
+ "grad_norm": 4.3310162633935345,
1823
+ "learning_rate": 3.160613669161255e-08,
1824
+ "logits/chosen": -2.009597063064575,
1825
+ "logits/rejected": -1.8357868194580078,
1826
+ "logps/chosen": -472.4285583496094,
1827
+ "logps/rejected": -671.4129028320312,
1828
+ "loss": 0.2756,
1829
+ "rewards/accuracies": 0.925000011920929,
1830
+ "rewards/chosen": -1.863695502281189,
1831
+ "rewards/margins": 2.203331232070923,
1832
+ "rewards/margins_max": 4.663190841674805,
1833
+ "rewards/margins_min": 0.4688544273376465,
1834
+ "rewards/margins_std": 1.9418967962265015,
1835
+ "rewards/rejected": -4.067026615142822,
1836
+ "step": 910
1837
+ },
1838
+ {
1839
+ "epoch": 2.59,
1840
+ "grad_norm": 5.566695099413474,
1841
+ "learning_rate": 2.7734245822478436e-08,
1842
+ "logits/chosen": -2.015672206878662,
1843
+ "logits/rejected": -1.8013242483139038,
1844
+ "logps/chosen": -464.11163330078125,
1845
+ "logps/rejected": -615.1932373046875,
1846
+ "loss": 0.3132,
1847
+ "rewards/accuracies": 0.925000011920929,
1848
+ "rewards/chosen": -1.7841193675994873,
1849
+ "rewards/margins": 1.9425182342529297,
1850
+ "rewards/margins_max": 4.301909923553467,
1851
+ "rewards/margins_min": 0.10777553170919418,
1852
+ "rewards/margins_std": 1.9170602560043335,
1853
+ "rewards/rejected": -3.726637363433838,
1854
+ "step": 920
1855
+ },
1856
+ {
1857
+ "epoch": 2.62,
1858
+ "grad_norm": 7.3325072892546554,
1859
+ "learning_rate": 2.410135740750821e-08,
1860
+ "logits/chosen": -2.0186896324157715,
1861
+ "logits/rejected": -1.8880863189697266,
1862
+ "logps/chosen": -460.39154052734375,
1863
+ "logps/rejected": -668.5147705078125,
1864
+ "loss": 0.2969,
1865
+ "rewards/accuracies": 0.887499988079071,
1866
+ "rewards/chosen": -1.975254774093628,
1867
+ "rewards/margins": 2.0803561210632324,
1868
+ "rewards/margins_max": 4.375603675842285,
1869
+ "rewards/margins_min": 0.5560076236724854,
1870
+ "rewards/margins_std": 1.7459514141082764,
1871
+ "rewards/rejected": -4.055610656738281,
1872
+ "step": 930
1873
+ },
1874
+ {
1875
+ "epoch": 2.65,
1876
+ "grad_norm": 4.1844081566816715,
1877
+ "learning_rate": 2.071137789306418e-08,
1878
+ "logits/chosen": -2.073499917984009,
1879
+ "logits/rejected": -1.8808135986328125,
1880
+ "logps/chosen": -530.9814453125,
1881
+ "logps/rejected": -763.3355712890625,
1882
+ "loss": 0.2802,
1883
+ "rewards/accuracies": 0.949999988079071,
1884
+ "rewards/chosen": -2.0685553550720215,
1885
+ "rewards/margins": 2.229001998901367,
1886
+ "rewards/margins_max": 5.061312198638916,
1887
+ "rewards/margins_min": 0.28276023268699646,
1888
+ "rewards/margins_std": 2.12688946723938,
1889
+ "rewards/rejected": -4.297557353973389,
1890
+ "step": 940
1891
+ },
1892
+ {
1893
+ "epoch": 2.68,
1894
+ "grad_norm": 5.379046668918782,
1895
+ "learning_rate": 1.7567952525471107e-08,
1896
+ "logits/chosen": -1.981681227684021,
1897
+ "logits/rejected": -1.835107445716858,
1898
+ "logps/chosen": -423.8411560058594,
1899
+ "logps/rejected": -588.3886108398438,
1900
+ "loss": 0.3094,
1901
+ "rewards/accuracies": 0.9375,
1902
+ "rewards/chosen": -1.7472492456436157,
1903
+ "rewards/margins": 1.7251592874526978,
1904
+ "rewards/margins_max": 3.747589111328125,
1905
+ "rewards/margins_min": 0.21285438537597656,
1906
+ "rewards/margins_std": 1.5877231359481812,
1907
+ "rewards/rejected": -3.4724082946777344,
1908
+ "step": 950
1909
+ },
1910
+ {
1911
+ "epoch": 2.7,
1912
+ "grad_norm": 5.323826462643735,
1913
+ "learning_rate": 1.467446143128101e-08,
1914
+ "logits/chosen": -2.054689884185791,
1915
+ "logits/rejected": -1.922407865524292,
1916
+ "logps/chosen": -434.65081787109375,
1917
+ "logps/rejected": -617.8987426757812,
1918
+ "loss": 0.2779,
1919
+ "rewards/accuracies": 0.949999988079071,
1920
+ "rewards/chosen": -1.7226186990737915,
1921
+ "rewards/margins": 1.9324982166290283,
1922
+ "rewards/margins_max": 3.994910478591919,
1923
+ "rewards/margins_min": 0.3764217495918274,
1924
+ "rewards/margins_std": 1.6352239847183228,
1925
+ "rewards/rejected": -3.6551170349121094,
1926
+ "step": 960
1927
+ },
1928
+ {
1929
+ "epoch": 2.73,
1930
+ "grad_norm": 9.489539963211016,
1931
+ "learning_rate": 1.2034015982622243e-08,
1932
+ "logits/chosen": -2.103243112564087,
1933
+ "logits/rejected": -1.8488214015960693,
1934
+ "logps/chosen": -522.6852416992188,
1935
+ "logps/rejected": -773.9171752929688,
1936
+ "loss": 0.2864,
1937
+ "rewards/accuracies": 0.8999999761581421,
1938
+ "rewards/chosen": -1.9787250757217407,
1939
+ "rewards/margins": 2.53349232673645,
1940
+ "rewards/margins_max": 5.404959678649902,
1941
+ "rewards/margins_min": 0.26877671480178833,
1942
+ "rewards/margins_std": 2.34672212600708,
1943
+ "rewards/rejected": -4.5122175216674805,
1944
+ "step": 970
1945
+ },
1946
+ {
1947
+ "epoch": 2.76,
1948
+ "grad_norm": 4.819958146732696,
1949
+ "learning_rate": 9.649455451539418e-09,
1950
+ "logits/chosen": -2.0097153186798096,
1951
+ "logits/rejected": -1.8648490905761719,
1952
+ "logps/chosen": -453.50042724609375,
1953
+ "logps/rejected": -675.0182495117188,
1954
+ "loss": 0.2579,
1955
+ "rewards/accuracies": 0.925000011920929,
1956
+ "rewards/chosen": -1.9288856983184814,
1957
+ "rewards/margins": 2.2360923290252686,
1958
+ "rewards/margins_max": 4.845951080322266,
1959
+ "rewards/margins_min": 0.353663831949234,
1960
+ "rewards/margins_std": 2.0667340755462646,
1961
+ "rewards/rejected": -4.16497802734375,
1962
+ "step": 980
1963
+ },
1964
+ {
1965
+ "epoch": 2.79,
1966
+ "grad_norm": 4.860055955478379,
1967
+ "learning_rate": 7.523343956923194e-09,
1968
+ "logits/chosen": -1.9947786331176758,
1969
+ "logits/rejected": -1.7985414266586304,
1970
+ "logps/chosen": -480.50103759765625,
1971
+ "logps/rejected": -720.1832885742188,
1972
+ "loss": 0.307,
1973
+ "rewards/accuracies": 0.8999999761581421,
1974
+ "rewards/chosen": -1.8668218851089478,
1975
+ "rewards/margins": 2.3755202293395996,
1976
+ "rewards/margins_max": 5.254766941070557,
1977
+ "rewards/margins_min": 0.25941935181617737,
1978
+ "rewards/margins_std": 2.237616777420044,
1979
+ "rewards/rejected": -4.242341995239258,
1980
+ "step": 990
1981
+ },
1982
+ {
1983
+ "epoch": 2.82,
1984
+ "grad_norm": 4.887859891241385,
1985
+ "learning_rate": 5.6579677073121945e-09,
1986
+ "logits/chosen": -1.95950448513031,
1987
+ "logits/rejected": -1.8909938335418701,
1988
+ "logps/chosen": -456.54913330078125,
1989
+ "logps/rejected": -741.733642578125,
1990
+ "loss": 0.2725,
1991
+ "rewards/accuracies": 0.887499988079071,
1992
+ "rewards/chosen": -1.8867641687393188,
1993
+ "rewards/margins": 2.411424160003662,
1994
+ "rewards/margins_max": 5.223512649536133,
1995
+ "rewards/margins_min": 0.1520969420671463,
1996
+ "rewards/margins_std": 2.2634613513946533,
1997
+ "rewards/rejected": -4.29818868637085,
1998
+ "step": 1000
1999
+ },
2000
+ {
2001
+ "epoch": 2.82,
2002
+ "eval_logits/chosen": -1.9147663116455078,
2003
+ "eval_logits/rejected": -1.8277100324630737,
2004
+ "eval_logps/chosen": -539.9697265625,
2005
+ "eval_logps/rejected": -569.4471435546875,
2006
+ "eval_loss": 0.825098991394043,
2007
+ "eval_rewards/accuracies": 0.5929999947547913,
2008
+ "eval_rewards/chosen": -2.6508901119232178,
2009
+ "eval_rewards/margins": 0.4683758020401001,
2010
+ "eval_rewards/margins_max": 3.500056505203247,
2011
+ "eval_rewards/margins_min": -2.2740743160247803,
2012
+ "eval_rewards/margins_std": 1.9113789796829224,
2013
+ "eval_rewards/rejected": -3.1192662715911865,
2014
+ "eval_runtime": 741.0299,
2015
+ "eval_samples_per_second": 2.699,
2016
+ "eval_steps_per_second": 0.169,
2017
+ "step": 1000
2018
+ },
2019
+ {
2020
+ "epoch": 2.85,
2021
+ "grad_norm": 6.523826359106252,
2022
+ "learning_rate": 4.0553325425319585e-09,
2023
+ "logits/chosen": -2.071352243423462,
2024
+ "logits/rejected": -1.9273669719696045,
2025
+ "logps/chosen": -521.459228515625,
2026
+ "logps/rejected": -810.1173095703125,
2027
+ "loss": 0.3486,
2028
+ "rewards/accuracies": 0.8374999761581421,
2029
+ "rewards/chosen": -2.1784815788269043,
2030
+ "rewards/margins": 2.454408645629883,
2031
+ "rewards/margins_max": 5.279435157775879,
2032
+ "rewards/margins_min": 0.46752485632896423,
2033
+ "rewards/margins_std": 2.169618844985962,
2034
+ "rewards/rejected": -4.632889747619629,
2035
+ "step": 1010
2036
+ },
2037
+ {
2038
+ "epoch": 2.87,
2039
+ "grad_norm": 9.128100978737436,
2040
+ "learning_rate": 2.717161776814747e-09,
2041
+ "logits/chosen": -2.0809285640716553,
2042
+ "logits/rejected": -1.9295809268951416,
2043
+ "logps/chosen": -497.36962890625,
2044
+ "logps/rejected": -737.8751831054688,
2045
+ "loss": 0.284,
2046
+ "rewards/accuracies": 0.925000011920929,
2047
+ "rewards/chosen": -2.0471560955047607,
2048
+ "rewards/margins": 2.289663791656494,
2049
+ "rewards/margins_max": 5.025771141052246,
2050
+ "rewards/margins_min": 0.4177941679954529,
2051
+ "rewards/margins_std": 2.142401695251465,
2052
+ "rewards/rejected": -4.336819648742676,
2053
+ "step": 1020
2054
+ },
2055
+ {
2056
+ "epoch": 2.9,
2057
+ "grad_norm": 7.711600386273794,
2058
+ "learning_rate": 1.6448943457189613e-09,
2059
+ "logits/chosen": -2.0054070949554443,
2060
+ "logits/rejected": -1.8348314762115479,
2061
+ "logps/chosen": -463.53558349609375,
2062
+ "logps/rejected": -709.8609619140625,
2063
+ "loss": 0.3101,
2064
+ "rewards/accuracies": 0.949999988079071,
2065
+ "rewards/chosen": -1.7694952487945557,
2066
+ "rewards/margins": 2.3598575592041016,
2067
+ "rewards/margins_max": 5.3210344314575195,
2068
+ "rewards/margins_min": 0.4152081608772278,
2069
+ "rewards/margins_std": 2.2792410850524902,
2070
+ "rewards/rejected": -4.129352569580078,
2071
+ "step": 1030
2072
+ },
2073
+ {
2074
+ "epoch": 2.93,
2075
+ "grad_norm": 7.482361970769731,
2076
+ "learning_rate": 8.396832588411229e-10,
2077
+ "logits/chosen": -1.911126732826233,
2078
+ "logits/rejected": -1.759119987487793,
2079
+ "logps/chosen": -461.5048828125,
2080
+ "logps/rejected": -633.4130859375,
2081
+ "loss": 0.3082,
2082
+ "rewards/accuracies": 0.8999999761581421,
2083
+ "rewards/chosen": -1.928013563156128,
2084
+ "rewards/margins": 1.7119224071502686,
2085
+ "rewards/margins_max": 4.119196891784668,
2086
+ "rewards/margins_min": 0.11787784099578857,
2087
+ "rewards/margins_std": 1.8589006662368774,
2088
+ "rewards/rejected": -3.6399359703063965,
2089
+ "step": 1040
2090
+ },
2091
+ {
2092
+ "epoch": 2.96,
2093
+ "grad_norm": 5.603676794638893,
2094
+ "learning_rate": 3.0239435998430374e-10,
2095
+ "logits/chosen": -2.0554075241088867,
2096
+ "logits/rejected": -1.8917900323867798,
2097
+ "logps/chosen": -446.63922119140625,
2098
+ "logps/rejected": -648.955078125,
2099
+ "loss": 0.2974,
2100
+ "rewards/accuracies": 0.887499988079071,
2101
+ "rewards/chosen": -1.6835094690322876,
2102
+ "rewards/margins": 2.1592822074890137,
2103
+ "rewards/margins_max": 4.456897735595703,
2104
+ "rewards/margins_min": 0.3158484995365143,
2105
+ "rewards/margins_std": 1.9191557168960571,
2106
+ "rewards/rejected": -3.842791795730591,
2107
+ "step": 1050
2108
+ },
2109
+ {
2110
+ "epoch": 2.99,
2111
+ "grad_norm": 5.071722037499608,
2112
+ "learning_rate": 3.360539611582669e-11,
2113
+ "logits/chosen": -1.9219595193862915,
2114
+ "logits/rejected": -1.8536704778671265,
2115
+ "logps/chosen": -401.3136291503906,
2116
+ "logps/rejected": -684.7721557617188,
2117
+ "loss": 0.2643,
2118
+ "rewards/accuracies": 0.949999988079071,
2119
+ "rewards/chosen": -1.4818236827850342,
2120
+ "rewards/margins": 2.484513998031616,
2121
+ "rewards/margins_max": 5.372402667999268,
2122
+ "rewards/margins_min": 0.5392643213272095,
2123
+ "rewards/margins_std": 2.2041122913360596,
2124
+ "rewards/rejected": -3.9663376808166504,
2125
+ "step": 1060
2126
+ },
2127
+ {
2128
+ "epoch": 3.0,
2129
+ "step": 1065,
2130
+ "total_flos": 0.0,
2131
+ "train_loss": 0.4186206150502666,
2132
+ "train_runtime": 20980.8825,
2133
+ "train_samples_per_second": 0.812,
2134
+ "train_steps_per_second": 0.051
2135
+ }
2136
+ ],
2137
+ "logging_steps": 10,
2138
+ "max_steps": 1065,
2139
+ "num_input_tokens_seen": 0,
2140
+ "num_train_epochs": 3,
2141
+ "save_steps": 100,
2142
+ "total_flos": 0.0,
2143
+ "train_batch_size": 4,
2144
+ "trial_name": null,
2145
+ "trial_params": null
2146
+ }