chchen committed on
Commit
e023cc9
1 Parent(s): 6e9c6cb

Model save

Browse files
.ipynb_checkpoints/lora_orpo-checkpoint.yaml CHANGED
@@ -34,10 +34,10 @@ learning_rate: 0.000005
34
  num_train_epochs: 3.0
35
  lr_scheduler_type: cosine
36
  warmup_steps: 0.1
37
- fp16: true
38
 
39
  ### eval
40
  val_size: 0.1
41
- per_device_eval_batch_size: 2
42
  evaluation_strategy: steps
43
  eval_steps: 500
 
34
  num_train_epochs: 3.0
35
  lr_scheduler_type: cosine
36
  warmup_steps: 0.1
37
+ bf16: true
38
 
39
  ### eval
40
  val_size: 0.1
41
+ per_device_eval_batch_size: 1
42
  evaluation_strategy: steps
43
  eval_steps: 500
README.md CHANGED
@@ -38,7 +38,7 @@ More information needed
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-06
40
  - train_batch_size: 1
41
- - eval_batch_size: 2
42
  - seed: 42
43
  - gradient_accumulation_steps: 16
44
  - total_train_batch_size: 16
@@ -46,7 +46,6 @@ The following hyperparameters were used during training:
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_steps: 0.1
48
  - num_epochs: 3.0
49
- - mixed_precision_training: Native AMP
50
 
51
  ### Training results
52
 
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-06
40
  - train_batch_size: 1
41
+ - eval_batch_size: 1
42
  - seed: 42
43
  - gradient_accumulation_steps: 16
44
  - total_train_batch_size: 16
 
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_steps: 0.1
48
  - num_epochs: 3.0
 
49
 
50
  ### Training results
51
 
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "up_proj",
24
- "down_proj",
25
- "q_proj",
26
- "k_proj",
27
  "v_proj",
 
 
28
  "gate_proj",
29
- "o_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
23
  "v_proj",
24
+ "q_proj",
25
+ "up_proj",
26
  "gate_proj",
27
+ "o_proj",
28
+ "k_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95018afb465ec344257caacc54cbc32ab167939ff6bf32d1b25aa8112a976531
3
  size 100059752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc4b6acb2d98e1fe0132e7b05219cd20ebed844e661990d0b1652210733570e4
3
  size 100059752
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.986666666666667,
3
+ "total_flos": 2.2023536924295168e+17,
4
+ "train_loss": 1.8065733909606934,
5
+ "train_runtime": 5653.336,
6
+ "train_samples_per_second": 0.478,
7
+ "train_steps_per_second": 0.03
8
+ }
lora_orpo.yaml CHANGED
@@ -34,10 +34,10 @@ learning_rate: 0.000005
34
  num_train_epochs: 3.0
35
  lr_scheduler_type: cosine
36
  warmup_steps: 0.1
37
- fp16: true
38
 
39
  ### eval
40
  val_size: 0.1
41
- per_device_eval_batch_size: 2
42
  evaluation_strategy: steps
43
  eval_steps: 500
 
34
  num_train_epochs: 3.0
35
  lr_scheduler_type: cosine
36
  warmup_steps: 0.1
37
+ bf16: true
38
 
39
  ### eval
40
  val_size: 0.1
41
+ per_device_eval_batch_size: 1
42
  evaluation_strategy: steps
43
  eval_steps: 500
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.986666666666667,
3
+ "total_flos": 2.2023536924295168e+17,
4
+ "train_loss": 1.8065733909606934,
5
+ "train_runtime": 5653.336,
6
+ "train_samples_per_second": 0.478,
7
+ "train_steps_per_second": 0.03
8
+ }
trainer_log.jsonl CHANGED
@@ -1,17 +1,17 @@
1
- {"current_steps": 10, "total_steps": 168, "loss": 2.0742, "accuracy": 0.53125, "learning_rate": 4.957230266673969e-06, "epoch": 0.17777777777777778, "percentage": 5.95, "elapsed_time": "0:05:37", "remaining_time": "1:28:54"}
2
- {"current_steps": 20, "total_steps": 168, "loss": 2.1265, "accuracy": 0.543749988079071, "learning_rate": 4.828686741593921e-06, "epoch": 0.35555555555555557, "percentage": 11.9, "elapsed_time": "0:11:07", "remaining_time": "1:22:16"}
3
- {"current_steps": 30, "total_steps": 168, "loss": 1.9475, "accuracy": 0.5375000238418579, "learning_rate": 4.618852307232078e-06, "epoch": 0.5333333333333333, "percentage": 17.86, "elapsed_time": "0:16:57", "remaining_time": "1:17:58"}
4
- {"current_steps": 40, "total_steps": 168, "loss": 1.8335, "accuracy": 0.550000011920929, "learning_rate": 4.335051964269395e-06, "epoch": 0.7111111111111111, "percentage": 23.81, "elapsed_time": "0:22:23", "remaining_time": "1:11:38"}
5
- {"current_steps": 50, "total_steps": 168, "loss": 2.0471, "accuracy": 0.5375000238418579, "learning_rate": 3.987192750660719e-06, "epoch": 0.8888888888888888, "percentage": 29.76, "elapsed_time": "0:28:01", "remaining_time": "1:06:07"}
6
- {"current_steps": 60, "total_steps": 168, "loss": 1.862, "accuracy": 0.5562499761581421, "learning_rate": 3.587417902020876e-06, "epoch": 1.0666666666666667, "percentage": 35.71, "elapsed_time": "0:33:39", "remaining_time": "1:00:34"}
7
- {"current_steps": 70, "total_steps": 168, "loss": 1.7853, "accuracy": 0.581250011920929, "learning_rate": 3.1496829497545268e-06, "epoch": 1.2444444444444445, "percentage": 41.67, "elapsed_time": "0:39:12", "remaining_time": "0:54:52"}
8
- {"current_steps": 80, "total_steps": 168, "loss": 1.8004, "accuracy": 0.581250011920929, "learning_rate": 2.6892685546987724e-06, "epoch": 1.4222222222222223, "percentage": 47.62, "elapsed_time": "0:44:56", "remaining_time": "0:49:25"}
9
- {"current_steps": 90, "total_steps": 168, "loss": 1.647, "accuracy": 0.543749988079071, "learning_rate": 2.2222470825144806e-06, "epoch": 1.6, "percentage": 53.57, "elapsed_time": "0:50:27", "remaining_time": "0:43:44"}
10
- {"current_steps": 100, "total_steps": 168, "loss": 1.7403, "accuracy": 0.59375, "learning_rate": 1.7649215418673847e-06, "epoch": 1.7777777777777777, "percentage": 59.52, "elapsed_time": "0:56:04", "remaining_time": "0:38:07"}
11
- {"current_steps": 110, "total_steps": 168, "loss": 1.7239, "accuracy": 0.5062500238418579, "learning_rate": 1.3332564712129845e-06, "epoch": 1.9555555555555557, "percentage": 65.48, "elapsed_time": "1:01:43", "remaining_time": "0:32:32"}
12
- {"current_steps": 120, "total_steps": 168, "loss": 1.7291, "accuracy": 0.53125, "learning_rate": 9.423206410612498e-07, "epoch": 2.1333333333333333, "percentage": 71.43, "elapsed_time": "1:07:28", "remaining_time": "0:26:59"}
13
- {"current_steps": 130, "total_steps": 168, "loss": 1.6236, "accuracy": 0.5687500238418579, "learning_rate": 6.057610261367044e-07, "epoch": 2.311111111111111, "percentage": 77.38, "elapsed_time": "1:13:10", "remaining_time": "0:21:23"}
14
- {"current_steps": 140, "total_steps": 168, "loss": 1.5735, "accuracy": 0.5874999761581421, "learning_rate": 3.3532641026504415e-07, "epoch": 2.488888888888889, "percentage": 83.33, "elapsed_time": "1:18:30", "remaining_time": "0:15:42"}
15
- {"current_steps": 150, "total_steps": 168, "loss": 1.8172, "accuracy": 0.5874999761581421, "learning_rate": 1.4045725421448332e-07, "epoch": 2.6666666666666665, "percentage": 89.29, "elapsed_time": "1:24:18", "remaining_time": "0:10:07"}
16
- {"current_steps": 160, "total_steps": 168, "loss": 1.5534, "accuracy": 0.612500011920929, "learning_rate": 2.7956143581177874e-08, "epoch": 2.8444444444444446, "percentage": 95.24, "elapsed_time": "1:29:45", "remaining_time": "0:04:29"}
17
- {"current_steps": 168, "total_steps": 168, "epoch": 2.986666666666667, "percentage": 100.0, "elapsed_time": "1:34:13", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 168, "loss": 2.1168, "accuracy": 0.5249999761581421, "learning_rate": 4.957230266673969e-06, "epoch": 0.17777777777777778, "percentage": 5.95, "elapsed_time": "0:05:38", "remaining_time": "1:29:04"}
2
+ {"current_steps": 20, "total_steps": 168, "loss": 2.1946, "accuracy": 0.543749988079071, "learning_rate": 4.828686741593921e-06, "epoch": 0.35555555555555557, "percentage": 11.9, "elapsed_time": "0:11:08", "remaining_time": "1:22:23"}
3
+ {"current_steps": 30, "total_steps": 168, "loss": 2.0246, "accuracy": 0.53125, "learning_rate": 4.618852307232078e-06, "epoch": 0.5333333333333333, "percentage": 17.86, "elapsed_time": "0:16:57", "remaining_time": "1:18:02"}
4
+ {"current_steps": 40, "total_steps": 168, "loss": 1.913, "accuracy": 0.5874999761581421, "learning_rate": 4.335051964269395e-06, "epoch": 0.7111111111111111, "percentage": 23.81, "elapsed_time": "0:22:22", "remaining_time": "1:11:37"}
5
+ {"current_steps": 50, "total_steps": 168, "loss": 2.12, "accuracy": 0.5375000238418579, "learning_rate": 3.987192750660719e-06, "epoch": 0.8888888888888888, "percentage": 29.76, "elapsed_time": "0:27:59", "remaining_time": "1:06:03"}
6
+ {"current_steps": 60, "total_steps": 168, "loss": 1.9418, "accuracy": 0.5625, "learning_rate": 3.587417902020876e-06, "epoch": 1.0666666666666667, "percentage": 35.71, "elapsed_time": "0:33:36", "remaining_time": "1:00:30"}
7
+ {"current_steps": 70, "total_steps": 168, "loss": 1.8513, "accuracy": 0.5625, "learning_rate": 3.1496829497545268e-06, "epoch": 1.2444444444444445, "percentage": 41.67, "elapsed_time": "0:39:07", "remaining_time": "0:54:46"}
8
+ {"current_steps": 80, "total_steps": 168, "loss": 1.8758, "accuracy": 0.574999988079071, "learning_rate": 2.6892685546987724e-06, "epoch": 1.4222222222222223, "percentage": 47.62, "elapsed_time": "0:44:51", "remaining_time": "0:49:20"}
9
+ {"current_steps": 90, "total_steps": 168, "loss": 1.7086, "accuracy": 0.543749988079071, "learning_rate": 2.2222470825144806e-06, "epoch": 1.6, "percentage": 53.57, "elapsed_time": "0:50:22", "remaining_time": "0:43:39"}
10
+ {"current_steps": 100, "total_steps": 168, "loss": 1.8133, "accuracy": 0.5625, "learning_rate": 1.7649215418673847e-06, "epoch": 1.7777777777777777, "percentage": 59.52, "elapsed_time": "0:55:57", "remaining_time": "0:38:03"}
11
+ {"current_steps": 110, "total_steps": 168, "loss": 1.7876, "accuracy": 0.518750011920929, "learning_rate": 1.3332564712129845e-06, "epoch": 1.9555555555555557, "percentage": 65.48, "elapsed_time": "1:01:36", "remaining_time": "0:32:28"}
12
+ {"current_steps": 120, "total_steps": 168, "loss": 1.7863, "accuracy": 0.53125, "learning_rate": 9.423206410612498e-07, "epoch": 2.1333333333333333, "percentage": 71.43, "elapsed_time": "1:07:20", "remaining_time": "0:26:56"}
13
+ {"current_steps": 130, "total_steps": 168, "loss": 1.692, "accuracy": 0.5625, "learning_rate": 6.057610261367044e-07, "epoch": 2.311111111111111, "percentage": 77.38, "elapsed_time": "1:13:01", "remaining_time": "0:21:20"}
14
+ {"current_steps": 140, "total_steps": 168, "loss": 1.6376, "accuracy": 0.606249988079071, "learning_rate": 3.3532641026504415e-07, "epoch": 2.488888888888889, "percentage": 83.33, "elapsed_time": "1:18:21", "remaining_time": "0:15:40"}
15
+ {"current_steps": 150, "total_steps": 168, "loss": 1.8891, "accuracy": 0.574999988079071, "learning_rate": 1.4045725421448332e-07, "epoch": 2.6666666666666665, "percentage": 89.29, "elapsed_time": "1:24:09", "remaining_time": "0:10:05"}
16
+ {"current_steps": 160, "total_steps": 168, "loss": 1.6214, "accuracy": 0.6000000238418579, "learning_rate": 2.7956143581177874e-08, "epoch": 2.8444444444444446, "percentage": 95.24, "elapsed_time": "1:29:35", "remaining_time": "0:04:28"}
17
+ {"current_steps": 168, "total_steps": 168, "epoch": 2.986666666666667, "percentage": 100.0, "elapsed_time": "1:34:02", "remaining_time": "0:00:00"}
trainer_state.json ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.986666666666667,
5
+ "eval_steps": 500,
6
+ "global_step": 168,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.17777777777777778,
13
+ "grad_norm": 1.9894839525222778,
14
+ "learning_rate": 4.957230266673969e-06,
15
+ "logits/chosen": 218.2901153564453,
16
+ "logits/rejected": 217.98861694335938,
17
+ "logps/chosen": -2.0115113258361816,
18
+ "logps/rejected": -2.2237343788146973,
19
+ "loss": 2.0742,
20
+ "odds_ratio_loss": 0.6265951991081238,
21
+ "rewards/accuracies": 0.53125,
22
+ "rewards/chosen": -0.20115113258361816,
23
+ "rewards/margins": 0.021222341805696487,
24
+ "rewards/rejected": -0.22237345576286316,
25
+ "sft_loss": 2.0115113258361816,
26
+ "step": 10
27
+ },
28
+ {
29
+ "epoch": 0.35555555555555557,
30
+ "grad_norm": 1.8634482622146606,
31
+ "learning_rate": 4.828686741593921e-06,
32
+ "logits/chosen": 220.6365509033203,
33
+ "logits/rejected": 220.3389129638672,
34
+ "logps/chosen": -2.0625388622283936,
35
+ "logps/rejected": -2.3297858238220215,
36
+ "loss": 2.1265,
37
+ "odds_ratio_loss": 0.6394721865653992,
38
+ "rewards/accuracies": 0.543749988079071,
39
+ "rewards/chosen": -0.20625391602516174,
40
+ "rewards/margins": 0.026724692434072495,
41
+ "rewards/rejected": -0.23297858238220215,
42
+ "sft_loss": 2.0625388622283936,
43
+ "step": 20
44
+ },
45
+ {
46
+ "epoch": 0.5333333333333333,
47
+ "grad_norm": 1.5888192653656006,
48
+ "learning_rate": 4.618852307232078e-06,
49
+ "logits/chosen": 223.16909790039062,
50
+ "logits/rejected": 223.3883819580078,
51
+ "logps/chosen": -1.8862736225128174,
52
+ "logps/rejected": -2.1588046550750732,
53
+ "loss": 1.9475,
54
+ "odds_ratio_loss": 0.612014651298523,
55
+ "rewards/accuracies": 0.5375000238418579,
56
+ "rewards/chosen": -0.18862736225128174,
57
+ "rewards/margins": 0.027253109961748123,
58
+ "rewards/rejected": -0.21588046848773956,
59
+ "sft_loss": 1.8862736225128174,
60
+ "step": 30
61
+ },
62
+ {
63
+ "epoch": 0.7111111111111111,
64
+ "grad_norm": 2.911007881164551,
65
+ "learning_rate": 4.335051964269395e-06,
66
+ "logits/chosen": 219.7681884765625,
67
+ "logits/rejected": 220.56063842773438,
68
+ "logps/chosen": -1.7726600170135498,
69
+ "logps/rejected": -2.0512185096740723,
70
+ "loss": 1.8335,
71
+ "odds_ratio_loss": 0.6088349223136902,
72
+ "rewards/accuracies": 0.550000011920929,
73
+ "rewards/chosen": -0.17726600170135498,
74
+ "rewards/margins": 0.02785584330558777,
75
+ "rewards/rejected": -0.20512184500694275,
76
+ "sft_loss": 1.7726600170135498,
77
+ "step": 40
78
+ },
79
+ {
80
+ "epoch": 0.8888888888888888,
81
+ "grad_norm": 3.1844053268432617,
82
+ "learning_rate": 3.987192750660719e-06,
83
+ "logits/chosen": 227.5769500732422,
84
+ "logits/rejected": 227.42721557617188,
85
+ "logps/chosen": -1.982785940170288,
86
+ "logps/rejected": -2.3187923431396484,
87
+ "loss": 2.0471,
88
+ "odds_ratio_loss": 0.6428849697113037,
89
+ "rewards/accuracies": 0.5375000238418579,
90
+ "rewards/chosen": -0.19827860593795776,
91
+ "rewards/margins": 0.033600639551877975,
92
+ "rewards/rejected": -0.23187923431396484,
93
+ "sft_loss": 1.982785940170288,
94
+ "step": 50
95
+ },
96
+ {
97
+ "epoch": 1.0666666666666667,
98
+ "grad_norm": 3.250999689102173,
99
+ "learning_rate": 3.587417902020876e-06,
100
+ "logits/chosen": 229.1508331298828,
101
+ "logits/rejected": 230.65234375,
102
+ "logps/chosen": -1.8027265071868896,
103
+ "logps/rejected": -2.109091281890869,
104
+ "loss": 1.862,
105
+ "odds_ratio_loss": 0.5927264094352722,
106
+ "rewards/accuracies": 0.5562499761581421,
107
+ "rewards/chosen": -0.1802726536989212,
108
+ "rewards/margins": 0.030636483803391457,
109
+ "rewards/rejected": -0.2109091579914093,
110
+ "sft_loss": 1.8027265071868896,
111
+ "step": 60
112
+ },
113
+ {
114
+ "epoch": 1.2444444444444445,
115
+ "grad_norm": 2.524855375289917,
116
+ "learning_rate": 3.1496829497545268e-06,
117
+ "logits/chosen": 229.8919219970703,
118
+ "logits/rejected": 229.6911163330078,
119
+ "logps/chosen": -1.722979187965393,
120
+ "logps/rejected": -1.955990195274353,
121
+ "loss": 1.7853,
122
+ "odds_ratio_loss": 0.6227248311042786,
123
+ "rewards/accuracies": 0.581250011920929,
124
+ "rewards/chosen": -0.17229792475700378,
125
+ "rewards/margins": 0.02330111339688301,
126
+ "rewards/rejected": -0.1955990493297577,
127
+ "sft_loss": 1.722979187965393,
128
+ "step": 70
129
+ },
130
+ {
131
+ "epoch": 1.4222222222222223,
132
+ "grad_norm": 1.4623929262161255,
133
+ "learning_rate": 2.6892685546987724e-06,
134
+ "logits/chosen": 234.3847198486328,
135
+ "logits/rejected": 233.77871704101562,
136
+ "logps/chosen": -1.7393592596054077,
137
+ "logps/rejected": -1.9893379211425781,
138
+ "loss": 1.8004,
139
+ "odds_ratio_loss": 0.6108058094978333,
140
+ "rewards/accuracies": 0.581250011920929,
141
+ "rewards/chosen": -0.1739359200000763,
142
+ "rewards/margins": 0.02499789372086525,
143
+ "rewards/rejected": -0.19893380999565125,
144
+ "sft_loss": 1.7393592596054077,
145
+ "step": 80
146
+ },
147
+ {
148
+ "epoch": 1.6,
149
+ "grad_norm": 1.540860891342163,
150
+ "learning_rate": 2.2222470825144806e-06,
151
+ "logits/chosen": 231.958251953125,
152
+ "logits/rejected": 232.3849334716797,
153
+ "logps/chosen": -1.5855820178985596,
154
+ "logps/rejected": -1.9024156332015991,
155
+ "loss": 1.647,
156
+ "odds_ratio_loss": 0.6140419244766235,
157
+ "rewards/accuracies": 0.543749988079071,
158
+ "rewards/chosen": -0.158558189868927,
159
+ "rewards/margins": 0.03168336674571037,
160
+ "rewards/rejected": -0.19024157524108887,
161
+ "sft_loss": 1.5855820178985596,
162
+ "step": 90
163
+ },
164
+ {
165
+ "epoch": 1.7777777777777777,
166
+ "grad_norm": 1.0507925748825073,
167
+ "learning_rate": 1.7649215418673847e-06,
168
+ "logits/chosen": 235.5908203125,
169
+ "logits/rejected": 235.9726104736328,
170
+ "logps/chosen": -1.67770254611969,
171
+ "logps/rejected": -1.9119056463241577,
172
+ "loss": 1.7403,
173
+ "odds_ratio_loss": 0.6257372498512268,
174
+ "rewards/accuracies": 0.59375,
175
+ "rewards/chosen": -0.16777023673057556,
176
+ "rewards/margins": 0.0234203077852726,
177
+ "rewards/rejected": -0.19119055569171906,
178
+ "sft_loss": 1.67770254611969,
179
+ "step": 100
180
+ },
181
+ {
182
+ "epoch": 1.9555555555555557,
183
+ "grad_norm": 1.1329325437545776,
184
+ "learning_rate": 1.3332564712129845e-06,
185
+ "logits/chosen": 236.5535125732422,
186
+ "logits/rejected": 236.4635772705078,
187
+ "logps/chosen": -1.661228895187378,
188
+ "logps/rejected": -1.8796217441558838,
189
+ "loss": 1.7239,
190
+ "odds_ratio_loss": 0.6264489889144897,
191
+ "rewards/accuracies": 0.5062500238418579,
192
+ "rewards/chosen": -0.16612288355827332,
193
+ "rewards/margins": 0.021839287132024765,
194
+ "rewards/rejected": -0.18796217441558838,
195
+ "sft_loss": 1.661228895187378,
196
+ "step": 110
197
+ },
198
+ {
199
+ "epoch": 2.1333333333333333,
200
+ "grad_norm": 3.1466641426086426,
201
+ "learning_rate": 9.423206410612498e-07,
202
+ "logits/chosen": 234.2484130859375,
203
+ "logits/rejected": 235.138427734375,
204
+ "logps/chosen": -1.6647857427597046,
205
+ "logps/rejected": -1.900854468345642,
206
+ "loss": 1.7291,
207
+ "odds_ratio_loss": 0.6434910893440247,
208
+ "rewards/accuracies": 0.53125,
209
+ "rewards/chosen": -0.16647860407829285,
210
+ "rewards/margins": 0.02360684797167778,
211
+ "rewards/rejected": -0.19008544087409973,
212
+ "sft_loss": 1.6647857427597046,
213
+ "step": 120
214
+ },
215
+ {
216
+ "epoch": 2.311111111111111,
217
+ "grad_norm": 0.8913648128509521,
218
+ "learning_rate": 6.057610261367044e-07,
219
+ "logits/chosen": 234.11795043945312,
220
+ "logits/rejected": 233.8062744140625,
221
+ "logps/chosen": -1.560727834701538,
222
+ "logps/rejected": -1.7592264413833618,
223
+ "loss": 1.6236,
224
+ "odds_ratio_loss": 0.6284235119819641,
225
+ "rewards/accuracies": 0.5687500238418579,
226
+ "rewards/chosen": -0.15607279539108276,
227
+ "rewards/margins": 0.019849851727485657,
228
+ "rewards/rejected": -0.17592264711856842,
229
+ "sft_loss": 1.560727834701538,
230
+ "step": 130
231
+ },
232
+ {
233
+ "epoch": 2.488888888888889,
234
+ "grad_norm": 1.3135228157043457,
235
+ "learning_rate": 3.3532641026504415e-07,
236
+ "logits/chosen": 238.02099609375,
237
+ "logits/rejected": 237.72402954101562,
238
+ "logps/chosen": -1.5137670040130615,
239
+ "logps/rejected": -1.881291389465332,
240
+ "loss": 1.5735,
241
+ "odds_ratio_loss": 0.5971778035163879,
242
+ "rewards/accuracies": 0.5874999761581421,
243
+ "rewards/chosen": -0.15137669444084167,
244
+ "rewards/margins": 0.036752425134181976,
245
+ "rewards/rejected": -0.18812914192676544,
246
+ "sft_loss": 1.5137670040130615,
247
+ "step": 140
248
+ },
249
+ {
250
+ "epoch": 2.6666666666666665,
251
+ "grad_norm": 2.724855661392212,
252
+ "learning_rate": 1.4045725421448332e-07,
253
+ "logits/chosen": 238.43264770507812,
254
+ "logits/rejected": 238.6967010498047,
255
+ "logps/chosen": -1.7582404613494873,
256
+ "logps/rejected": -2.0160341262817383,
257
+ "loss": 1.8172,
258
+ "odds_ratio_loss": 0.5895546674728394,
259
+ "rewards/accuracies": 0.5874999761581421,
260
+ "rewards/chosen": -0.17582406103610992,
261
+ "rewards/margins": 0.025779366493225098,
262
+ "rewards/rejected": -0.20160344243049622,
263
+ "sft_loss": 1.7582404613494873,
264
+ "step": 150
265
+ },
266
+ {
267
+ "epoch": 2.8444444444444446,
268
+ "grad_norm": 1.488288402557373,
269
+ "learning_rate": 2.7956143581177874e-08,
270
+ "logits/chosen": 237.65185546875,
271
+ "logits/rejected": 237.43270874023438,
272
+ "logps/chosen": -1.4948513507843018,
273
+ "logps/rejected": -1.7622127532958984,
274
+ "loss": 1.5534,
275
+ "odds_ratio_loss": 0.5855392217636108,
276
+ "rewards/accuracies": 0.612500011920929,
277
+ "rewards/chosen": -0.14948514103889465,
278
+ "rewards/margins": 0.026736149564385414,
279
+ "rewards/rejected": -0.17622129619121552,
280
+ "sft_loss": 1.4948513507843018,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 2.986666666666667,
285
+ "step": 168,
286
+ "total_flos": 2.2023536924295168e+17,
287
+ "train_loss": 1.8065733909606934,
288
+ "train_runtime": 5653.336,
289
+ "train_samples_per_second": 0.478,
290
+ "train_steps_per_second": 0.03
291
+ }
292
+ ],
293
+ "logging_steps": 10,
294
+ "max_steps": 168,
295
+ "num_input_tokens_seen": 0,
296
+ "num_train_epochs": 3,
297
+ "save_steps": 500,
298
+ "total_flos": 2.2023536924295168e+17,
299
+ "train_batch_size": 1,
300
+ "trial_name": null,
301
+ "trial_params": null
302
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0b7bb05b10d251335d82869ccbd09daf7e20325dd66ed9ecb74474f40ee5829
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f843c7a8a89253f57fbbf07b31914c7257caaec1c6f05fbf07eff4da859d27c
3
  size 5176
training_loss.png ADDED
training_rewards_accuracies.png ADDED
training_sft_loss.png ADDED