beamaia committed on
Commit
4b61ac2
1 Parent(s): 87f3dde

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
- "k_proj",
25
  "q_proj",
26
- "v_proj"
 
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "q_proj",
24
+ "k_proj",
25
+ "v_proj",
26
+ "o_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:382e30330bcf16a31a9c77b85ae047dffe625857977f8280cc03e19970f6f020
3
  size 54560368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9022a41c6a7725ac3af5b0936fc7e06fc2895c4a8da259582c34fce1136a0fd4
3
  size 54560368
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b62d24323a5fd9bf8c0fc65cdaa199e8690c7869635da47e5f7100fb8c16ea0
3
  size 109267450
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3821e0c5652183b68ba5df1fec03c5f08c944e6453f131c4b4c0eb3216f0912
3
  size 109267450
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.5443353056907654,
3
- "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.9-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
4
  "epoch": 0.684931506849315,
5
  "eval_steps": 50,
6
  "global_step": 100,
@@ -10,42 +10,42 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.14,
13
- "grad_norm": 4.8340067863464355,
14
- "kl": 27.297277450561523,
15
  "learning_rate": 0.00018,
16
- "logps/chosen": -294.6091003417969,
17
- "logps/rejected": -256.2513732910156,
18
- "loss": 0.4863,
19
- "rewards/chosen": 1.882211446762085,
20
- "rewards/margins": 0.8332540392875671,
21
- "rewards/rejected": 0.9183141589164734,
22
  "step": 20
23
  },
24
  {
25
  "epoch": 0.27,
26
  "grad_norm": 0.0,
27
- "kl": 40.49668502807617,
28
  "learning_rate": 0.00015142857142857143,
29
- "logps/chosen": -539.7681884765625,
30
- "logps/rejected": -501.2303161621094,
31
- "loss": 0.4555,
32
- "rewards/chosen": -24.707237243652344,
33
- "rewards/margins": -0.5242304801940918,
34
- "rewards/rejected": -22.829710006713867,
35
  "step": 40
36
  },
37
  {
38
  "epoch": 0.34,
39
  "eval_kl": 0.0,
40
- "eval_logps/chosen": -1786.45947265625,
41
- "eval_logps/rejected": -1616.440185546875,
42
- "eval_loss": 0.5443361401557922,
43
- "eval_rewards/chosen": -148.88607788085938,
44
- "eval_rewards/margins": -20.042972564697266,
45
- "eval_rewards/rejected": -135.36317443847656,
46
- "eval_runtime": 138.4944,
47
- "eval_samples_per_second": 2.166,
48
- "eval_steps_per_second": 0.542,
49
  "step": 50
50
  },
51
  {
@@ -53,12 +53,12 @@
53
  "grad_norm": 0.0,
54
  "kl": 0.0,
55
  "learning_rate": 0.00012285714285714287,
56
- "logps/chosen": -1740.7340087890625,
57
- "logps/rejected": -1820.6246337890625,
58
- "loss": 0.5427,
59
- "rewards/chosen": -146.22254943847656,
60
- "rewards/margins": 5.4553728103637695,
61
- "rewards/rejected": -151.97308349609375,
62
  "step": 60
63
  },
64
  {
@@ -66,12 +66,12 @@
66
  "grad_norm": 0.0,
67
  "kl": 0.0,
68
  "learning_rate": 9.428571428571429e-05,
69
- "logps/chosen": -1711.9744873046875,
70
- "logps/rejected": -1807.9703369140625,
71
- "loss": 0.5427,
72
- "rewards/chosen": -143.12269592285156,
73
- "rewards/margins": 8.623757362365723,
74
- "rewards/rejected": -150.7401885986328,
75
  "step": 80
76
  },
77
  {
@@ -79,26 +79,26 @@
79
  "grad_norm": 0.0,
80
  "kl": 0.0,
81
  "learning_rate": 6.571428571428571e-05,
82
- "logps/chosen": -1719.2364501953125,
83
- "logps/rejected": -1687.2681884765625,
84
- "loss": 0.5714,
85
- "rewards/chosen": -144.7313995361328,
86
- "rewards/margins": -4.064985752105713,
87
- "rewards/rejected": -140.0031280517578,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.68,
92
  "eval_kl": 0.0,
93
- "eval_logps/chosen": -1605.7635498046875,
94
- "eval_logps/rejected": -1472.8948974609375,
95
- "eval_loss": 0.5443353056907654,
96
- "eval_rewards/chosen": -130.8164825439453,
97
- "eval_rewards/margins": -16.295190811157227,
98
- "eval_rewards/rejected": -121.00863647460938,
99
- "eval_runtime": 138.4851,
100
- "eval_samples_per_second": 2.166,
101
- "eval_steps_per_second": 0.542,
102
  "step": 100
103
  }
104
  ],
 
1
  {
2
+ "best_metric": 0.544333279132843,
3
+ "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.9-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
4
  "epoch": 0.684931506849315,
5
  "eval_steps": 50,
6
  "global_step": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.14,
13
+ "grad_norm": 0.0,
14
+ "kl": 0.9986292123794556,
15
  "learning_rate": 0.00018,
16
+ "logps/chosen": -876.9887084960938,
17
+ "logps/rejected": -891.7728271484375,
18
+ "loss": 0.5066,
19
+ "rewards/chosen": -59.218875885009766,
20
+ "rewards/margins": 1.2524851560592651,
21
+ "rewards/rejected": -60.5962028503418,
22
  "step": 20
23
  },
24
  {
25
  "epoch": 0.27,
26
  "grad_norm": 0.0,
27
+ "kl": 0.0,
28
  "learning_rate": 0.00015142857142857143,
29
+ "logps/chosen": -1901.8116455078125,
30
+ "logps/rejected": -1940.32470703125,
31
+ "loss": 0.5606,
32
+ "rewards/chosen": -161.08740234375,
33
+ "rewards/margins": 2.580688953399658,
34
+ "rewards/rejected": -164.8165283203125,
35
  "step": 40
36
  },
37
  {
38
  "epoch": 0.34,
39
  "eval_kl": 0.0,
40
+ "eval_logps/chosen": -1797.90625,
41
+ "eval_logps/rejected": -1778.623291015625,
42
+ "eval_loss": 0.5443333983421326,
43
+ "eval_rewards/chosen": -151.05047607421875,
44
+ "eval_rewards/margins": 2.7896132469177246,
45
+ "eval_rewards/rejected": -151.03567504882812,
46
+ "eval_runtime": 138.2181,
47
+ "eval_samples_per_second": 2.17,
48
+ "eval_steps_per_second": 0.543,
49
  "step": 50
50
  },
51
  {
 
53
  "grad_norm": 0.0,
54
  "kl": 0.0,
55
  "learning_rate": 0.00012285714285714287,
56
+ "logps/chosen": -1852.9610595703125,
57
+ "logps/rejected": -1893.4521484375,
58
+ "loss": 0.5211,
59
+ "rewards/chosen": -157.99769592285156,
60
+ "rewards/margins": 3.8418266773223877,
61
+ "rewards/rejected": -160.36117553710938,
62
  "step": 60
63
  },
64
  {
 
66
  "grad_norm": 0.0,
67
  "kl": 0.0,
68
  "learning_rate": 9.428571428571429e-05,
69
+ "logps/chosen": -2051.344482421875,
70
+ "logps/rejected": -1892.0853271484375,
71
+ "loss": 0.5714,
72
+ "rewards/chosen": -176.026611328125,
73
+ "rewards/margins": -20.468326568603516,
74
+ "rewards/rejected": -161.7165069580078,
75
  "step": 80
76
  },
77
  {
 
79
  "grad_norm": 0.0,
80
  "kl": 0.0,
81
  "learning_rate": 6.571428571428571e-05,
82
+ "logps/chosen": -2035.6630859375,
83
+ "logps/rejected": -1939.001220703125,
84
+ "loss": 0.4995,
85
+ "rewards/chosen": -175.16189575195312,
86
+ "rewards/margins": -12.196085929870605,
87
+ "rewards/rejected": -165.23004150390625,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.68,
92
  "eval_kl": 0.0,
93
+ "eval_logps/chosen": -1859.810791015625,
94
+ "eval_logps/rejected": -1831.84814453125,
95
+ "eval_loss": 0.544333279132843,
96
+ "eval_rewards/chosen": -157.24095153808594,
97
+ "eval_rewards/margins": 2.0474841594696045,
98
+ "eval_rewards/rejected": -156.35816955566406,
99
+ "eval_runtime": 138.0332,
100
+ "eval_samples_per_second": 2.173,
101
+ "eval_steps_per_second": 0.543,
102
  "step": 100
103
  }
104
  ],
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:208731d05bb926d0eed23087f8da51d3f9788681ea21fd406c1607c7f228f41f
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:610ac0203a49e0c4734b308d39dc63c437e14685b90417052adfcc16864e15eb
3
  size 5688