beamaia committed on
Commit 261f5d0
1 Parent(s): e598102

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "k_proj",
  "o_proj",
- "q_proj",
  "v_proj",
- "k_proj"
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:674904922c1114bca2b6962d0865c26cd6461c97c38b675a2f9cd49a1477127c
+ oid sha256:52990ac22b94270d636aea6ec8617aaacfa911c0126139ca1af3ecbef6bdb910
  size 54560368
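The updated adapter_model.safetensors holds only the LoRA adapter weights (about 54 MB), so evaluating or resuming from this checkpoint means loading it on top of the base model. A minimal sketch, where the base model id and local checkpoint path are placeholders and not taken from this commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "HuggingFaceH4/zephyr-7b-beta"  # assumption: an illustrative Zephyr base, not confirmed by this commit
checkpoint_dir = "checkpoint-100"               # local path to the checkpoint in this diff

base = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Attach the LoRA weights stored in adapter_model.safetensors.
model = PeftModel.from_pretrained(base, checkpoint_dir)
model.eval()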
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2d1594d8c26c7cf16f63874394d9e1ee281367b7bf47fe000ce7b4c5d1bd3bf6
+ oid sha256:aee12a0aa0a7205759369bbb9ef004fc22850bd0b1f78cfa6bcd55938931fd60
  size 109267450
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
  {
- "best_metric": 0.38888487219810486,
- "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.8-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
+ "best_metric": 0.6390000581741333,
+ "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.8-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
@@ -10,95 +10,95 @@
  "log_history": [
  {
  "epoch": 0.14,
- "grad_norm": 4.127615451812744,
- "kl": 39.26093673706055,
+ "grad_norm": 0.0,
+ "kl": 7.392268180847168,
  "learning_rate": 0.00018142857142857142,
- "logps/chosen": -245.89620971679688,
- "logps/rejected": -258.748779296875,
- "loss": 0.4802,
- "rewards/chosen": 4.086777210235596,
- "rewards/margins": 0.9074667692184448,
- "rewards/rejected": 3.135941743850708,
+ "logps/chosen": -556.1337890625,
+ "logps/rejected": -563.4813842773438,
+ "loss": 0.5396,
+ "rewards/chosen": -27.30255699157715,
+ "rewards/margins": 7.028589725494385,
+ "rewards/rejected": -27.570302963256836,
  "step": 20
  },
  {
  "epoch": 0.27,
- "grad_norm": 5.660097122192383,
- "kl": 59.82822799682617,
+ "grad_norm": 0.0,
+ "kl": 0.0,
  "learning_rate": 0.00015285714285714287,
- "logps/chosen": -209.38458251953125,
- "logps/rejected": -248.3455047607422,
- "loss": 0.4154,
- "rewards/chosen": 6.526347637176514,
- "rewards/margins": 2.00443172454834,
- "rewards/rejected": 4.586952209472656,
+ "logps/chosen": -1588.400634765625,
+ "logps/rejected": -1780.9019775390625,
+ "loss": 0.6413,
+ "rewards/chosen": -131.71377563476562,
+ "rewards/margins": 18.5042781829834,
+ "rewards/rejected": -147.60687255859375,
  "step": 40
  },
  {
  "epoch": 0.34,
- "eval_kl": 58.896419525146484,
- "eval_logps/chosen": -219.6454315185547,
- "eval_logps/rejected": -220.0347442626953,
- "eval_loss": 0.4205213487148285,
- "eval_rewards/chosen": 7.1839752197265625,
- "eval_rewards/margins": 3.1912589073181152,
- "eval_rewards/rejected": 4.228271961212158,
- "eval_runtime": 138.6487,
- "eval_samples_per_second": 2.164,
- "eval_steps_per_second": 0.541,
+ "eval_kl": 0.0,
+ "eval_logps/chosen": -1972.53662109375,
+ "eval_logps/rejected": -1894.372314453125,
+ "eval_loss": 0.6390000581741333,
+ "eval_rewards/chosen": -168.53868103027344,
+ "eval_rewards/margins": -6.742273807525635,
+ "eval_rewards/rejected": -162.69952392578125,
+ "eval_runtime": 141.4233,
+ "eval_samples_per_second": 2.121,
+ "eval_steps_per_second": 0.53,
  "step": 50
  },
  {
  "epoch": 0.41,
- "grad_norm": 3.1389451026916504,
- "kl": 57.303131103515625,
+ "grad_norm": 0.0,
+ "kl": 0.0,
  "learning_rate": 0.00012428571428571428,
- "logps/chosen": -229.5048370361328,
- "logps/rejected": -248.27647399902344,
- "loss": 0.3826,
- "rewards/chosen": 7.161718368530273,
- "rewards/margins": 2.8796584606170654,
- "rewards/rejected": 4.1422295570373535,
+ "logps/chosen": -2004.6998291015625,
+ "logps/rejected": -1917.18212890625,
+ "loss": 0.6159,
+ "rewards/chosen": -172.33108520507812,
+ "rewards/margins": -6.893044948577881,
+ "rewards/rejected": -164.2748260498047,
  "step": 60
  },
  {
  "epoch": 0.55,
- "grad_norm": 3.413207530975342,
- "kl": 73.86964416503906,
+ "grad_norm": 0.0,
+ "kl": 0.0,
  "learning_rate": 9.571428571428573e-05,
- "logps/chosen": -207.4834747314453,
- "logps/rejected": -261.6178894042969,
- "loss": 0.4554,
- "rewards/chosen": 8.253941535949707,
- "rewards/margins": 2.003471851348877,
- "rewards/rejected": 6.300358772277832,
+ "logps/chosen": -2267.27490234375,
+ "logps/rejected": -2191.0869140625,
+ "loss": 0.6539,
+ "rewards/chosen": -196.35650634765625,
+ "rewards/margins": -5.303529739379883,
+ "rewards/rejected": -189.41534423828125,
  "step": 80
  },
  {
  "epoch": 0.68,
- "grad_norm": 4.572467803955078,
- "kl": 62.742340087890625,
+ "grad_norm": 0.0,
+ "kl": 0.0,
  "learning_rate": 6.714285714285714e-05,
- "logps/chosen": -211.75201416015625,
- "logps/rejected": -226.812255859375,
- "loss": 0.36,
- "rewards/chosen": 7.574887752532959,
- "rewards/margins": 2.8302791118621826,
- "rewards/rejected": 4.788758277893066,
+ "logps/chosen": -2088.027587890625,
+ "logps/rejected": -2137.646728515625,
+ "loss": 0.637,
+ "rewards/chosen": -180.6759796142578,
+ "rewards/margins": 5.185708045959473,
+ "rewards/rejected": -185.26431274414062,
  "step": 100
  },
  {
  "epoch": 0.68,
- "eval_kl": 57.12546157836914,
- "eval_logps/chosen": -219.50518798828125,
- "eval_logps/rejected": -215.76112365722656,
- "eval_loss": 0.38888487219810486,
- "eval_rewards/chosen": 7.198000431060791,
- "eval_rewards/margins": 2.7296645641326904,
- "eval_rewards/rejected": 4.65563440322876,
- "eval_runtime": 138.5412,
- "eval_samples_per_second": 2.165,
- "eval_steps_per_second": 0.541,
+ "eval_kl": 0.0,
+ "eval_logps/chosen": -2049.0234375,
+ "eval_logps/rejected": -1959.040283203125,
+ "eval_loss": 0.6390000581741333,
+ "eval_rewards/chosen": -176.1873779296875,
+ "eval_rewards/margins": -7.898090839385986,
+ "eval_rewards/rejected": -169.1663055419922,
+ "eval_runtime": 141.3411,
+ "eval_samples_per_second": 2.123,
+ "eval_steps_per_second": 0.531,
  "step": 100
  }
  ],
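trainer_state.json is plain JSON written by the Trainer, so the KTO metrics recorded in log_history (loss, kl, rewards/margins, eval_loss, and so on) can be inspected directly. A minimal sketch, assuming the checkpoint directory has been downloaded locally:

import json

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])

# Separate training records from evaluation records in log_history.
for record in state["log_history"]:
    if "eval_loss" in record:
        print(f"step {record['step']}: eval_loss={record['eval_loss']:.4f}, "
              f"eval_rewards/margins={record['eval_rewards/margins']:.3f}")
    elif "loss" in record:
        print(f"step {record['step']}: loss={record['loss']:.4f}, "
              f"rewards/margins={record['rewards/margins']:.3f}")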
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ec753cf81827293d67e1cd4dadda0709aa2933566f8a8859588f3e620859ca79
+ oid sha256:e88063efcf2e1044ae00909b776634bdd20170ee5677d970f3927f18fe89a355
  size 5688
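training_args.bin is a pickled TrainingArguments object rather than a tensor file, so its hash changes whenever any hyperparameter or run detail changes. A minimal sketch of inspecting it, assuming a local copy and a PyTorch version that accepts weights_only=False:

import torch

# TrainingArguments is pickled, so weights_only must be False on recent PyTorch.
args = torch.load("checkpoint-100/training_args.bin", weights_only=False)
print(args.learning_rate, args.max_steps, args.per_device_train_batch_size)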