beamaia committed (verified) · Commit 2f0140d · 1 Parent(s): 27aa857

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,9 +20,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "k_proj",
     "q_proj",
+    "v_proj",
     "o_proj"
   ],
   "task_type": "CAUSAL_LM",
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad992dcdb4258d37a765425377a9b92b92fb78925955f14c74a6beec38d715c2
+oid sha256:31708829f27daf4d0a9b29f0546ba8eec73a3b1a835a02461b2817562e62db5e
 size 436242776
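As with all LFS-tracked files in this commit, only the pointer file changes in the diff: the spec version, the SHA-256 of the object (oid) and its size in bytes; the weights themselves live in LFS storage. A small standard-library sketch (the helper name is hypothetical) for checking a downloaded file against the new pointer:

```python
# Sketch: verify a locally downloaded LFS object against its pointer file.
# The pointer's "oid sha256:..." is the SHA-256 of the full file contents and
# "size" is its length in bytes.
import hashlib
from pathlib import Path

def lfs_oid_and_size(path: str) -> tuple[str, int]:
    """Return the (sha256 hex digest, byte size) of a local file."""
    digest = hashlib.sha256()
    p = Path(path)
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest(), p.stat().st_size

oid, size = lfs_oid_and_size("checkpoint-100/adapter_model.safetensors")
# Expected for this commit, per the new pointer above:
#   oid  == "31708829f27daf4d0a9b29f0546ba8eec73a3b1a835a02461b2817562e62db5e"
#   size == 436242776
print(oid, size)
```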
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28451a46dbd0ed11b50fd2625d02f48a1defa58eb16bf0764d57d3f9a318f66c
+oid sha256:c7e7e1c59e13d70a56ec028cbb03dc7b3b8e577a52bb9a3cca0b367450284032
 size 872631034
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12711e870dc2eb008cef779e781ac8fe1872c7e9d23f2bbc181148f8c2d8f981
+oid sha256:227afea00680bdcdcf19d54b572f61ab2e563bd954561db8d7fee74cde40c145
 size 1064
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
   "best_metric": 0.47333332896232605,
-  "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.16-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
+  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.16-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
   "global_step": 100,
@@ -11,94 +11,94 @@
     {
       "epoch": 0.14,
       "grad_norm": 0.0,
-      "learning_rate": 0.00018714285714285716,
-      "loss": 0.4292,
+      "learning_rate": 0.00018,
+      "loss": 0.4713,
       "step": 20,
-      "train/kl": 4.945448875427246,
-      "train/logps/chosen": -899.4505912162163,
-      "train/logps/rejected": -1086.2307412790697,
-      "train/rewards/chosen": -61.6109058277027,
-      "train/rewards/margins": 17.706562150131596,
-      "train/rewards/rejected": -79.3174679778343
+      "train/kl": 0.0,
+      "train/logps/chosen": -1423.408907312925,
+      "train/logps/rejected": -1301.3419436416184,
+      "train/rewards/chosen": -113.72140731292517,
+      "train/rewards/margins": -11.091552724774871,
+      "train/rewards/rejected": -102.6298545881503
     },
     {
       "epoch": 0.27,
       "grad_norm": 0.0,
-      "learning_rate": 0.00015857142857142857,
-      "loss": 0.4781,
+      "learning_rate": 0.00015142857142857143,
+      "loss": 0.45,
       "step": 40,
       "train/kl": 0.0,
-      "train/logps/chosen": -2608.2385620915034,
-      "train/logps/rejected": -2442.9848428143714,
-      "train/rewards/chosen": -233.4453635620915,
-      "train/rewards/margins": -17.621004467780125,
-      "train/rewards/rejected": -215.82435909431138
+      "train/logps/chosen": -2404.9383680555557,
+      "train/logps/rejected": -2267.5051491477275,
+      "train/rewards/chosen": -211.85392252604166,
+      "train/rewards/margins": -15.253491950757564,
+      "train/rewards/rejected": -196.6004305752841
     },
     {
       "epoch": 0.34,
       "eval/kl": 0.0,
-      "eval/logps/chosen": -2513.149647887324,
-      "eval/logps/rejected": -2267.6801819620255,
-      "eval/rewards/chosen": -222.94891615316902,
-      "eval/rewards/margins": -22.617910911871547,
-      "eval/rewards/rejected": -200.33100524129748,
+      "eval/logps/chosen": -2179.756602112676,
+      "eval/logps/rejected": -1974.1839398734178,
+      "eval/rewards/chosen": -189.60969135123239,
+      "eval/rewards/margins": -18.62818423097923,
+      "eval/rewards/rejected": -170.98150712025316,
       "eval_loss": 0.47333332896232605,
-      "eval_runtime": 140.3068,
-      "eval_samples_per_second": 2.138,
-      "eval_steps_per_second": 0.535,
+      "eval_runtime": 140.7497,
+      "eval_samples_per_second": 2.131,
+      "eval_steps_per_second": 0.533,
       "step": 50
     },
     {
       "epoch": 0.41,
       "grad_norm": 0.0,
-      "learning_rate": 0.00013000000000000002,
-      "loss": 0.4594,
+      "learning_rate": 0.00012285714285714287,
+      "loss": 0.4469,
       "step": 60,
       "train/kl": 0.0,
-      "train/logps/chosen": -2768.1101190476193,
-      "train/logps/rejected": -2565.2698699421967,
-      "train/rewards/chosen": -246.74649234693877,
-      "train/rewards/margins": -19.224545092603506,
-      "train/rewards/rejected": -227.52194725433526
+      "train/logps/chosen": -2463.7814685314684,
+      "train/logps/rejected": -2104.4410310734465,
+      "train/rewards/chosen": -216.41613854895104,
+      "train/rewards/margins": -33.82629479895104,
+      "train/rewards/rejected": -182.58984375
     },
     {
       "epoch": 0.55,
       "grad_norm": 0.0,
-      "learning_rate": 0.00010142857142857143,
-      "loss": 0.4969,
+      "learning_rate": 9.428571428571429e-05,
+      "loss": 0.4281,
       "step": 80,
       "train/kl": 0.0,
-      "train/logps/chosen": -2651.341391509434,
-      "train/logps/rejected": -2455.1451863354037,
-      "train/rewards/chosen": -236.88241941823898,
-      "train/rewards/margins": -19.532070039356995,
-      "train/rewards/rejected": -217.350349378882
+      "train/logps/chosen": -2216.6425638686133,
+      "train/logps/rejected": -2146.411031420765,
+      "train/rewards/chosen": -193.77794251824818,
+      "train/rewards/margins": -6.814528925351993,
+      "train/rewards/rejected": -186.9634135928962
     },
     {
       "epoch": 0.68,
       "grad_norm": 0.0,
-      "learning_rate": 7.285714285714286e-05,
-      "loss": 0.4719,
+      "learning_rate": 6.571428571428571e-05,
+      "loss": 0.5031,
       "step": 100,
       "train/kl": 0.0,
-      "train/logps/chosen": -2469.365894039735,
-      "train/logps/rejected": -2591.453032544379,
-      "train/rewards/chosen": -219.61886899834437,
-      "train/rewards/margins": 9.505113989821325,
-      "train/rewards/rejected": -229.1239829881657
+      "train/logps/chosen": -2453.4375,
+      "train/logps/rejected": -2285.3565251572327,
+      "train/rewards/chosen": -214.367794060559,
+      "train/rewards/margins": -16.124685275181633,
+      "train/rewards/rejected": -198.24310878537736
     },
     {
       "epoch": 0.68,
       "eval/kl": 0.0,
-      "eval/logps/chosen": -2514.4253961267605,
-      "eval/logps/rejected": -2268.7335838607596,
-      "eval/rewards/chosen": -223.0765019806338,
-      "eval/rewards/margins": -22.64017632794392,
-      "eval/rewards/rejected": -200.43632565268987,
+      "eval/logps/chosen": -2181.0286091549297,
+      "eval/logps/rejected": -1975.4440268987341,
+      "eval/rewards/chosen": -189.73686454665494,
+      "eval/rewards/margins": -18.629343779249865,
+      "eval/rewards/rejected": -171.10752076740508,
       "eval_loss": 0.47333332896232605,
-      "eval_runtime": 140.3035,
-      "eval_samples_per_second": 2.138,
-      "eval_steps_per_second": 0.535,
+      "eval_runtime": 140.6959,
+      "eval_samples_per_second": 2.132,
+      "eval_steps_per_second": 0.533,
       "step": 100
     }
   ],
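The KTO metrics in the log are internally consistent: in each entry, rewards/margins equals rewards/chosen minus rewards/rejected (up to floating-point rounding), as the logged values confirm. A quick check against the step-20 record of the updated trainer_state.json:

```python
# Sanity check on the logged KTO metrics: margins = chosen - rejected.
# Values copied from the step-20 entry of the updated trainer_state.json.
chosen = -113.72140731292517
rejected = -102.6298545881503
margin_logged = -11.091552724774871

assert abs((chosen - rejected) - margin_logged) < 1e-6
# A negative margin means rejected completions are currently receiving higher
# rewards than chosen ones at that step.
```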
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7225d407f19a33ea970c09fe3b329d792c1f46c9b0cbb091f6418b163d0104ac
+oid sha256:8fb212074cd725db22cb4056e84de5d7117e1aba74d1da2313e006cd0bf7089b
 size 5688