Mel-Iza0 committed
Commit 6352386
1 Parent(s): f651092

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,9 +20,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "o_proj",
     "v_proj",
+    "q_proj",
     "k_proj"
   ],
   "task_type": "CAUSAL_LM",
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cbe963b48301ba88c612d722a934c173b3e8aee704730f9f2e638316a624ad7
+oid sha256:0f62c273336a7b98480a304521101fe04fdd590ee43f6f0dea258f4d29dfeb79
 size 54560368
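
This file (like the ones below) is stored as a Git LFS pointer: only the sha256 object id changes between commits, while the payload size stays the same because the adapter tensors keep their shapes. A minimal sketch, assuming the checkpoint has been downloaded locally, of checking that a file matches the oid recorded in its pointer:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large weights do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Should print the oid shown in the LFS pointer above.
print(sha256_of("checkpoint-100/adapter_model.safetensors"))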
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a85936e832477583b5302c221a0f9d265b4f7af407c808675904669b4084b7f0
+oid sha256:a752e7840d77c51034a0de171e20aa29476e87c45da45df9749fcbb9e795eb82
 size 109267450
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4ce0b279631b0b5282c6845685570f98d121f98c20888d3cde7c94f3bf8fa9b
+oid sha256:363bcb1976d3d8f69d575a3bb74fad2f79e9d75da57793c889da5e2ae17ef801
 size 1064
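
The optimizer.pt and scheduler.pt states are what make this directory a resumable checkpoint rather than just a set of adapter weights. A minimal sketch, assuming a transformers/trl trainer object has already been constructed as trainer for the same run:

# Restores optimizer.pt, scheduler.pt and trainer_state.json and continues
# training from step 100 instead of starting over.
trainer.train(resume_from_checkpoint="checkpoint-100")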
checkpoint-100/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": NaN,
+  "best_metric": 0.38556817173957825,
   "best_model_checkpoint": "./mixstral/05-04-24-Weni-WeniGPT-Agents-Mixstral-Instruct-2.0.0-KTO_KTO with Agents 1.2.0 dataset and Mixstral model-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
@@ -10,95 +10,95 @@
   "log_history": [
     {
       "epoch": 0.14,
-      "grad_norm": 3.6165390014648438,
-      "kl": 1.9972060918807983,
-      "learning_rate": 0.00018142857142857142,
-      "logps/chosen": -245.00869750976562,
-      "logps/rejected": -241.55165100097656,
-      "loss": 0.4678,
-      "rewards/chosen": 0.044715628027915955,
-      "rewards/margins": 0.5635281205177307,
-      "rewards/rejected": -0.5842480063438416,
+      "grad_norm": 2.368553638458252,
+      "kl": 0.5817955732345581,
+      "learning_rate": 0.00018,
+      "logps/chosen": -227.94122314453125,
+      "logps/rejected": -240.64300537109375,
+      "loss": 0.475,
+      "rewards/chosen": -0.784079372882843,
+      "rewards/margins": 0.5924594402313232,
+      "rewards/rejected": -1.3401262760162354,
       "step": 20
     },
     {
       "epoch": 0.27,
-      "grad_norm": 3.311204671859741,
-      "kl": NaN,
-      "learning_rate": 0.0001542857142857143,
-      "logps/chosen": -232.5364532470703,
-      "logps/rejected": -269.9206237792969,
-      "loss": 0.4625,
-      "rewards/chosen": -0.920975923538208,
-      "rewards/margins": 3.3125476837158203,
-      "rewards/rejected": -4.193856716156006,
+      "grad_norm": 1.441540241241455,
+      "kl": 10.41219425201416,
+      "learning_rate": 0.00015142857142857143,
+      "logps/chosen": -233.67567443847656,
+      "logps/rejected": -243.45187377929688,
+      "loss": 0.4403,
+      "rewards/chosen": 0.15312156081199646,
+      "rewards/margins": 0.6628362536430359,
+      "rewards/rejected": -0.5719818472862244,
       "step": 40
     },
     {
       "epoch": 0.34,
-      "eval_kl": NaN,
-      "eval_logps/chosen": -217.65966796875,
-      "eval_logps/rejected": -235.0032196044922,
-      "eval_loss": NaN,
-      "eval_rewards/chosen": 1.1550920009613037,
-      "eval_rewards/margins": 1.6945449113845825,
-      "eval_rewards/rejected": -0.46676430106163025,
-      "eval_runtime": 353.7939,
-      "eval_samples_per_second": 0.848,
-      "eval_steps_per_second": 0.212,
+      "eval_kl": 0.5120495557785034,
+      "eval_logps/chosen": -248.310302734375,
+      "eval_logps/rejected": -253.4099578857422,
+      "eval_loss": 0.4195210039615631,
+      "eval_rewards/chosen": -1.1683257818222046,
+      "eval_rewards/margins": 2.6137375831604004,
+      "eval_rewards/rejected": -3.615262269973755,
+      "eval_runtime": 357.6046,
+      "eval_samples_per_second": 0.839,
+      "eval_steps_per_second": 0.21,
       "step": 50
     },
     {
       "epoch": 0.41,
-      "grad_norm": NaN,
-      "kl": NaN,
-      "learning_rate": 0.00012857142857142858,
-      "logps/chosen": -251.53494262695312,
-      "logps/rejected": -272.7889709472656,
-      "loss": 0.4154,
-      "rewards/chosen": -0.21331408619880676,
-      "rewards/margins": 2.0234382152557373,
-      "rewards/rejected": -2.6234018802642822,
+      "grad_norm": 2.9933063983917236,
+      "kl": 1.537040114402771,
+      "learning_rate": 0.00012285714285714287,
+      "logps/chosen": -244.6122283935547,
+      "logps/rejected": -288.9703674316406,
+      "loss": 0.3877,
+      "rewards/chosen": -0.880365252494812,
+      "rewards/margins": 3.219341516494751,
+      "rewards/rejected": -4.040163516998291,
       "step": 60
     },
     {
       "epoch": 0.55,
-      "grad_norm": 3.4451451301574707,
-      "kl": NaN,
-      "learning_rate": 0.00010571428571428572,
-      "logps/chosen": -281.3064270019531,
-      "logps/rejected": -319.31500244140625,
-      "loss": 0.5082,
-      "rewards/chosen": -4.3177809715271,
-      "rewards/margins": 3.9624247550964355,
-      "rewards/rejected": -8.34419059753418,
+      "grad_norm": 1.7866162061691284,
+      "kl": 8.153600692749023,
+      "learning_rate": 9.428571428571429e-05,
+      "logps/chosen": -193.49400329589844,
+      "logps/rejected": -239.42953491210938,
+      "loss": 0.3612,
+      "rewards/chosen": 1.8910856246948242,
+      "rewards/margins": 2.9180142879486084,
+      "rewards/rejected": -1.0155872106552124,
       "step": 80
     },
     {
       "epoch": 0.68,
-      "grad_norm": 4.457475185394287,
-      "kl": NaN,
-      "learning_rate": 8e-05,
-      "logps/chosen": -227.8453826904297,
-      "logps/rejected": -306.9755859375,
-      "loss": 0.3511,
-      "rewards/chosen": -0.2338699847459793,
-      "rewards/margins": 6.2684006690979,
-      "rewards/rejected": -6.247352600097656,
+      "grad_norm": 1.8967958688735962,
+      "kl": 5.099704742431641,
+      "learning_rate": 6.714285714285714e-05,
+      "logps/chosen": -210.8452911376953,
+      "logps/rejected": -259.0009460449219,
+      "loss": 0.3518,
+      "rewards/chosen": 1.323478102684021,
+      "rewards/margins": 3.5613787174224854,
+      "rewards/rejected": -2.316351890563965,
       "step": 100
     },
     {
       "epoch": 0.68,
-      "eval_kl": NaN,
-      "eval_logps/chosen": -287.4195251464844,
-      "eval_logps/rejected": -352.2350769042969,
-      "eval_loss": NaN,
-      "eval_rewards/chosen": -5.434815883636475,
-      "eval_rewards/margins": 7.475613594055176,
-      "eval_rewards/rejected": -12.600561141967773,
-      "eval_runtime": 353.0809,
-      "eval_samples_per_second": 0.85,
-      "eval_steps_per_second": 0.212,
+      "eval_kl": 0.0,
+      "eval_logps/chosen": -263.7559509277344,
+      "eval_logps/rejected": -282.9854431152344,
+      "eval_loss": 0.38556817173957825,
+      "eval_rewards/chosen": -2.7128894329071045,
+      "eval_rewards/margins": 3.9057483673095703,
+      "eval_rewards/rejected": -6.572808265686035,
+      "eval_runtime": 356.8194,
+      "eval_samples_per_second": 0.841,
+      "eval_steps_per_second": 0.21,
       "step": 100
     }
   ],
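
The updated trainer_state.json now records a finite best_metric (the eval_loss at step 100) instead of NaN, and the per-step KL and loss values are finite as well. A minimal sketch, assuming the checkpoint has been downloaded locally, of reading this file and flagging any remaining non-finite metrics in the log history:

import json
import math

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Report any metric that is still NaN or infinite in the logged history.
for entry in state["log_history"]:
    bad = {k: v for k, v in entry.items()
           if isinstance(v, float) and not math.isfinite(v)}
    if bad:
        print("step", entry.get("step"), "has non-finite metrics:", bad)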
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da474f70c0f6f8b6f82743af310b6f75ed1c226566b249708fa5a72912d0eb50
+oid sha256:1231c64b0f80048096d711cb4b87ae11f12db73d4d3dee38c242e8b85d18f2b4
 size 5688
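
training_args.bin is a pickled TrainingArguments object saved by the Trainer, so its hash changes whenever any training argument differs between runs. A minimal sketch of inspecting it, assuming the file is trusted (it is unpickled, not loaded as plain tensors; on recent PyTorch versions weights_only=False is needed for that reason):

import torch

# Unpickles the TrainingArguments saved alongside the checkpoint.
args = torch.load("checkpoint-100/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)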