gguichard commited on
Commit
12f8bbb
1 Parent(s): ea92d2a

Training in progress, epoch 6, checkpoint

Browse files
checkpoint-1272/adapter_config.json CHANGED
@@ -14,7 +14,7 @@
14
  "classifier.weight"
15
  ],
16
  "peft_type": "LORA",
17
- "r": 128,
18
  "revision": null,
19
  "target_modules": [
20
  "query",
 
14
  "classifier.weight"
15
  ],
16
  "peft_type": "LORA",
17
+ "r": 64,
18
  "revision": null,
19
  "target_modules": [
20
  "query",
checkpoint-1272/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0fcd50de5b29a0b81ac6a1b131f337af435d4794b2f2322d357fda68fdad05
3
- size 230550429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f35c03d7a318d568526199e88bda759a33e71b33dbfffb4d6fd3fbaaa2a8276
3
+ size 117304221
checkpoint-1272/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f83d9598b7487d67e421285999323e5e383bf6a5e7afa1e08ccba12565ecaea8
3
- size 461144966
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57755f3bcba71f91fb6a37ff2e2e719cbd33a52b40beed58a83a51175028b18
3
+ size 234652550
checkpoint-1272/trainer_state.json CHANGED
@@ -11,85 +11,85 @@
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 9.95e-05,
14
- "loss": 6.3737,
15
  "step": 212
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_loss": 5.842774868011475,
20
- "eval_runtime": 3.9529,
21
- "eval_samples_per_second": 171.015,
22
- "eval_steps_per_second": 21.503,
23
  "step": 212
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 9.900000000000001e-05,
28
- "loss": 5.5296,
29
  "step": 424
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_loss": 4.859792232513428,
34
- "eval_runtime": 4.2848,
35
- "eval_samples_per_second": 157.766,
36
- "eval_steps_per_second": 19.837,
37
  "step": 424
38
  },
39
  {
40
  "epoch": 3.0,
41
  "learning_rate": 9.850000000000001e-05,
42
- "loss": 4.6838,
43
  "step": 636
44
  },
45
  {
46
  "epoch": 3.0,
47
- "eval_loss": 4.013933181762695,
48
- "eval_runtime": 4.1662,
49
- "eval_samples_per_second": 162.258,
50
- "eval_steps_per_second": 20.402,
51
  "step": 636
52
  },
53
  {
54
  "epoch": 4.0,
55
  "learning_rate": 9.8e-05,
56
- "loss": 3.9438,
57
  "step": 848
58
  },
59
  {
60
  "epoch": 4.0,
61
- "eval_loss": 3.277362585067749,
62
- "eval_runtime": 4.2258,
63
- "eval_samples_per_second": 159.97,
64
- "eval_steps_per_second": 20.115,
65
  "step": 848
66
  },
67
  {
68
  "epoch": 5.0,
69
  "learning_rate": 9.75e-05,
70
- "loss": 3.2751,
71
  "step": 1060
72
  },
73
  {
74
  "epoch": 5.0,
75
- "eval_loss": 2.62673282623291,
76
- "eval_runtime": 4.5502,
77
- "eval_samples_per_second": 148.565,
78
- "eval_steps_per_second": 18.68,
79
  "step": 1060
80
  },
81
  {
82
  "epoch": 6.0,
83
  "learning_rate": 9.7e-05,
84
- "loss": 2.6863,
85
  "step": 1272
86
  },
87
  {
88
  "epoch": 6.0,
89
- "eval_loss": 2.0860772132873535,
90
- "eval_runtime": 4.2538,
91
- "eval_samples_per_second": 158.917,
92
- "eval_steps_per_second": 19.982,
93
  "step": 1272
94
  }
95
  ],
@@ -97,7 +97,7 @@
97
  "max_steps": 42400,
98
  "num_train_epochs": 200,
99
  "save_steps": 500,
100
- "total_flos": 1855215309758544.0,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
 
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 9.95e-05,
14
+ "loss": 6.3714,
15
  "step": 212
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_loss": 5.850927352905273,
20
+ "eval_runtime": 3.8282,
21
+ "eval_samples_per_second": 176.584,
22
+ "eval_steps_per_second": 22.204,
23
  "step": 212
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 9.900000000000001e-05,
28
+ "loss": 5.5438,
29
  "step": 424
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "eval_loss": 4.862037658691406,
34
+ "eval_runtime": 3.7425,
35
+ "eval_samples_per_second": 180.626,
36
+ "eval_steps_per_second": 22.712,
37
  "step": 424
38
  },
39
  {
40
  "epoch": 3.0,
41
  "learning_rate": 9.850000000000001e-05,
42
+ "loss": 4.6993,
43
  "step": 636
44
  },
45
  {
46
  "epoch": 3.0,
47
+ "eval_loss": 4.018372058868408,
48
+ "eval_runtime": 4.0424,
49
+ "eval_samples_per_second": 167.229,
50
+ "eval_steps_per_second": 21.027,
51
  "step": 636
52
  },
53
  {
54
  "epoch": 4.0,
55
  "learning_rate": 9.8e-05,
56
+ "loss": 3.967,
57
  "step": 848
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "eval_loss": 3.290557384490967,
62
+ "eval_runtime": 4.3933,
63
+ "eval_samples_per_second": 153.872,
64
+ "eval_steps_per_second": 19.348,
65
  "step": 848
66
  },
67
  {
68
  "epoch": 5.0,
69
  "learning_rate": 9.75e-05,
70
+ "loss": 3.305,
71
  "step": 1060
72
  },
73
  {
74
  "epoch": 5.0,
75
+ "eval_loss": 2.6497254371643066,
76
+ "eval_runtime": 4.3562,
77
+ "eval_samples_per_second": 155.182,
78
+ "eval_steps_per_second": 19.513,
79
  "step": 1060
80
  },
81
  {
82
  "epoch": 6.0,
83
  "learning_rate": 9.7e-05,
84
+ "loss": 2.7185,
85
  "step": 1272
86
  },
87
  {
88
  "epoch": 6.0,
89
+ "eval_loss": 2.1158342361450195,
90
+ "eval_runtime": 4.5634,
91
+ "eval_samples_per_second": 148.135,
92
+ "eval_steps_per_second": 18.626,
93
  "step": 1272
94
  }
95
  ],
 
97
  "max_steps": 42400,
98
  "num_train_epochs": 200,
99
  "save_steps": 500,
100
+ "total_flos": 1709449094176848.0,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
checkpoint-1272/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45ac36706a0f96e1fbfd751ddcc73ed3d4e95773bdb931420d6f73f3adff937
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7d47d9ef4da320ee46463f8716d365eb34b4bc5488090164af1bcf3ad60b35
3
  size 4155