gguichard commited on
Commit
dd80c86
1 Parent(s): ef0ad1d

Training in progress, epoch 7, checkpoint

Browse files
checkpoint-1484/adapter_config.json CHANGED
@@ -14,7 +14,7 @@
14
  "classifier.weight"
15
  ],
16
  "peft_type": "LORA",
17
- "r": 128,
18
  "revision": null,
19
  "target_modules": [
20
  "query",
 
14
  "classifier.weight"
15
  ],
16
  "peft_type": "LORA",
17
+ "r": 64,
18
  "revision": null,
19
  "target_modules": [
20
  "query",
checkpoint-1484/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0403f99471e01bbe94d0e08f743a3c09728ee1b175692e6c07115cbbcc0a415
3
- size 230550429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72dc78522c1f22d8b13839ed6d26c6aded0a2cc8e33a19c37bbf8f3bf6faf103
3
+ size 117304221
checkpoint-1484/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fe51714705fc1530ae4e99c7f18ad6b336f8285d351ff8a400717793e05a4ca
3
- size 461144966
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d86a8c8d83591a983a0ac4fe26d8c5ba7653ea63c618a7f5c3d0ab3a5ca3f961
3
+ size 234652550
checkpoint-1484/trainer_state.json CHANGED
@@ -11,99 +11,99 @@
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 9.95e-05,
14
- "loss": 6.3737,
15
  "step": 212
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_loss": 5.842774868011475,
20
- "eval_runtime": 3.9529,
21
- "eval_samples_per_second": 171.015,
22
- "eval_steps_per_second": 21.503,
23
  "step": 212
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 9.900000000000001e-05,
28
- "loss": 5.5296,
29
  "step": 424
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_loss": 4.859792232513428,
34
- "eval_runtime": 4.2848,
35
- "eval_samples_per_second": 157.766,
36
- "eval_steps_per_second": 19.837,
37
  "step": 424
38
  },
39
  {
40
  "epoch": 3.0,
41
  "learning_rate": 9.850000000000001e-05,
42
- "loss": 4.6838,
43
  "step": 636
44
  },
45
  {
46
  "epoch": 3.0,
47
- "eval_loss": 4.013933181762695,
48
- "eval_runtime": 4.1662,
49
- "eval_samples_per_second": 162.258,
50
- "eval_steps_per_second": 20.402,
51
  "step": 636
52
  },
53
  {
54
  "epoch": 4.0,
55
  "learning_rate": 9.8e-05,
56
- "loss": 3.9438,
57
  "step": 848
58
  },
59
  {
60
  "epoch": 4.0,
61
- "eval_loss": 3.277362585067749,
62
- "eval_runtime": 4.2258,
63
- "eval_samples_per_second": 159.97,
64
- "eval_steps_per_second": 20.115,
65
  "step": 848
66
  },
67
  {
68
  "epoch": 5.0,
69
  "learning_rate": 9.75e-05,
70
- "loss": 3.2751,
71
  "step": 1060
72
  },
73
  {
74
  "epoch": 5.0,
75
- "eval_loss": 2.62673282623291,
76
- "eval_runtime": 4.5502,
77
- "eval_samples_per_second": 148.565,
78
- "eval_steps_per_second": 18.68,
79
  "step": 1060
80
  },
81
  {
82
  "epoch": 6.0,
83
  "learning_rate": 9.7e-05,
84
- "loss": 2.6863,
85
  "step": 1272
86
  },
87
  {
88
  "epoch": 6.0,
89
- "eval_loss": 2.0860772132873535,
90
- "eval_runtime": 4.2538,
91
- "eval_samples_per_second": 158.917,
92
- "eval_steps_per_second": 19.982,
93
  "step": 1272
94
  },
95
  {
96
  "epoch": 7.0,
97
  "learning_rate": 9.65e-05,
98
- "loss": 2.193,
99
  "step": 1484
100
  },
101
  {
102
  "epoch": 7.0,
103
- "eval_loss": 1.6755822896957397,
104
- "eval_runtime": 4.2558,
105
- "eval_samples_per_second": 158.842,
106
- "eval_steps_per_second": 19.973,
107
  "step": 1484
108
  }
109
  ],
@@ -111,7 +111,7 @@
111
  "max_steps": 42400,
112
  "num_train_epochs": 200,
113
  "save_steps": 500,
114
- "total_flos": 2164845934111632.0,
115
  "trial_name": null,
116
  "trial_params": null
117
  }
 
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 9.95e-05,
14
+ "loss": 6.3714,
15
  "step": 212
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_loss": 5.850927352905273,
20
+ "eval_runtime": 3.8282,
21
+ "eval_samples_per_second": 176.584,
22
+ "eval_steps_per_second": 22.204,
23
  "step": 212
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 9.900000000000001e-05,
28
+ "loss": 5.5438,
29
  "step": 424
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "eval_loss": 4.862037658691406,
34
+ "eval_runtime": 3.7425,
35
+ "eval_samples_per_second": 180.626,
36
+ "eval_steps_per_second": 22.712,
37
  "step": 424
38
  },
39
  {
40
  "epoch": 3.0,
41
  "learning_rate": 9.850000000000001e-05,
42
+ "loss": 4.6993,
43
  "step": 636
44
  },
45
  {
46
  "epoch": 3.0,
47
+ "eval_loss": 4.018372058868408,
48
+ "eval_runtime": 4.0424,
49
+ "eval_samples_per_second": 167.229,
50
+ "eval_steps_per_second": 21.027,
51
  "step": 636
52
  },
53
  {
54
  "epoch": 4.0,
55
  "learning_rate": 9.8e-05,
56
+ "loss": 3.967,
57
  "step": 848
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "eval_loss": 3.290557384490967,
62
+ "eval_runtime": 4.3933,
63
+ "eval_samples_per_second": 153.872,
64
+ "eval_steps_per_second": 19.348,
65
  "step": 848
66
  },
67
  {
68
  "epoch": 5.0,
69
  "learning_rate": 9.75e-05,
70
+ "loss": 3.305,
71
  "step": 1060
72
  },
73
  {
74
  "epoch": 5.0,
75
+ "eval_loss": 2.6497254371643066,
76
+ "eval_runtime": 4.3562,
77
+ "eval_samples_per_second": 155.182,
78
+ "eval_steps_per_second": 19.513,
79
  "step": 1060
80
  },
81
  {
82
  "epoch": 6.0,
83
  "learning_rate": 9.7e-05,
84
+ "loss": 2.7185,
85
  "step": 1272
86
  },
87
  {
88
  "epoch": 6.0,
89
+ "eval_loss": 2.1158342361450195,
90
+ "eval_runtime": 4.5634,
91
+ "eval_samples_per_second": 148.135,
92
+ "eval_steps_per_second": 18.626,
93
  "step": 1272
94
  },
95
  {
96
  "epoch": 7.0,
97
  "learning_rate": 9.65e-05,
98
+ "loss": 2.2181,
99
  "step": 1484
100
  },
101
  {
102
  "epoch": 7.0,
103
+ "eval_loss": 1.6882902383804321,
104
+ "eval_runtime": 4.3232,
105
+ "eval_samples_per_second": 156.367,
106
+ "eval_steps_per_second": 19.662,
107
  "step": 1484
108
  }
109
  ],
 
111
  "max_steps": 42400,
112
  "num_train_epochs": 200,
113
  "save_steps": 500,
114
+ "total_flos": 1994751715142544.0,
115
  "trial_name": null,
116
  "trial_params": null
117
  }
checkpoint-1484/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45ac36706a0f96e1fbfd751ddcc73ed3d4e95773bdb931420d6f73f3adff937
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7d47d9ef4da320ee46463f8716d365eb34b4bc5488090164af1bcf3ad60b35
3
  size 4155