gguichard commited on
Commit
19ee6f3
1 Parent(s): 9260eac

Training in progress, epoch 5, checkpoint

Browse files
checkpoint-1060/adapter_config.json CHANGED
@@ -14,7 +14,7 @@
14
  "classifier.weight"
15
  ],
16
  "peft_type": "LORA",
17
- "r": 128,
18
  "revision": null,
19
  "target_modules": [
20
  "query",
 
14
  "classifier.weight"
15
  ],
16
  "peft_type": "LORA",
17
+ "r": 64,
18
  "revision": null,
19
  "target_modules": [
20
  "query",
checkpoint-1060/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddcd04b0679144aa4828d0df260eaadf4073fb0b3491e7cd576562ec87c3179b
3
- size 230550429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb3429c2a084a696b78f22cb270c62bf7d4694f9faab5648efae9afccb050b8
3
+ size 117304221
checkpoint-1060/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3954334f99081185ac3f0146aaa3bf5b2c2874a90cffefc4d0e4047e2aa5f1c
3
- size 461144966
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c83cacb0934c73627d7c7b542bc3ae197572e76ce209141dfe2a2933380e54d
3
+ size 234652550
checkpoint-1060/trainer_state.json CHANGED
@@ -11,71 +11,71 @@
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 9.95e-05,
14
- "loss": 6.3737,
15
  "step": 212
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_loss": 5.842774868011475,
20
- "eval_runtime": 3.9529,
21
- "eval_samples_per_second": 171.015,
22
- "eval_steps_per_second": 21.503,
23
  "step": 212
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 9.900000000000001e-05,
28
- "loss": 5.5296,
29
  "step": 424
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_loss": 4.859792232513428,
34
- "eval_runtime": 4.2848,
35
- "eval_samples_per_second": 157.766,
36
- "eval_steps_per_second": 19.837,
37
  "step": 424
38
  },
39
  {
40
  "epoch": 3.0,
41
  "learning_rate": 9.850000000000001e-05,
42
- "loss": 4.6838,
43
  "step": 636
44
  },
45
  {
46
  "epoch": 3.0,
47
- "eval_loss": 4.013933181762695,
48
- "eval_runtime": 4.1662,
49
- "eval_samples_per_second": 162.258,
50
- "eval_steps_per_second": 20.402,
51
  "step": 636
52
  },
53
  {
54
  "epoch": 4.0,
55
  "learning_rate": 9.8e-05,
56
- "loss": 3.9438,
57
  "step": 848
58
  },
59
  {
60
  "epoch": 4.0,
61
- "eval_loss": 3.277362585067749,
62
- "eval_runtime": 4.2258,
63
- "eval_samples_per_second": 159.97,
64
- "eval_steps_per_second": 20.115,
65
  "step": 848
66
  },
67
  {
68
  "epoch": 5.0,
69
  "learning_rate": 9.75e-05,
70
- "loss": 3.2751,
71
  "step": 1060
72
  },
73
  {
74
  "epoch": 5.0,
75
- "eval_loss": 2.62673282623291,
76
- "eval_runtime": 4.5502,
77
- "eval_samples_per_second": 148.565,
78
- "eval_steps_per_second": 18.68,
79
  "step": 1060
80
  }
81
  ],
@@ -83,7 +83,7 @@
83
  "max_steps": 42400,
84
  "num_train_epochs": 200,
85
  "save_steps": 500,
86
- "total_flos": 1546564063916460.0,
87
  "trial_name": null,
88
  "trial_params": null
89
  }
 
11
  {
12
  "epoch": 1.0,
13
  "learning_rate": 9.95e-05,
14
+ "loss": 6.3714,
15
  "step": 212
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_loss": 5.850927352905273,
20
+ "eval_runtime": 3.8282,
21
+ "eval_samples_per_second": 176.584,
22
+ "eval_steps_per_second": 22.204,
23
  "step": 212
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 9.900000000000001e-05,
28
+ "loss": 5.5438,
29
  "step": 424
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "eval_loss": 4.862037658691406,
34
+ "eval_runtime": 3.7425,
35
+ "eval_samples_per_second": 180.626,
36
+ "eval_steps_per_second": 22.712,
37
  "step": 424
38
  },
39
  {
40
  "epoch": 3.0,
41
  "learning_rate": 9.850000000000001e-05,
42
+ "loss": 4.6993,
43
  "step": 636
44
  },
45
  {
46
  "epoch": 3.0,
47
+ "eval_loss": 4.018372058868408,
48
+ "eval_runtime": 4.0424,
49
+ "eval_samples_per_second": 167.229,
50
+ "eval_steps_per_second": 21.027,
51
  "step": 636
52
  },
53
  {
54
  "epoch": 4.0,
55
  "learning_rate": 9.8e-05,
56
+ "loss": 3.967,
57
  "step": 848
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "eval_loss": 3.290557384490967,
62
+ "eval_runtime": 4.3933,
63
+ "eval_samples_per_second": 153.872,
64
+ "eval_steps_per_second": 19.348,
65
  "step": 848
66
  },
67
  {
68
  "epoch": 5.0,
69
  "learning_rate": 9.75e-05,
70
+ "loss": 3.305,
71
  "step": 1060
72
  },
73
  {
74
  "epoch": 5.0,
75
+ "eval_loss": 2.6497254371643066,
76
+ "eval_runtime": 4.3562,
77
+ "eval_samples_per_second": 155.182,
78
+ "eval_steps_per_second": 19.513,
79
  "step": 1060
80
  }
81
  ],
 
83
  "max_steps": 42400,
84
  "num_train_epochs": 200,
85
  "save_steps": 500,
86
+ "total_flos": 1425048900923820.0,
87
  "trial_name": null,
88
  "trial_params": null
89
  }
checkpoint-1060/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45ac36706a0f96e1fbfd751ddcc73ed3d4e95773bdb931420d6f73f3adff937
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7d47d9ef4da320ee46463f8716d365eb34b4bc5488090164af1bcf3ad60b35
3
  size 4155