cezeozue committed on
Commit
a4967b9
1 Parent(s): f904c3e

Training in progress, step 2500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:909721e820f8a7858935155bda1e62e574820c37a5d64131f47d2f24a9cfcacb
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac1cce5e5410e57d71489c02c27b3b2af6e3aa248217249170091e83a0f6878
3
  size 268290900
run-10/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d6932a93af79c9e4747e455fda2d1176497efb879c59250b1e1b7bc52f350b2
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5488b1fd75bc225f0465e85708beee9d03dca3da18beab40db0b6de9131562a
3
  size 268290900
run-10/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d911fd34d2f3cb8dae21ef6011cf533414404b6592194a7926ffb47a5337b492
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c617ec75c21f636b743e5354494e46c9458f994d4761b7f3e23fc2931d61a5c8
3
  size 536643898
run-10/checkpoint-1000/trainer_state.json CHANGED
@@ -10,41 +10,41 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.3992,
16
- "eval_samples_per_second": 574.162,
17
- "eval_steps_per_second": 12.039,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8416129032258064,
29
- "eval_loss": 0.09490782022476196,
30
- "eval_runtime": 5.4571,
31
- "eval_samples_per_second": 568.065,
32
- "eval_steps_per_second": 11.911,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8938709677419355,
38
- "eval_loss": 0.06310474872589111,
39
- "eval_runtime": 5.4013,
40
- "eval_samples_per_second": 573.935,
41
- "eval_steps_per_second": 12.034,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1093,
48
  "step": 1000
49
  }
50
  ],
@@ -52,11 +52,11 @@
52
  "max_steps": 3180,
53
  "num_train_epochs": 10,
54
  "save_steps": 500,
55
- "total_flos": 400953207486132.0,
56
  "trial_name": null,
57
  "trial_params": {
58
- "alpha": 0.1451722512397559,
59
  "num_train_epochs": 10,
60
- "temperature": 14
61
  }
62
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6035483870967742,
14
+ "eval_loss": 0.2065293788909912,
15
+ "eval_runtime": 5.4973,
16
+ "eval_samples_per_second": 563.918,
17
+ "eval_steps_per_second": 11.824,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3298,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8422580645161291,
29
+ "eval_loss": 0.09804486483335495,
30
+ "eval_runtime": 5.6345,
31
+ "eval_samples_per_second": 550.179,
32
+ "eval_steps_per_second": 11.536,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8964516129032258,
38
+ "eval_loss": 0.06418585777282715,
39
+ "eval_runtime": 5.5011,
40
+ "eval_samples_per_second": 563.528,
41
+ "eval_steps_per_second": 11.816,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
+ "loss": 0.113,
48
  "step": 1000
49
  }
50
  ],
 
52
  "max_steps": 3180,
53
  "num_train_epochs": 10,
54
  "save_steps": 500,
55
+ "total_flos": 259653090321324.0,
56
  "trial_name": null,
57
  "trial_params": {
58
+ "alpha": 0.10298372805208489,
59
  "num_train_epochs": 10,
60
+ "temperature": 9
61
  }
62
  }
run-10/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4877bbd4bf8c3b4c2d0a169a9c195b85ebd557cc55dabfa43eadab9c9abe3fd
3
  size 4664
run-10/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9edbd771b7fb84ea3e8f82fa1685fc0dba5eb395241a0e289c3e2609479c330
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d046f7f0f9f1d524adf8bb4905dc8a86f7aaf45fe0e66d01efad26ccd78eb218
3
  size 268290900
run-10/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c761dc90c89d08084f78b18c4915c010a87b9d0ccba04efecd2c4cd05990a73
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89353e37fe4fbc594afa43fd3e441c6e8c6ad935dc6a149d555cef603eaded63
3
  size 536643898
run-10/checkpoint-1500/trainer_state.json CHANGED
@@ -10,56 +10,56 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.3992,
16
- "eval_samples_per_second": 574.162,
17
- "eval_steps_per_second": 12.039,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8416129032258064,
29
- "eval_loss": 0.09490782022476196,
30
- "eval_runtime": 5.4571,
31
- "eval_samples_per_second": 568.065,
32
- "eval_steps_per_second": 11.911,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8938709677419355,
38
- "eval_loss": 0.06310474872589111,
39
- "eval_runtime": 5.4013,
40
- "eval_samples_per_second": 573.935,
41
- "eval_steps_per_second": 12.034,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1093,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9032258064516129,
53
- "eval_loss": 0.04744185879826546,
54
- "eval_runtime": 5.4117,
55
- "eval_samples_per_second": 572.834,
56
- "eval_steps_per_second": 12.011,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
- "loss": 0.0689,
63
  "step": 1500
64
  }
65
  ],
@@ -67,11 +67,11 @@
67
  "max_steps": 3180,
68
  "num_train_epochs": 10,
69
  "save_steps": 500,
70
- "total_flos": 530779493233920.0,
71
  "trial_name": null,
72
  "trial_params": {
73
- "alpha": 0.1451722512397559,
74
  "num_train_epochs": 10,
75
- "temperature": 14
76
  }
77
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6035483870967742,
14
+ "eval_loss": 0.2065293788909912,
15
+ "eval_runtime": 5.4973,
16
+ "eval_samples_per_second": 563.918,
17
+ "eval_steps_per_second": 11.824,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3298,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8422580645161291,
29
+ "eval_loss": 0.09804486483335495,
30
+ "eval_runtime": 5.6345,
31
+ "eval_samples_per_second": 550.179,
32
+ "eval_steps_per_second": 11.536,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8964516129032258,
38
+ "eval_loss": 0.06418585777282715,
39
+ "eval_runtime": 5.5011,
40
+ "eval_samples_per_second": 563.528,
41
+ "eval_steps_per_second": 11.816,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
+ "loss": 0.113,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.9045161290322581,
53
+ "eval_loss": 0.04777143895626068,
54
+ "eval_runtime": 5.6659,
55
+ "eval_samples_per_second": 547.133,
56
+ "eval_steps_per_second": 11.472,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
+ "loss": 0.0703,
63
  "step": 1500
64
  }
65
  ],
 
67
  "max_steps": 3180,
68
  "num_train_epochs": 10,
69
  "save_steps": 500,
70
+ "total_flos": 389479376069112.0,
71
  "trial_name": null,
72
  "trial_params": {
73
+ "alpha": 0.10298372805208489,
74
  "num_train_epochs": 10,
75
+ "temperature": 9
76
  }
77
  }
run-10/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4877bbd4bf8c3b4c2d0a169a9c195b85ebd557cc55dabfa43eadab9c9abe3fd
3
  size 4664
run-10/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb6a9179bf089f9691f3b2e0a3a2c4805af497ea41972721533587fbdf018db
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ddd496d9bff9e72be53b8931dae02308ef56156956545f8ff8644c5b1a740f
3
  size 268290900
run-10/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f646434e3c8ec08005e09b55b9b25aac55d54a30f38806b03f5bcc8894c064
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e239fb88bbadd95b3ff46d1d2f73a66c219259e916926dc745b45ffbb9ef9b
3
  size 536643898
run-10/checkpoint-2000/trainer_state.json CHANGED
@@ -10,80 +10,80 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.3992,
16
- "eval_samples_per_second": 574.162,
17
- "eval_steps_per_second": 12.039,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8416129032258064,
29
- "eval_loss": 0.09490782022476196,
30
- "eval_runtime": 5.4571,
31
- "eval_samples_per_second": 568.065,
32
- "eval_steps_per_second": 11.911,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8938709677419355,
38
- "eval_loss": 0.06310474872589111,
39
- "eval_runtime": 5.4013,
40
- "eval_samples_per_second": 573.935,
41
- "eval_steps_per_second": 12.034,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1093,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9032258064516129,
53
- "eval_loss": 0.04744185879826546,
54
- "eval_runtime": 5.4117,
55
- "eval_samples_per_second": 572.834,
56
- "eval_steps_per_second": 12.011,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
- "loss": 0.0689,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 5.0,
67
  "eval_accuracy": 0.9151612903225806,
68
- "eval_loss": 0.03924418240785599,
69
- "eval_runtime": 5.5557,
70
- "eval_samples_per_second": 557.989,
71
- "eval_steps_per_second": 11.7,
72
  "step": 1590
73
  },
74
  {
75
  "epoch": 6.0,
76
- "eval_accuracy": 0.917741935483871,
77
- "eval_loss": 0.03422646224498749,
78
- "eval_runtime": 5.3885,
79
- "eval_samples_per_second": 575.3,
80
- "eval_steps_per_second": 12.063,
81
  "step": 1908
82
  },
83
  {
84
  "epoch": 6.29,
85
  "learning_rate": 7.421383647798742e-06,
86
- "loss": 0.0539,
87
  "step": 2000
88
  }
89
  ],
@@ -91,11 +91,11 @@
91
  "max_steps": 3180,
92
  "num_train_epochs": 10,
93
  "save_steps": 500,
94
- "total_flos": 661227332227812.0,
95
  "trial_name": null,
96
  "trial_params": {
97
- "alpha": 0.1451722512397559,
98
  "num_train_epochs": 10,
99
- "temperature": 14
100
  }
101
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6035483870967742,
14
+ "eval_loss": 0.2065293788909912,
15
+ "eval_runtime": 5.4973,
16
+ "eval_samples_per_second": 563.918,
17
+ "eval_steps_per_second": 11.824,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3298,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8422580645161291,
29
+ "eval_loss": 0.09804486483335495,
30
+ "eval_runtime": 5.6345,
31
+ "eval_samples_per_second": 550.179,
32
+ "eval_steps_per_second": 11.536,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8964516129032258,
38
+ "eval_loss": 0.06418585777282715,
39
+ "eval_runtime": 5.5011,
40
+ "eval_samples_per_second": 563.528,
41
+ "eval_steps_per_second": 11.816,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
+ "loss": 0.113,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.9045161290322581,
53
+ "eval_loss": 0.04777143895626068,
54
+ "eval_runtime": 5.6659,
55
+ "eval_samples_per_second": 547.133,
56
+ "eval_steps_per_second": 11.472,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
+ "loss": 0.0703,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 5.0,
67
  "eval_accuracy": 0.9151612903225806,
68
+ "eval_loss": 0.03928793966770172,
69
+ "eval_runtime": 5.7374,
70
+ "eval_samples_per_second": 540.317,
71
+ "eval_steps_per_second": 11.329,
72
  "step": 1590
73
  },
74
  {
75
  "epoch": 6.0,
76
+ "eval_accuracy": 0.9193548387096774,
77
+ "eval_loss": 0.034144770354032516,
78
+ "eval_runtime": 5.8666,
79
+ "eval_samples_per_second": 528.418,
80
+ "eval_steps_per_second": 11.08,
81
  "step": 1908
82
  },
83
  {
84
  "epoch": 6.29,
85
  "learning_rate": 7.421383647798742e-06,
86
+ "loss": 0.0547,
87
  "step": 2000
88
  }
89
  ],
 
91
  "max_steps": 3180,
92
  "num_train_epochs": 10,
93
  "save_steps": 500,
94
+ "total_flos": 519927215063004.0,
95
  "trial_name": null,
96
  "trial_params": {
97
+ "alpha": 0.10298372805208489,
98
  "num_train_epochs": 10,
99
+ "temperature": 9
100
  }
101
  }
run-10/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4877bbd4bf8c3b4c2d0a169a9c195b85ebd557cc55dabfa43eadab9c9abe3fd
3
  size 4664
run-10/checkpoint-2500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab1f84f99c7cb98ccf398ad727c0c1d12c8e55abca84f5e6391fa74c651a840
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac1cce5e5410e57d71489c02c27b3b2af6e3aa248217249170091e83a0f6878
3
  size 268290900
run-10/checkpoint-2500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70db23d4ed939a132446846331253447731390e3efec0825d1d709a0000e134f
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a948e2d7f8b7e0f8a1978c8146934316446d1c6d9823f7ea2c8496e3f39d3af
3
  size 536643898
run-10/checkpoint-2500/trainer_state.json CHANGED
@@ -10,95 +10,95 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.3992,
16
- "eval_samples_per_second": 574.162,
17
- "eval_steps_per_second": 12.039,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8416129032258064,
29
- "eval_loss": 0.09490782022476196,
30
- "eval_runtime": 5.4571,
31
- "eval_samples_per_second": 568.065,
32
- "eval_steps_per_second": 11.911,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8938709677419355,
38
- "eval_loss": 0.06310474872589111,
39
- "eval_runtime": 5.4013,
40
- "eval_samples_per_second": 573.935,
41
- "eval_steps_per_second": 12.034,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1093,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9032258064516129,
53
- "eval_loss": 0.04744185879826546,
54
- "eval_runtime": 5.4117,
55
- "eval_samples_per_second": 572.834,
56
- "eval_steps_per_second": 12.011,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
- "loss": 0.0689,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 5.0,
67
  "eval_accuracy": 0.9151612903225806,
68
- "eval_loss": 0.03924418240785599,
69
- "eval_runtime": 5.5557,
70
- "eval_samples_per_second": 557.989,
71
- "eval_steps_per_second": 11.7,
72
  "step": 1590
73
  },
74
  {
75
  "epoch": 6.0,
76
- "eval_accuracy": 0.917741935483871,
77
- "eval_loss": 0.03422646224498749,
78
- "eval_runtime": 5.3885,
79
- "eval_samples_per_second": 575.3,
80
- "eval_steps_per_second": 12.063,
81
  "step": 1908
82
  },
83
  {
84
  "epoch": 6.29,
85
  "learning_rate": 7.421383647798742e-06,
86
- "loss": 0.0539,
87
  "step": 2000
88
  },
89
  {
90
  "epoch": 7.0,
91
- "eval_accuracy": 0.9190322580645162,
92
- "eval_loss": 0.03100522980093956,
93
- "eval_runtime": 5.4997,
94
- "eval_samples_per_second": 563.666,
95
- "eval_steps_per_second": 11.819,
96
  "step": 2226
97
  },
98
  {
99
  "epoch": 7.86,
100
  "learning_rate": 4.276729559748428e-06,
101
- "loss": 0.0466,
102
  "step": 2500
103
  }
104
  ],
@@ -106,11 +106,11 @@
106
  "max_steps": 3180,
107
  "num_train_epochs": 10,
108
  "save_steps": 500,
109
- "total_flos": 790936363356552.0,
110
  "trial_name": null,
111
  "trial_params": {
112
- "alpha": 0.1451722512397559,
113
  "num_train_epochs": 10,
114
- "temperature": 14
115
  }
116
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6035483870967742,
14
+ "eval_loss": 0.2065293788909912,
15
+ "eval_runtime": 5.4973,
16
+ "eval_samples_per_second": 563.918,
17
+ "eval_steps_per_second": 11.824,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3298,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8422580645161291,
29
+ "eval_loss": 0.09804486483335495,
30
+ "eval_runtime": 5.6345,
31
+ "eval_samples_per_second": 550.179,
32
+ "eval_steps_per_second": 11.536,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8964516129032258,
38
+ "eval_loss": 0.06418585777282715,
39
+ "eval_runtime": 5.5011,
40
+ "eval_samples_per_second": 563.528,
41
+ "eval_steps_per_second": 11.816,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
+ "loss": 0.113,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.9045161290322581,
53
+ "eval_loss": 0.04777143895626068,
54
+ "eval_runtime": 5.6659,
55
+ "eval_samples_per_second": 547.133,
56
+ "eval_steps_per_second": 11.472,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
+ "loss": 0.0703,
63
  "step": 1500
64
  },
65
  {
66
  "epoch": 5.0,
67
  "eval_accuracy": 0.9151612903225806,
68
+ "eval_loss": 0.03928793966770172,
69
+ "eval_runtime": 5.7374,
70
+ "eval_samples_per_second": 540.317,
71
+ "eval_steps_per_second": 11.329,
72
  "step": 1590
73
  },
74
  {
75
  "epoch": 6.0,
76
+ "eval_accuracy": 0.9193548387096774,
77
+ "eval_loss": 0.034144770354032516,
78
+ "eval_runtime": 5.8666,
79
+ "eval_samples_per_second": 528.418,
80
+ "eval_steps_per_second": 11.08,
81
  "step": 1908
82
  },
83
  {
84
  "epoch": 6.29,
85
  "learning_rate": 7.421383647798742e-06,
86
+ "loss": 0.0547,
87
  "step": 2000
88
  },
89
  {
90
  "epoch": 7.0,
91
+ "eval_accuracy": 0.9216129032258065,
92
+ "eval_loss": 0.03088981844484806,
93
+ "eval_runtime": 5.6168,
94
+ "eval_samples_per_second": 551.918,
95
+ "eval_steps_per_second": 11.572,
96
  "step": 2226
97
  },
98
  {
99
  "epoch": 7.86,
100
  "learning_rate": 4.276729559748428e-06,
101
+ "loss": 0.0471,
102
  "step": 2500
103
  }
104
  ],
 
106
  "max_steps": 3180,
107
  "num_train_epochs": 10,
108
  "save_steps": 500,
109
+ "total_flos": 649636246191744.0,
110
  "trial_name": null,
111
  "trial_params": {
112
+ "alpha": 0.10298372805208489,
113
  "num_train_epochs": 10,
114
+ "temperature": 9
115
  }
116
  }
run-10/checkpoint-2500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4877bbd4bf8c3b4c2d0a169a9c195b85ebd557cc55dabfa43eadab9c9abe3fd
3
  size 4664
runs/Jan25_17-40-54_c146da53f02f/events.out.tfevents.1706209036.c146da53f02f.3236.11 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed0d8dd27bd2283017b5c43cdab1144abd6bf2335b06ba83f58b18d215f3414b
3
- size 14408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3cea5f9b1c5f185363e75ce60869c1b39ff69e09529fa18dc3434494e04700
3
+ size 15534