cezeozue commited on
Commit
6cb977c
1 Parent(s): a4967b9

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eac1cce5e5410e57d71489c02c27b3b2af6e3aa248217249170091e83a0f6878
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71fb000b904203220d022e331b69fe81deb5815b3a48bbe27b12ee5d31c51b3b
3
  size 268290900
run-11/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d6932a93af79c9e4747e455fda2d1176497efb879c59250b1e1b7bc52f350b2
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af68119ff3bb5bcc866e7c8d9118467f41e08f8d7b2b484776daa6a35e06a915
3
  size 268290900
run-11/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d911fd34d2f3cb8dae21ef6011cf533414404b6592194a7926ffb47a5337b492
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edac4838f7f4fd230174eb238574f075ef04fbb12467be8f401c31e5969cdd5d
3
  size 536643898
run-11/checkpoint-1000/trainer_state.json CHANGED
@@ -10,41 +10,41 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.2661,
16
- "eval_samples_per_second": 588.67,
17
- "eval_steps_per_second": 12.343,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8416129032258064,
29
- "eval_loss": 0.09490782022476196,
30
- "eval_runtime": 5.3608,
31
- "eval_samples_per_second": 578.275,
32
- "eval_steps_per_second": 12.125,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8938709677419355,
38
- "eval_loss": 0.06310474872589111,
39
- "eval_runtime": 5.4541,
40
- "eval_samples_per_second": 568.382,
41
- "eval_steps_per_second": 11.918,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1093,
48
  "step": 1000
49
  }
50
  ],
@@ -52,11 +52,11 @@
52
  "max_steps": 3180,
53
  "num_train_epochs": 10,
54
  "save_steps": 500,
55
- "total_flos": 259653090321324.0,
56
  "trial_name": null,
57
  "trial_params": {
58
- "alpha": 0.015447886949790768,
59
  "num_train_epochs": 10,
60
- "temperature": 14
61
  }
62
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.597741935483871,
14
+ "eval_loss": 0.20127475261688232,
15
+ "eval_runtime": 5.5852,
16
+ "eval_samples_per_second": 555.041,
17
+ "eval_steps_per_second": 11.638,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3218,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8429032258064516,
29
+ "eval_loss": 0.0963522419333458,
30
+ "eval_runtime": 5.674,
31
+ "eval_samples_per_second": 546.351,
32
+ "eval_steps_per_second": 11.456,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8945161290322581,
38
+ "eval_loss": 0.0636168122291565,
39
+ "eval_runtime": 5.453,
40
+ "eval_samples_per_second": 568.492,
41
+ "eval_steps_per_second": 11.92,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
+ "loss": 0.111,
48
  "step": 1000
49
  }
50
  ],
 
52
  "max_steps": 3180,
53
  "num_train_epochs": 10,
54
  "save_steps": 500,
55
+ "total_flos": 353514894043500.0,
56
  "trial_name": null,
57
  "trial_params": {
58
+ "alpha": 0.8510017086349121,
59
  "num_train_epochs": 10,
60
+ "temperature": 11
61
  }
62
  }
run-11/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf96d7fa8f7d2304812dddf41bd56b652e21c4ea0d68aad0ef98db6b20f0079
3
  size 4664
run-11/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9edbd771b7fb84ea3e8f82fa1685fc0dba5eb395241a0e289c3e2609479c330
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71fb000b904203220d022e331b69fe81deb5815b3a48bbe27b12ee5d31c51b3b
3
  size 268290900
run-11/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c761dc90c89d08084f78b18c4915c010a87b9d0ccba04efecd2c4cd05990a73
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e169807fcbafdbd24834d1aed0852e75f336d58f5227032ad9cfe420681ceb7
3
  size 536643898
run-11/checkpoint-1500/trainer_state.json CHANGED
@@ -10,56 +10,56 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.2661,
16
- "eval_samples_per_second": 588.67,
17
- "eval_steps_per_second": 12.343,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8416129032258064,
29
- "eval_loss": 0.09490782022476196,
30
- "eval_runtime": 5.3608,
31
- "eval_samples_per_second": 578.275,
32
- "eval_steps_per_second": 12.125,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8938709677419355,
38
- "eval_loss": 0.06310474872589111,
39
- "eval_runtime": 5.4541,
40
- "eval_samples_per_second": 568.382,
41
- "eval_steps_per_second": 11.918,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1093,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9032258064516129,
53
- "eval_loss": 0.04744185879826546,
54
- "eval_runtime": 5.4951,
55
- "eval_samples_per_second": 564.135,
56
- "eval_steps_per_second": 11.829,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
- "loss": 0.0689,
63
  "step": 1500
64
  }
65
  ],
@@ -67,11 +67,11 @@
67
  "max_steps": 3180,
68
  "num_train_epochs": 10,
69
  "save_steps": 500,
70
- "total_flos": 389479376069112.0,
71
  "trial_name": null,
72
  "trial_params": {
73
- "alpha": 0.015447886949790768,
74
  "num_train_epochs": 10,
75
- "temperature": 14
76
  }
77
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.597741935483871,
14
+ "eval_loss": 0.20127475261688232,
15
+ "eval_runtime": 5.5852,
16
+ "eval_samples_per_second": 555.041,
17
+ "eval_steps_per_second": 11.638,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3218,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8429032258064516,
29
+ "eval_loss": 0.0963522419333458,
30
+ "eval_runtime": 5.674,
31
+ "eval_samples_per_second": 546.351,
32
+ "eval_steps_per_second": 11.456,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8945161290322581,
38
+ "eval_loss": 0.0636168122291565,
39
+ "eval_runtime": 5.453,
40
+ "eval_samples_per_second": 568.492,
41
+ "eval_steps_per_second": 11.92,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
  "learning_rate": 1.371069182389937e-05,
47
+ "loss": 0.111,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.9041935483870968,
53
+ "eval_loss": 0.04760991781949997,
54
+ "eval_runtime": 6.1888,
55
+ "eval_samples_per_second": 500.906,
56
+ "eval_steps_per_second": 10.503,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
  "learning_rate": 1.0566037735849058e-05,
62
+ "loss": 0.0696,
63
  "step": 1500
64
  }
65
  ],
 
67
  "max_steps": 3180,
68
  "num_train_epochs": 10,
69
  "save_steps": 500,
70
+ "total_flos": 483341179791288.0,
71
  "trial_name": null,
72
  "trial_params": {
73
+ "alpha": 0.8510017086349121,
74
  "num_train_epochs": 10,
75
+ "temperature": 11
76
  }
77
  }
run-11/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf96d7fa8f7d2304812dddf41bd56b652e21c4ea0d68aad0ef98db6b20f0079
3
  size 4664
run-11/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5c5a7ee255f1cb326dd52602cee96add5d67240ad1bb87ba8c17efcf7df8ea4
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b38da463b6bde9b84fba1337286f6155c7648f03b6be4806a2eb326ea78e04c
3
  size 268290900
run-11/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d174eab804765ed333282a71e14f91d5db751e50cd88cc4c551cef932b09efe
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16a08b35e6bac8a1b949b6127e6a1bb43e8d796f921c57e85ca43426991dcb1
3
  size 536643898
run-11/checkpoint-500/trainer_state.json CHANGED
@@ -10,17 +10,17 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5929032258064516,
14
- "eval_loss": 0.19686882197856903,
15
- "eval_runtime": 5.2661,
16
- "eval_samples_per_second": 588.67,
17
- "eval_steps_per_second": 12.343,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3149,
24
  "step": 500
25
  }
26
  ],
@@ -28,11 +28,11 @@
28
  "max_steps": 3180,
29
  "num_train_epochs": 10,
30
  "save_steps": 500,
31
- "total_flos": 130007874759588.0,
32
  "trial_name": null,
33
  "trial_params": {
34
- "alpha": 0.015447886949790768,
35
  "num_train_epochs": 10,
36
- "temperature": 14
37
  }
38
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.597741935483871,
14
+ "eval_loss": 0.20127475261688232,
15
+ "eval_runtime": 5.5852,
16
+ "eval_samples_per_second": 555.041,
17
+ "eval_steps_per_second": 11.638,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
  "learning_rate": 1.685534591194969e-05,
23
+ "loss": 0.3218,
24
  "step": 500
25
  }
26
  ],
 
28
  "max_steps": 3180,
29
  "num_train_epochs": 10,
30
  "save_steps": 500,
31
+ "total_flos": 223869678481764.0,
32
  "trial_name": null,
33
  "trial_params": {
34
+ "alpha": 0.8510017086349121,
35
  "num_train_epochs": 10,
36
+ "temperature": 11
37
  }
38
  }
run-11/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb35f8dd4016b76d51232ce8b58b91b877adcb6c836af64b8e70048a18fdaae2
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf96d7fa8f7d2304812dddf41bd56b652e21c4ea0d68aad0ef98db6b20f0079
3
  size 4664
runs/Jan25_17-40-54_c146da53f02f/events.out.tfevents.1706209619.c146da53f02f.3236.12 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe37c300ea4d436d50a231df15ec84ae3377c1a97731b4c109f7251db9a0811e
3
+ size 15534
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4877bbd4bf8c3b4c2d0a169a9c195b85ebd557cc55dabfa43eadab9c9abe3fd
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf96d7fa8f7d2304812dddf41bd56b652e21c4ea0d68aad0ef98db6b20f0079
3
  size 4664