cezeozue commited on
Commit
2984f2b
1 Parent(s): 15c4d96

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a83d23f9c280b1b78545a720da15a8e9067a3694eaf73c8bd335a73ff1dfdd14
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03bffd09b24ec13e3096504399449cf97bf57621524dc0504b7d091fab80fdd
3
  size 268290900
run-2/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9cf0f8a2189351356d6987a53766ff1ba02473ec638516dde50f31a1e8179fd
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b03bffd09b24ec13e3096504399449cf97bf57621524dc0504b7d091fab80fdd
3
  size 268290900
run-2/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7582495c2bb697085fde7d23892867d0d72d934021d5a2f5f7a9fe88f3833167
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e5c6bf52e7a2bfb516bde6af6e76d5ca6dbc8e7f9511bf5096c4a7a88abd81f
3
  size 536643898
run-2/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad4d7d251acf36e559c362893a1fb310c9f46b20e8a330025a14b6829ce4ab07
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c8d3ce0734337fc0c187ca5543b4c70ca45d996531f199209b3a0c2a798109
3
  size 1064
run-2/checkpoint-1500/trainer_state.json CHANGED
@@ -10,68 +10,68 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5851612903225807,
14
- "eval_loss": 0.2035684436559677,
15
- "eval_runtime": 5.6989,
16
- "eval_samples_per_second": 543.962,
17
- "eval_steps_per_second": 11.406,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
- "learning_rate": 1.550763701707098e-05,
23
- "loss": 0.3221,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8335483870967741,
29
- "eval_loss": 0.10106482356786728,
30
- "eval_runtime": 5.8785,
31
- "eval_samples_per_second": 527.345,
32
- "eval_steps_per_second": 11.057,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.885483870967742,
38
- "eval_loss": 0.06870077550411224,
39
- "eval_runtime": 5.783,
40
- "eval_samples_per_second": 536.053,
41
- "eval_steps_per_second": 11.24,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
- "learning_rate": 1.101527403414196e-05,
47
- "loss": 0.1162,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.8980645161290323,
53
- "eval_loss": 0.053607575595378876,
54
- "eval_runtime": 5.8706,
55
- "eval_samples_per_second": 528.052,
56
- "eval_steps_per_second": 11.072,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
- "learning_rate": 6.522911051212939e-06,
62
- "loss": 0.0765,
63
  "step": 1500
64
  }
65
  ],
66
  "logging_steps": 500,
67
- "max_steps": 2226,
68
- "num_train_epochs": 7,
69
  "save_steps": 500,
70
  "total_flos": 389479376069112.0,
71
  "trial_name": null,
72
  "trial_params": {
73
- "alpha": 0.8305833099612083,
74
- "num_train_epochs": 7,
75
- "temperature": 12
76
  }
77
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6,
14
+ "eval_loss": 0.22239726781845093,
15
+ "eval_runtime": 5.4481,
16
+ "eval_samples_per_second": 569.006,
17
+ "eval_steps_per_second": 11.931,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
+ "learning_rate": 1.4758909853249476e-05,
23
+ "loss": 0.3484,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.83,
29
+ "eval_loss": 0.109930619597435,
30
+ "eval_runtime": 5.7104,
31
+ "eval_samples_per_second": 542.874,
32
+ "eval_steps_per_second": 11.383,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8809677419354839,
38
+ "eval_loss": 0.07449585199356079,
39
+ "eval_runtime": 5.3903,
40
+ "eval_samples_per_second": 575.109,
41
+ "eval_steps_per_second": 12.059,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
+ "learning_rate": 9.517819706498952e-06,
47
+ "loss": 0.1265,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.8964516129032258,
53
+ "eval_loss": 0.05887645110487938,
54
+ "eval_runtime": 5.4647,
55
+ "eval_samples_per_second": 567.278,
56
+ "eval_steps_per_second": 11.895,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
+ "learning_rate": 4.276729559748428e-06,
62
+ "loss": 0.0843,
63
  "step": 1500
64
  }
65
  ],
66
  "logging_steps": 500,
67
+ "max_steps": 1908,
68
+ "num_train_epochs": 6,
69
  "save_steps": 500,
70
  "total_flos": 389479376069112.0,
71
  "trial_name": null,
72
  "trial_params": {
73
+ "alpha": 0.339174080203406,
74
+ "num_train_epochs": 6,
75
+ "temperature": 7
76
  }
77
  }
run-2/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c739c287048413530d77539873d77810678560a3c9c21cac037ba107d1ab725
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bcd9576316ea4b78c0edb02ef77b7b54c380e6454ae67bc0bbd59e18e13a200
3
  size 4664
runs/Jan25_17-40-54_c146da53f02f/events.out.tfevents.1706206065.c146da53f02f.3236.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f24128428e8f274ac37e8f5de24cca3e5f330964e2cd9cca05f88629521eaf78
3
- size 13447
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb7aa29e29f29790ce6a5253807cf5aee0d4a8b0472e9e547949dbf83a42f7c
3
+ size 14604