synergyai-jaeung commited on
Commit
3e5857d
1 Parent(s): 2ede172

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -3,15 +3,15 @@
3
  "eval_accuracy": 0.9032258064516129,
4
  "eval_auc": 0.5,
5
  "eval_f1": 0.4745762711864407,
6
- "eval_loss": 0.33827999234199524,
7
  "eval_precision": 0.9032258064516129,
8
  "eval_recall": 1.0,
9
- "eval_runtime": 1.0789,
10
- "eval_samples_per_second": 172.394,
11
- "eval_steps_per_second": 2.781,
12
  "total_flos": 1.8691703717363712e+17,
13
- "train_loss": 0.28762030601501465,
14
- "train_runtime": 70.3105,
15
- "train_samples_per_second": 124.59,
16
- "train_steps_per_second": 0.427
17
  }
 
3
  "eval_accuracy": 0.9032258064516129,
4
  "eval_auc": 0.5,
5
  "eval_f1": 0.4745762711864407,
6
+ "eval_loss": 0.3023151159286499,
7
  "eval_precision": 0.9032258064516129,
8
  "eval_recall": 1.0,
9
+ "eval_runtime": 0.6301,
10
+ "eval_samples_per_second": 295.208,
11
+ "eval_steps_per_second": 4.761,
12
  "total_flos": 1.8691703717363712e+17,
13
+ "train_loss": 0.2957582632700602,
14
+ "train_runtime": 47.4143,
15
+ "train_samples_per_second": 184.754,
16
+ "train_steps_per_second": 0.633
17
  }
eval_results.json CHANGED
@@ -3,10 +3,10 @@
3
  "eval_accuracy": 0.9032258064516129,
4
  "eval_auc": 0.5,
5
  "eval_f1": 0.4745762711864407,
6
- "eval_loss": 0.33827999234199524,
7
  "eval_precision": 0.9032258064516129,
8
  "eval_recall": 1.0,
9
- "eval_runtime": 1.0789,
10
- "eval_samples_per_second": 172.394,
11
- "eval_steps_per_second": 2.781
12
  }
 
3
  "eval_accuracy": 0.9032258064516129,
4
  "eval_auc": 0.5,
5
  "eval_f1": 0.4745762711864407,
6
+ "eval_loss": 0.3023151159286499,
7
  "eval_precision": 0.9032258064516129,
8
  "eval_recall": 1.0,
9
+ "eval_runtime": 0.6301,
10
+ "eval_samples_per_second": 295.208,
11
+ "eval_steps_per_second": 4.761
12
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dd2e3b6404034899d709305ce15a3ca43637919f5098d55edd7a72091de6aad
3
  size 110342832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d07e7c41d6e3d022c28c3c64002a95b5a168904c992038f6dda631436efb1a4
3
  size 110342832
runs/May24_17-00-00_RTX3090/events.out.tfevents.1716537663.RTX3090.3776938.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:362058ec16218e8a98099fc83235a96e08487274902ee254d0b5a2f27661afda
3
+ size 597
runs/May27_15-58-23_RTX3090/events.out.tfevents.1716793114.RTX3090.4190332.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86818b56df27abc12eeaacdac861f509e01147a2e1fa722bfa93fdbf0b61610d
3
+ size 5277
runs/May27_16-00-38_RTX3090/events.out.tfevents.1716793245.RTX3090.4192365.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:283cd10d3002d8958503306ee9f92d2fa7ba78d98b4bc4f8627db85051b32e12
3
+ size 5786
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 8.571428571428571,
3
  "total_flos": 1.8691703717363712e+17,
4
- "train_loss": 0.28762030601501465,
5
- "train_runtime": 70.3105,
6
- "train_samples_per_second": 124.59,
7
- "train_steps_per_second": 0.427
8
  }
 
1
  {
2
  "epoch": 8.571428571428571,
3
  "total_flos": 1.8691703717363712e+17,
4
+ "train_loss": 0.2957582632700602,
5
+ "train_runtime": 47.4143,
6
+ "train_samples_per_second": 184.754,
7
+ "train_steps_per_second": 0.633
8
  }
trainer_state.json CHANGED
@@ -13,12 +13,12 @@
13
  "eval_accuracy": 0.9032258064516129,
14
  "eval_auc": 0.5,
15
  "eval_f1": 0.4745762711864407,
16
- "eval_loss": 0.33827999234199524,
17
  "eval_precision": 0.9032258064516129,
18
  "eval_recall": 1.0,
19
- "eval_runtime": 1.0242,
20
- "eval_samples_per_second": 181.602,
21
- "eval_steps_per_second": 2.929,
22
  "step": 3
23
  },
24
  {
@@ -26,19 +26,19 @@
26
  "eval_accuracy": 0.9032258064516129,
27
  "eval_auc": 0.5,
28
  "eval_f1": 0.4745762711864407,
29
- "eval_loss": 0.3653673529624939,
30
  "eval_precision": 0.9032258064516129,
31
  "eval_recall": 1.0,
32
- "eval_runtime": 1.0375,
33
- "eval_samples_per_second": 179.274,
34
- "eval_steps_per_second": 2.892,
35
  "step": 7
36
  },
37
  {
38
  "epoch": 2.857142857142857,
39
- "grad_norm": 4.537749767303467,
40
  "learning_rate": 3.7037037037037037e-05,
41
- "loss": 0.3833,
42
  "step": 10
43
  },
44
  {
@@ -46,12 +46,12 @@
46
  "eval_accuracy": 0.9032258064516129,
47
  "eval_auc": 0.5,
48
  "eval_f1": 0.4745762711864407,
49
- "eval_loss": 0.3421719968318939,
50
  "eval_precision": 0.9032258064516129,
51
  "eval_recall": 1.0,
52
- "eval_runtime": 1.0114,
53
- "eval_samples_per_second": 183.911,
54
- "eval_steps_per_second": 2.966,
55
  "step": 10
56
  },
57
  {
@@ -59,12 +59,12 @@
59
  "eval_accuracy": 0.9032258064516129,
60
  "eval_auc": 0.5,
61
  "eval_f1": 0.4745762711864407,
62
- "eval_loss": 0.3555563986301422,
63
  "eval_precision": 0.9032258064516129,
64
  "eval_recall": 1.0,
65
- "eval_runtime": 1.0242,
66
- "eval_samples_per_second": 181.606,
67
- "eval_steps_per_second": 2.929,
68
  "step": 14
69
  },
70
  {
@@ -72,19 +72,19 @@
72
  "eval_accuracy": 0.9032258064516129,
73
  "eval_auc": 0.5,
74
  "eval_f1": 0.4745762711864407,
75
- "eval_loss": 0.3424948453903198,
76
  "eval_precision": 0.9032258064516129,
77
  "eval_recall": 1.0,
78
- "eval_runtime": 1.0046,
79
- "eval_samples_per_second": 185.14,
80
- "eval_steps_per_second": 2.986,
81
  "step": 17
82
  },
83
  {
84
  "epoch": 5.714285714285714,
85
- "grad_norm": 1.4052001237869263,
86
  "learning_rate": 1.8518518518518518e-05,
87
- "loss": 0.2492,
88
  "step": 20
89
  },
90
  {
@@ -92,12 +92,12 @@
92
  "eval_accuracy": 0.9032258064516129,
93
  "eval_auc": 0.5,
94
  "eval_f1": 0.4745762711864407,
95
- "eval_loss": 0.3400775194168091,
96
  "eval_precision": 0.9032258064516129,
97
  "eval_recall": 1.0,
98
- "eval_runtime": 1.0288,
99
- "eval_samples_per_second": 180.792,
100
- "eval_steps_per_second": 2.916,
101
  "step": 21
102
  },
103
  {
@@ -105,12 +105,12 @@
105
  "eval_accuracy": 0.9032258064516129,
106
  "eval_auc": 0.5,
107
  "eval_f1": 0.4745762711864407,
108
- "eval_loss": 0.35429030656814575,
109
  "eval_precision": 0.9032258064516129,
110
  "eval_recall": 1.0,
111
- "eval_runtime": 0.999,
112
- "eval_samples_per_second": 186.195,
113
- "eval_steps_per_second": 3.003,
114
  "step": 24
115
  },
116
  {
@@ -118,19 +118,19 @@
118
  "eval_accuracy": 0.9032258064516129,
119
  "eval_auc": 0.5,
120
  "eval_f1": 0.4745762711864407,
121
- "eval_loss": 0.3571774363517761,
122
  "eval_precision": 0.9032258064516129,
123
  "eval_recall": 1.0,
124
- "eval_runtime": 1.0317,
125
- "eval_samples_per_second": 180.286,
126
- "eval_steps_per_second": 2.908,
127
  "step": 28
128
  },
129
  {
130
  "epoch": 8.571428571428571,
131
- "grad_norm": 1.283613920211792,
132
  "learning_rate": 0.0,
133
- "loss": 0.2304,
134
  "step": 30
135
  },
136
  {
@@ -138,22 +138,22 @@
138
  "eval_accuracy": 0.9032258064516129,
139
  "eval_auc": 0.5,
140
  "eval_f1": 0.4745762711864407,
141
- "eval_loss": 0.35649099946022034,
142
  "eval_precision": 0.9032258064516129,
143
  "eval_recall": 1.0,
144
- "eval_runtime": 1.006,
145
- "eval_samples_per_second": 184.89,
146
- "eval_steps_per_second": 2.982,
147
  "step": 30
148
  },
149
  {
150
  "epoch": 8.571428571428571,
151
  "step": 30,
152
  "total_flos": 1.8691703717363712e+17,
153
- "train_loss": 0.28762030601501465,
154
- "train_runtime": 70.3105,
155
- "train_samples_per_second": 124.59,
156
- "train_steps_per_second": 0.427
157
  }
158
  ],
159
  "logging_steps": 10,
 
13
  "eval_accuracy": 0.9032258064516129,
14
  "eval_auc": 0.5,
15
  "eval_f1": 0.4745762711864407,
16
+ "eval_loss": 0.3023151159286499,
17
  "eval_precision": 0.9032258064516129,
18
  "eval_recall": 1.0,
19
+ "eval_runtime": 0.5993,
20
+ "eval_samples_per_second": 310.347,
21
+ "eval_steps_per_second": 5.006,
22
  "step": 3
23
  },
24
  {
 
26
  "eval_accuracy": 0.9032258064516129,
27
  "eval_auc": 0.5,
28
  "eval_f1": 0.4745762711864407,
29
+ "eval_loss": 0.36768200993537903,
30
  "eval_precision": 0.9032258064516129,
31
  "eval_recall": 1.0,
32
+ "eval_runtime": 0.6162,
33
+ "eval_samples_per_second": 301.832,
34
+ "eval_steps_per_second": 4.868,
35
  "step": 7
36
  },
37
  {
38
  "epoch": 2.857142857142857,
39
+ "grad_norm": 2.596181631088257,
40
  "learning_rate": 3.7037037037037037e-05,
41
+ "loss": 0.4028,
42
  "step": 10
43
  },
44
  {
 
46
  "eval_accuracy": 0.9032258064516129,
47
  "eval_auc": 0.5,
48
  "eval_f1": 0.4745762711864407,
49
+ "eval_loss": 0.32757532596588135,
50
  "eval_precision": 0.9032258064516129,
51
  "eval_recall": 1.0,
52
+ "eval_runtime": 0.6008,
53
+ "eval_samples_per_second": 309.59,
54
+ "eval_steps_per_second": 4.993,
55
  "step": 10
56
  },
57
  {
 
59
  "eval_accuracy": 0.9032258064516129,
60
  "eval_auc": 0.5,
61
  "eval_f1": 0.4745762711864407,
62
+ "eval_loss": 0.32863086462020874,
63
  "eval_precision": 0.9032258064516129,
64
  "eval_recall": 1.0,
65
+ "eval_runtime": 0.6313,
66
+ "eval_samples_per_second": 294.647,
67
+ "eval_steps_per_second": 4.752,
68
  "step": 14
69
  },
70
  {
 
72
  "eval_accuracy": 0.9032258064516129,
73
  "eval_auc": 0.5,
74
  "eval_f1": 0.4745762711864407,
75
+ "eval_loss": 0.3271201252937317,
76
  "eval_precision": 0.9032258064516129,
77
  "eval_recall": 1.0,
78
+ "eval_runtime": 0.6283,
79
+ "eval_samples_per_second": 296.051,
80
+ "eval_steps_per_second": 4.775,
81
  "step": 17
82
  },
83
  {
84
  "epoch": 5.714285714285714,
85
+ "grad_norm": 1.759938359260559,
86
  "learning_rate": 1.8518518518518518e-05,
87
+ "loss": 0.2494,
88
  "step": 20
89
  },
90
  {
 
92
  "eval_accuracy": 0.9032258064516129,
93
  "eval_auc": 0.5,
94
  "eval_f1": 0.4745762711864407,
95
+ "eval_loss": 0.32589343190193176,
96
  "eval_precision": 0.9032258064516129,
97
  "eval_recall": 1.0,
98
+ "eval_runtime": 0.607,
99
+ "eval_samples_per_second": 306.423,
100
+ "eval_steps_per_second": 4.942,
101
  "step": 21
102
  },
103
  {
 
105
  "eval_accuracy": 0.9032258064516129,
106
  "eval_auc": 0.5,
107
  "eval_f1": 0.4745762711864407,
108
+ "eval_loss": 0.3279106020927429,
109
  "eval_precision": 0.9032258064516129,
110
  "eval_recall": 1.0,
111
+ "eval_runtime": 0.6103,
112
+ "eval_samples_per_second": 304.771,
113
+ "eval_steps_per_second": 4.916,
114
  "step": 24
115
  },
116
  {
 
118
  "eval_accuracy": 0.9032258064516129,
119
  "eval_auc": 0.5,
120
  "eval_f1": 0.4745762711864407,
121
+ "eval_loss": 0.3266947269439697,
122
  "eval_precision": 0.9032258064516129,
123
  "eval_recall": 1.0,
124
+ "eval_runtime": 0.6353,
125
+ "eval_samples_per_second": 292.77,
126
+ "eval_steps_per_second": 4.722,
127
  "step": 28
128
  },
129
  {
130
  "epoch": 8.571428571428571,
131
+ "grad_norm": 2.163374423980713,
132
  "learning_rate": 0.0,
133
+ "loss": 0.2351,
134
  "step": 30
135
  },
136
  {
 
138
  "eval_accuracy": 0.9032258064516129,
139
  "eval_auc": 0.5,
140
  "eval_f1": 0.4745762711864407,
141
+ "eval_loss": 0.32613006234169006,
142
  "eval_precision": 0.9032258064516129,
143
  "eval_recall": 1.0,
144
+ "eval_runtime": 0.6179,
145
+ "eval_samples_per_second": 301.013,
146
+ "eval_steps_per_second": 4.855,
147
  "step": 30
148
  },
149
  {
150
  "epoch": 8.571428571428571,
151
  "step": 30,
152
  "total_flos": 1.8691703717363712e+17,
153
+ "train_loss": 0.2957582632700602,
154
+ "train_runtime": 47.4143,
155
+ "train_samples_per_second": 184.754,
156
+ "train_steps_per_second": 0.633
157
  }
158
  ],
159
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d4c5476f8f49bfc14a67ca98ff73785fd0e5765eba1ff25ad649cd22bc6aaea
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e96781d201b4962023e2deb6df7f41bd5345729797e7384d64800c6fd5a32f6
3
  size 4731