synergyai-jaeung commited on
Commit
d485d0c
1 Parent(s): 953fe43

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,8 +1,17 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 1.385556774308905e+17,
4
- "train_loss": 0.33811448540603906,
5
- "train_runtime": 48.6875,
6
- "train_samples_per_second": 36.724,
7
- "train_steps_per_second": 1.171
 
 
 
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_AUC": 0.5354853273137697,
4
+ "eval_F1": 0.5271698859516118,
5
+ "eval_Precision": 0.8450433108758422,
6
+ "eval_Recall": 0.9909706546275395,
7
+ "eval_accuracy": 0.8407163053722903,
8
+ "eval_loss": 0.42085567116737366,
9
+ "eval_runtime": 11.8095,
10
+ "eval_samples_per_second": 89.843,
11
+ "eval_steps_per_second": 5.673,
12
+ "total_flos": 3.2879851193471386e+19,
13
+ "train_loss": 0.05622970362024654,
14
+ "train_runtime": 7456.0224,
15
+ "train_samples_per_second": 56.907,
16
+ "train_steps_per_second": 3.568
17
  }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_AUC": 0.5354853273137697,
4
+ "eval_F1": 0.5271698859516118,
5
+ "eval_Precision": 0.8450433108758422,
6
+ "eval_Recall": 0.9909706546275395,
7
+ "eval_accuracy": 0.8407163053722903,
8
+ "eval_loss": 0.42085567116737366,
9
+ "eval_runtime": 11.8095,
10
+ "eval_samples_per_second": 89.843,
11
+ "eval_steps_per_second": 5.673
12
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3abe4097d5e56fd23c7f775a44c6357ab155b7301f1265b60ba464ef2d0eaae
3
  size 343223968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:821c8b4f00bae0fe5bb7907ddff695d80ed05c3c5049cd6eea4b76b51faac684
3
  size 343223968
runs/May28_10-35-54_RTX3090/events.out.tfevents.1716868887.RTX3090.123086.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89672d01ed2eab339588546a698998d4b3ddc1e017ed0ef25088d74cc6ae39e0
3
+ size 617
runs/May28_13-11-48_RTX3090/events.out.tfevents.1716869509.RTX3090.58490.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:662b83880d53b2c7b48def2cfde901de234243734a37509b9ae7b3ea662de76f
3
+ size 5635
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 1.385556774308905e+17,
4
- "train_loss": 0.33811448540603906,
5
- "train_runtime": 48.6875,
6
- "train_samples_per_second": 36.724,
7
- "train_steps_per_second": 1.171
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "total_flos": 3.2879851193471386e+19,
4
+ "train_loss": 0.05622970362024654,
5
+ "train_runtime": 7456.0224,
6
+ "train_samples_per_second": 56.907,
7
+ "train_steps_per_second": 3.568
8
  }
trainer_state.json CHANGED
@@ -1,70 +1,1705 @@
1
  {
2
- "best_metric": 0.3061524033546448,
3
- "best_model_checkpoint": "google/vit-base-patch16-224-in21k_covid_19_ct_scans/checkpoint-57",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 57,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.05263157894736842,
13
- "grad_norm": 0.7303336262702942,
14
- "learning_rate": 0.00019649122807017543,
15
- "loss": 0.6963,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_F1": 0.7857142857142857,
21
- "eval_Precision": 0.75,
22
- "eval_Recall": 0.825,
23
- "eval_accuracy": 0.76,
24
- "eval_loss": 0.5245552659034729,
25
- "eval_runtime": 6.9325,
26
- "eval_samples_per_second": 21.637,
27
- "eval_steps_per_second": 1.442,
28
- "step": 19
 
 
 
 
 
 
 
 
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_F1": 0.8322147651006712,
33
- "eval_Precision": 0.8985507246376812,
34
- "eval_Recall": 0.775,
35
- "eval_accuracy": 0.8333333333333334,
36
- "eval_loss": 0.39111995697021484,
37
- "eval_runtime": 6.1326,
38
- "eval_samples_per_second": 24.46,
39
- "eval_steps_per_second": 1.631,
40
- "step": 38
 
41
  },
42
  {
43
  "epoch": 3.0,
44
- "eval_F1": 0.8571428571428571,
45
- "eval_Precision": 0.8518518518518519,
46
- "eval_Recall": 0.8625,
47
- "eval_accuracy": 0.8466666666666667,
48
- "eval_loss": 0.3061524033546448,
49
- "eval_runtime": 5.7977,
50
- "eval_samples_per_second": 25.872,
51
- "eval_steps_per_second": 1.725,
52
- "step": 57
 
53
  },
54
  {
55
- "epoch": 3.0,
56
- "step": 57,
57
- "total_flos": 1.385556774308905e+17,
58
- "train_loss": 0.33811448540603906,
59
- "train_runtime": 48.6875,
60
- "train_samples_per_second": 36.724,
61
- "train_steps_per_second": 1.171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 500,
65
- "max_steps": 57,
66
  "num_input_tokens_seen": 0,
67
- "num_train_epochs": 3,
68
  "save_steps": 500,
69
  "stateful_callbacks": {
70
  "TrainerControl": {
@@ -78,8 +1713,8 @@
78
  "attributes": {}
79
  }
80
  },
81
- "total_flos": 1.385556774308905e+17,
82
- "train_batch_size": 32,
83
  "trial_name": null,
84
  "trial_params": null
85
  }
 
1
  {
2
+ "best_metric": 0.42085567116737366,
3
+ "best_model_checkpoint": "google/vit-base-patch16-224-in21k_covid_19_ct_scans/checkpoint-2394",
4
+ "epoch": 100.0,
5
  "eval_steps": 500,
6
+ "global_step": 26600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0037593984962406013,
13
+ "grad_norm": 3.3277029991149902,
14
+ "learning_rate": 0.00019999248120300753,
15
+ "loss": 0.768,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_AUC": 0.5,
21
+ "eval_F1": 0.45505906522855677,
22
+ "eval_Precision": 0.8350612629594723,
23
+ "eval_Recall": 1.0,
24
+ "eval_accuracy": 0.8350612629594723,
25
+ "eval_loss": 0.45460373163223267,
26
+ "eval_runtime": 23.809,
27
+ "eval_samples_per_second": 44.563,
28
+ "eval_steps_per_second": 2.814,
29
+ "step": 266
30
+ },
31
+ {
32
+ "epoch": 1.8796992481203008,
33
+ "grad_norm": 0.9518311023712158,
34
+ "learning_rate": 0.0001962406015037594,
35
+ "loss": 0.4516,
36
+ "step": 500
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_AUC": 0.5,
41
+ "eval_F1": 0.45505906522855677,
42
+ "eval_Precision": 0.8350612629594723,
43
+ "eval_Recall": 1.0,
44
+ "eval_accuracy": 0.8350612629594723,
45
+ "eval_loss": 0.449796199798584,
46
+ "eval_runtime": 12.1544,
47
+ "eval_samples_per_second": 87.293,
48
+ "eval_steps_per_second": 5.512,
49
+ "step": 532
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "eval_AUC": 0.5,
54
+ "eval_F1": 0.45505906522855677,
55
+ "eval_Precision": 0.8350612629594723,
56
+ "eval_Recall": 1.0,
57
+ "eval_accuracy": 0.8350612629594723,
58
+ "eval_loss": 0.4491786062717438,
59
+ "eval_runtime": 12.1586,
60
+ "eval_samples_per_second": 87.263,
61
+ "eval_steps_per_second": 5.51,
62
+ "step": 798
63
  },
64
  {
65
+ "epoch": 3.7593984962406015,
66
+ "grad_norm": 0.6577199101448059,
67
+ "learning_rate": 0.0001924812030075188,
68
+ "loss": 0.4521,
69
+ "step": 1000
70
+ },
71
+ {
72
+ "epoch": 4.0,
73
+ "eval_AUC": 0.5,
74
+ "eval_F1": 0.45505906522855677,
75
+ "eval_Precision": 0.8350612629594723,
76
+ "eval_Recall": 1.0,
77
+ "eval_accuracy": 0.8350612629594723,
78
+ "eval_loss": 0.44855841994285583,
79
+ "eval_runtime": 12.3395,
80
+ "eval_samples_per_second": 85.984,
81
+ "eval_steps_per_second": 5.43,
82
+ "step": 1064
83
+ },
84
+ {
85
+ "epoch": 5.0,
86
+ "eval_AUC": 0.5,
87
+ "eval_F1": 0.45505906522855677,
88
+ "eval_Precision": 0.8350612629594723,
89
+ "eval_Recall": 1.0,
90
+ "eval_accuracy": 0.8350612629594723,
91
+ "eval_loss": 0.44568774104118347,
92
+ "eval_runtime": 12.3116,
93
+ "eval_samples_per_second": 86.179,
94
+ "eval_steps_per_second": 5.442,
95
+ "step": 1330
96
+ },
97
+ {
98
+ "epoch": 5.639097744360902,
99
+ "grad_norm": 0.03062070906162262,
100
+ "learning_rate": 0.00018872180451127822,
101
+ "loss": 0.4415,
102
+ "step": 1500
103
+ },
104
+ {
105
+ "epoch": 6.0,
106
+ "eval_AUC": 0.5,
107
+ "eval_F1": 0.45505906522855677,
108
+ "eval_Precision": 0.8350612629594723,
109
+ "eval_Recall": 1.0,
110
+ "eval_accuracy": 0.8350612629594723,
111
+ "eval_loss": 0.4421917796134949,
112
+ "eval_runtime": 11.8255,
113
+ "eval_samples_per_second": 89.721,
114
+ "eval_steps_per_second": 5.666,
115
+ "step": 1596
116
+ },
117
+ {
118
+ "epoch": 7.0,
119
+ "eval_AUC": 0.5,
120
+ "eval_F1": 0.45505906522855677,
121
+ "eval_Precision": 0.8350612629594723,
122
+ "eval_Recall": 1.0,
123
+ "eval_accuracy": 0.8350612629594723,
124
+ "eval_loss": 0.42494845390319824,
125
+ "eval_runtime": 11.7622,
126
+ "eval_samples_per_second": 90.204,
127
+ "eval_steps_per_second": 5.696,
128
+ "step": 1862
129
+ },
130
+ {
131
+ "epoch": 7.518796992481203,
132
+ "grad_norm": 0.4491100311279297,
133
+ "learning_rate": 0.0001849624060150376,
134
+ "loss": 0.4344,
135
+ "step": 2000
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_AUC": 0.5183424701709126,
140
+ "eval_F1": 0.4965715075876916,
141
+ "eval_Precision": 0.8401913875598086,
142
+ "eval_Recall": 0.9909706546275395,
143
+ "eval_accuracy": 0.8350612629594723,
144
+ "eval_loss": 0.4643925130367279,
145
+ "eval_runtime": 12.3543,
146
+ "eval_samples_per_second": 85.881,
147
+ "eval_steps_per_second": 5.423,
148
+ "step": 2128
149
+ },
150
+ {
151
+ "epoch": 9.0,
152
+ "eval_AUC": 0.5354853273137697,
153
+ "eval_F1": 0.5271698859516118,
154
+ "eval_Precision": 0.8450433108758422,
155
+ "eval_Recall": 0.9909706546275395,
156
+ "eval_accuracy": 0.8407163053722903,
157
+ "eval_loss": 0.42085567116737366,
158
+ "eval_runtime": 12.5156,
159
+ "eval_samples_per_second": 84.774,
160
+ "eval_steps_per_second": 5.353,
161
+ "step": 2394
162
+ },
163
+ {
164
+ "epoch": 9.398496240601503,
165
+ "grad_norm": 0.23315227031707764,
166
+ "learning_rate": 0.000181203007518797,
167
+ "loss": 0.3848,
168
+ "step": 2500
169
+ },
170
+ {
171
+ "epoch": 10.0,
172
+ "eval_AUC": 0.6642373427926476,
173
+ "eval_F1": 0.6571900744677727,
174
+ "eval_Precision": 0.8904267589388697,
175
+ "eval_Recall": 0.871331828442438,
176
+ "eval_accuracy": 0.8030160226201697,
177
+ "eval_loss": 0.4335523247718811,
178
+ "eval_runtime": 12.0639,
179
+ "eval_samples_per_second": 87.948,
180
+ "eval_steps_per_second": 5.554,
181
+ "step": 2660
182
+ },
183
+ {
184
+ "epoch": 11.0,
185
+ "eval_AUC": 0.6386617220251531,
186
+ "eval_F1": 0.6595258665704602,
187
+ "eval_Precision": 0.8777660695468915,
188
+ "eval_Recall": 0.9401805869074492,
189
+ "eval_accuracy": 0.8407163053722903,
190
+ "eval_loss": 0.43072912096977234,
191
+ "eval_runtime": 11.6338,
192
+ "eval_samples_per_second": 91.2,
193
+ "eval_steps_per_second": 5.759,
194
+ "step": 2926
195
+ },
196
+ {
197
+ "epoch": 11.278195488721805,
198
+ "grad_norm": 0.8828286528587341,
199
+ "learning_rate": 0.0001774436090225564,
200
+ "loss": 0.2882,
201
+ "step": 3000
202
+ },
203
+ {
204
+ "epoch": 12.0,
205
+ "eval_AUC": 0.7007449209932279,
206
+ "eval_F1": 0.6913138583881986,
207
+ "eval_Precision": 0.9028901734104047,
208
+ "eval_Recall": 0.881489841986456,
209
+ "eval_accuracy": 0.82186616399623,
210
+ "eval_loss": 0.5094270706176758,
211
+ "eval_runtime": 12.013,
212
+ "eval_samples_per_second": 88.321,
213
+ "eval_steps_per_second": 5.577,
214
+ "step": 3192
215
+ },
216
+ {
217
+ "epoch": 13.0,
218
+ "eval_AUC": 0.6362624959690422,
219
+ "eval_F1": 0.6636744500641045,
220
+ "eval_Precision": 0.8761609907120743,
221
+ "eval_Recall": 0.9582392776523702,
222
+ "eval_accuracy": 0.8520263901979265,
223
+ "eval_loss": 0.46198517084121704,
224
+ "eval_runtime": 11.833,
225
+ "eval_samples_per_second": 89.665,
226
+ "eval_steps_per_second": 5.662,
227
+ "step": 3458
228
+ },
229
+ {
230
+ "epoch": 13.157894736842104,
231
+ "grad_norm": 0.10690835863351822,
232
+ "learning_rate": 0.0001736842105263158,
233
+ "loss": 0.1654,
234
+ "step": 3500
235
+ },
236
+ {
237
+ "epoch": 14.0,
238
+ "eval_AUC": 0.7246952595936795,
239
+ "eval_F1": 0.7141794985075913,
240
+ "eval_Precision": 0.9109826589595376,
241
+ "eval_Recall": 0.8893905191873589,
242
+ "eval_accuracy": 0.8350612629594723,
243
+ "eval_loss": 0.58914715051651,
244
+ "eval_runtime": 12.2166,
245
+ "eval_samples_per_second": 86.849,
246
+ "eval_steps_per_second": 5.484,
247
+ "step": 3724
248
+ },
249
+ {
250
+ "epoch": 15.0,
251
+ "eval_AUC": 0.6827894227668494,
252
+ "eval_F1": 0.6940221645449677,
253
+ "eval_Precision": 0.893640350877193,
254
+ "eval_Recall": 0.9198645598194131,
255
+ "eval_accuracy": 0.8416588124410933,
256
+ "eval_loss": 0.5601742267608643,
257
+ "eval_runtime": 12.0344,
258
+ "eval_samples_per_second": 88.164,
259
+ "eval_steps_per_second": 5.567,
260
+ "step": 3990
261
+ },
262
+ {
263
+ "epoch": 15.037593984962406,
264
+ "grad_norm": 0.1679229587316513,
265
+ "learning_rate": 0.0001699248120300752,
266
+ "loss": 0.0868,
267
+ "step": 4000
268
+ },
269
+ {
270
+ "epoch": 16.0,
271
+ "eval_AUC": 0.6785198323121573,
272
+ "eval_F1": 0.7114280962304207,
273
+ "eval_Precision": 0.8894681960375391,
274
+ "eval_Recall": 0.9627539503386005,
275
+ "eval_accuracy": 0.8689915174363808,
276
+ "eval_loss": 0.5927982926368713,
277
+ "eval_runtime": 11.7748,
278
+ "eval_samples_per_second": 90.107,
279
+ "eval_steps_per_second": 5.69,
280
+ "step": 4256
281
+ },
282
+ {
283
+ "epoch": 16.917293233082706,
284
+ "grad_norm": 0.02159872278571129,
285
+ "learning_rate": 0.00016616541353383458,
286
+ "loss": 0.045,
287
+ "step": 4500
288
+ },
289
+ {
290
+ "epoch": 17.0,
291
+ "eval_AUC": 0.7072331505965818,
292
+ "eval_F1": 0.7268478980719824,
293
+ "eval_Precision": 0.9005405405405406,
294
+ "eval_Recall": 0.9401805869074492,
295
+ "eval_accuracy": 0.8633364750235627,
296
+ "eval_loss": 0.6153913140296936,
297
+ "eval_runtime": 11.9361,
298
+ "eval_samples_per_second": 88.89,
299
+ "eval_steps_per_second": 5.613,
300
+ "step": 4522
301
+ },
302
+ {
303
+ "epoch": 18.0,
304
+ "eval_AUC": 0.7169332473395679,
305
+ "eval_F1": 0.7369669924918544,
306
+ "eval_Precision": 0.9036796536796536,
307
+ "eval_Recall": 0.9424379232505643,
308
+ "eval_accuracy": 0.8680490103675778,
309
+ "eval_loss": 0.6357868909835815,
310
+ "eval_runtime": 11.728,
311
+ "eval_samples_per_second": 90.467,
312
+ "eval_steps_per_second": 5.713,
313
+ "step": 4788
314
+ },
315
+ {
316
+ "epoch": 18.796992481203006,
317
+ "grad_norm": 0.00539048295468092,
318
+ "learning_rate": 0.00016240601503759398,
319
+ "loss": 0.021,
320
+ "step": 5000
321
+ },
322
+ {
323
+ "epoch": 19.0,
324
+ "eval_AUC": 0.7422960335375686,
325
+ "eval_F1": 0.737905217953103,
326
+ "eval_Precision": 0.9157175398633257,
327
+ "eval_Recall": 0.90744920993228,
328
+ "eval_accuracy": 0.8529688972667295,
329
+ "eval_loss": 0.8246906399726868,
330
+ "eval_runtime": 11.7691,
331
+ "eval_samples_per_second": 90.151,
332
+ "eval_steps_per_second": 5.693,
333
+ "step": 5054
334
+ },
335
+ {
336
+ "epoch": 20.0,
337
+ "eval_AUC": 0.7228603676233473,
338
+ "eval_F1": 0.7228603676233474,
339
+ "eval_Precision": 0.9085778781038375,
340
+ "eval_Recall": 0.9085778781038375,
341
+ "eval_accuracy": 0.8473138548539114,
342
+ "eval_loss": 0.9930059909820557,
343
+ "eval_runtime": 11.643,
344
+ "eval_samples_per_second": 91.128,
345
+ "eval_steps_per_second": 5.755,
346
+ "step": 5320
347
+ },
348
+ {
349
+ "epoch": 20.67669172932331,
350
+ "grad_norm": 1.4996395111083984,
351
+ "learning_rate": 0.0001586466165413534,
352
+ "loss": 0.0136,
353
+ "step": 5500
354
+ },
355
+ {
356
+ "epoch": 21.0,
357
+ "eval_AUC": 0.7037762012254112,
358
+ "eval_F1": 0.7262231926161842,
359
+ "eval_Precision": 0.8990332975295381,
360
+ "eval_Recall": 0.9446952595936795,
361
+ "eval_accuracy": 0.8652214891611687,
362
+ "eval_loss": 0.5600523352622986,
363
+ "eval_runtime": 11.6192,
364
+ "eval_samples_per_second": 91.315,
365
+ "eval_steps_per_second": 5.766,
366
+ "step": 5586
367
+ },
368
+ {
369
+ "epoch": 22.0,
370
+ "eval_AUC": 0.6561560786842955,
371
+ "eval_F1": 0.6934912580385852,
372
+ "eval_Precision": 0.8816326530612245,
373
+ "eval_Recall": 0.9751693002257337,
374
+ "eval_accuracy": 0.8699340245051838,
375
+ "eval_loss": 0.64747554063797,
376
+ "eval_runtime": 11.5363,
377
+ "eval_samples_per_second": 91.971,
378
+ "eval_steps_per_second": 5.808,
379
+ "step": 5852
380
+ },
381
+ {
382
+ "epoch": 22.55639097744361,
383
+ "grad_norm": 0.029281923547387123,
384
+ "learning_rate": 0.0001548872180451128,
385
+ "loss": 0.0464,
386
+ "step": 6000
387
+ },
388
+ {
389
+ "epoch": 23.0,
390
+ "eval_AUC": 0.7170396646243147,
391
+ "eval_F1": 0.7272862554112554,
392
+ "eval_Precision": 0.9050772626931567,
393
+ "eval_Recall": 0.9255079006772009,
394
+ "eval_accuracy": 0.8567389255419415,
395
+ "eval_loss": 0.5766553282737732,
396
+ "eval_runtime": 11.6613,
397
+ "eval_samples_per_second": 90.984,
398
+ "eval_steps_per_second": 5.745,
399
+ "step": 6118
400
+ },
401
+ {
402
+ "epoch": 24.0,
403
+ "eval_AUC": 0.7451886488229603,
404
+ "eval_F1": 0.736944199717763,
405
+ "eval_Precision": 0.9173363949483353,
406
+ "eval_Recall": 0.9018058690744921,
407
+ "eval_accuracy": 0.8501413760603205,
408
+ "eval_loss": 0.7393656373023987,
409
+ "eval_runtime": 11.8414,
410
+ "eval_samples_per_second": 89.601,
411
+ "eval_steps_per_second": 5.658,
412
+ "step": 6384
413
+ },
414
+ {
415
+ "epoch": 24.43609022556391,
416
+ "grad_norm": 0.0072451187297701836,
417
+ "learning_rate": 0.00015112781954887218,
418
+ "loss": 0.0438,
419
+ "step": 6500
420
+ },
421
+ {
422
+ "epoch": 25.0,
423
+ "eval_AUC": 0.6412705578845533,
424
+ "eval_F1": 0.6781337216357238,
425
+ "eval_Precision": 0.8767676767676768,
426
+ "eval_Recall": 0.9796839729119639,
427
+ "eval_accuracy": 0.8680490103675778,
428
+ "eval_loss": 0.762208878993988,
429
+ "eval_runtime": 11.8755,
430
+ "eval_samples_per_second": 89.344,
431
+ "eval_steps_per_second": 5.642,
432
+ "step": 6650
433
+ },
434
+ {
435
+ "epoch": 26.0,
436
+ "eval_AUC": 0.7167913576265721,
437
+ "eval_F1": 0.7509163334545014,
438
+ "eval_Precision": 0.9018987341772152,
439
+ "eval_Recall": 0.9650112866817155,
440
+ "eval_accuracy": 0.883129123468426,
441
+ "eval_loss": 0.7616934180259705,
442
+ "eval_runtime": 11.673,
443
+ "eval_samples_per_second": 90.893,
444
+ "eval_steps_per_second": 5.74,
445
+ "step": 6916
446
+ },
447
+ {
448
+ "epoch": 26.31578947368421,
449
+ "grad_norm": 0.005205586086958647,
450
+ "learning_rate": 0.00014736842105263158,
451
+ "loss": 0.0126,
452
+ "step": 7000
453
+ },
454
+ {
455
+ "epoch": 27.0,
456
+ "eval_AUC": 0.7227184779103515,
457
+ "eval_F1": 0.7354277398991624,
458
+ "eval_Precision": 0.9065934065934066,
459
+ "eval_Recall": 0.9311512415349887,
460
+ "eval_accuracy": 0.8623939679547596,
461
+ "eval_loss": 0.8840720653533936,
462
+ "eval_runtime": 12.2613,
463
+ "eval_samples_per_second": 86.533,
464
+ "eval_steps_per_second": 5.464,
465
+ "step": 7182
466
+ },
467
+ {
468
+ "epoch": 28.0,
469
+ "eval_AUC": 0.7300193485972267,
470
+ "eval_F1": 0.7543650900476486,
471
+ "eval_Precision": 0.907427341227126,
472
+ "eval_Recall": 0.9514672686230248,
473
+ "eval_accuracy": 0.8784165881244109,
474
+ "eval_loss": 0.7538221478462219,
475
+ "eval_runtime": 11.7032,
476
+ "eval_samples_per_second": 90.659,
477
+ "eval_steps_per_second": 5.725,
478
+ "step": 7448
479
+ },
480
+ {
481
+ "epoch": 28.195488721804512,
482
+ "grad_norm": 0.03350173309445381,
483
+ "learning_rate": 0.000143609022556391,
484
+ "loss": 0.016,
485
+ "step": 7500
486
+ },
487
+ {
488
+ "epoch": 29.0,
489
+ "eval_AUC": 0.6320638503708481,
490
+ "eval_F1": 0.6709177157453019,
491
+ "eval_Precision": 0.8735059760956175,
492
+ "eval_Recall": 0.989841986455982,
493
+ "eval_accuracy": 0.8718190386427899,
494
+ "eval_loss": 0.7105740308761597,
495
+ "eval_runtime": 11.7867,
496
+ "eval_samples_per_second": 90.017,
497
+ "eval_steps_per_second": 5.684,
498
+ "step": 7714
499
+ },
500
+ {
501
+ "epoch": 30.0,
502
+ "eval_AUC": 0.689348597226701,
503
+ "eval_F1": 0.72511658580244,
504
+ "eval_Precision": 0.8927083333333333,
505
+ "eval_Recall": 0.9672686230248307,
506
+ "eval_accuracy": 0.8755890669180019,
507
+ "eval_loss": 0.6111597418785095,
508
+ "eval_runtime": 11.7504,
509
+ "eval_samples_per_second": 90.295,
510
+ "eval_steps_per_second": 5.702,
511
+ "step": 7980
512
+ },
513
+ {
514
+ "epoch": 30.075187969924812,
515
+ "grad_norm": 0.013551408424973488,
516
+ "learning_rate": 0.0001398496240601504,
517
+ "loss": 0.0384,
518
+ "step": 8000
519
+ },
520
+ {
521
+ "epoch": 31.0,
522
+ "eval_AUC": 0.6887487907126733,
523
+ "eval_F1": 0.7271465907527794,
524
+ "eval_Precision": 0.8922279792746114,
525
+ "eval_Recall": 0.9717832957110609,
526
+ "eval_accuracy": 0.8784165881244109,
527
+ "eval_loss": 0.5990052223205566,
528
+ "eval_runtime": 11.854,
529
+ "eval_samples_per_second": 89.506,
530
+ "eval_steps_per_second": 5.652,
531
+ "step": 8246
532
+ },
533
+ {
534
+ "epoch": 31.954887218045112,
535
+ "grad_norm": 0.10003461688756943,
536
+ "learning_rate": 0.0001360902255639098,
537
+ "loss": 0.0276,
538
+ "step": 8500
539
+ },
540
+ {
541
+ "epoch": 32.0,
542
+ "eval_AUC": 0.699577555627217,
543
+ "eval_F1": 0.741136709063275,
544
+ "eval_Precision": 0.8954451345755694,
545
+ "eval_Recall": 0.9762979683972912,
546
+ "eval_accuracy": 0.8850141376060321,
547
+ "eval_loss": 0.6617034673690796,
548
+ "eval_runtime": 11.7353,
549
+ "eval_samples_per_second": 90.411,
550
+ "eval_steps_per_second": 5.709,
551
+ "step": 8512
552
+ },
553
+ {
554
+ "epoch": 33.0,
555
+ "eval_AUC": 0.7190132215414382,
556
+ "eval_F1": 0.7599135442188549,
557
+ "eval_Precision": 0.9018789144050104,
558
+ "eval_Recall": 0.9751693002257337,
559
+ "eval_accuracy": 0.8906691800188501,
560
+ "eval_loss": 0.7068904042243958,
561
+ "eval_runtime": 11.6304,
562
+ "eval_samples_per_second": 91.226,
563
+ "eval_steps_per_second": 5.761,
564
+ "step": 8778
565
+ },
566
+ {
567
+ "epoch": 33.83458646616541,
568
+ "grad_norm": 0.00604345602914691,
569
+ "learning_rate": 0.00013233082706766918,
570
+ "loss": 0.0109,
571
+ "step": 9000
572
+ },
573
+ {
574
+ "epoch": 34.0,
575
+ "eval_AUC": 0.6566849403418252,
576
+ "eval_F1": 0.6974271887335782,
577
+ "eval_Precision": 0.8814589665653495,
578
+ "eval_Recall": 0.981941309255079,
579
+ "eval_accuracy": 0.8746465598491989,
580
+ "eval_loss": 0.8042259812355042,
581
+ "eval_runtime": 11.739,
582
+ "eval_samples_per_second": 90.382,
583
+ "eval_steps_per_second": 5.707,
584
+ "step": 9044
585
+ },
586
+ {
587
+ "epoch": 35.0,
588
+ "eval_AUC": 0.6961560786842954,
589
+ "eval_F1": 0.7368930485561156,
590
+ "eval_Precision": 0.8944099378881988,
591
+ "eval_Recall": 0.9751693002257337,
592
+ "eval_accuracy": 0.883129123468426,
593
+ "eval_loss": 0.7705923914909363,
594
+ "eval_runtime": 11.6824,
595
+ "eval_samples_per_second": 90.821,
596
+ "eval_steps_per_second": 5.735,
597
+ "step": 9310
598
+ },
599
+ {
600
+ "epoch": 35.714285714285715,
601
+ "grad_norm": 0.0020399852655828,
602
+ "learning_rate": 0.00012857142857142858,
603
+ "loss": 0.0028,
604
+ "step": 9500
605
+ },
606
+ {
607
+ "epoch": 36.0,
608
+ "eval_AUC": 0.712170267655595,
609
+ "eval_F1": 0.7516347009160568,
610
+ "eval_Precision": 0.8997912317327766,
611
+ "eval_Recall": 0.9729119638826185,
612
+ "eval_accuracy": 0.8868991517436381,
613
+ "eval_loss": 0.8394030928611755,
614
+ "eval_runtime": 11.8855,
615
+ "eval_samples_per_second": 89.268,
616
+ "eval_steps_per_second": 5.637,
617
+ "step": 9576
618
+ },
619
+ {
620
+ "epoch": 37.0,
621
+ "eval_AUC": 0.7087487907126733,
622
+ "eval_F1": 0.7474952792646576,
623
+ "eval_Precision": 0.8987473903966597,
624
+ "eval_Recall": 0.9717832957110609,
625
+ "eval_accuracy": 0.8850141376060321,
626
+ "eval_loss": 0.8953573107719421,
627
+ "eval_runtime": 11.8186,
628
+ "eval_samples_per_second": 89.774,
629
+ "eval_steps_per_second": 5.669,
630
+ "step": 9842
631
+ },
632
+ {
633
+ "epoch": 37.59398496240601,
634
+ "grad_norm": 0.0010929929558187723,
635
+ "learning_rate": 0.00012481203007518797,
636
+ "loss": 0.0076,
637
+ "step": 10000
638
+ },
639
+ {
640
+ "epoch": 38.0,
641
+ "eval_AUC": 0.7087487907126733,
642
+ "eval_F1": 0.7474952792646576,
643
+ "eval_Precision": 0.8987473903966597,
644
+ "eval_Recall": 0.9717832957110609,
645
+ "eval_accuracy": 0.8850141376060321,
646
+ "eval_loss": 0.9388997554779053,
647
+ "eval_runtime": 11.6255,
648
+ "eval_samples_per_second": 91.265,
649
+ "eval_steps_per_second": 5.763,
650
+ "step": 10108
651
+ },
652
+ {
653
+ "epoch": 39.0,
654
+ "eval_AUC": 0.7087487907126733,
655
+ "eval_F1": 0.7474952792646576,
656
+ "eval_Precision": 0.8987473903966597,
657
+ "eval_Recall": 0.9717832957110609,
658
+ "eval_accuracy": 0.8850141376060321,
659
+ "eval_loss": 0.9697290062904358,
660
+ "eval_runtime": 11.5778,
661
+ "eval_samples_per_second": 91.641,
662
+ "eval_steps_per_second": 5.787,
663
+ "step": 10374
664
+ },
665
+ {
666
+ "epoch": 39.473684210526315,
667
+ "grad_norm": 0.0006237945053726435,
668
+ "learning_rate": 0.00012105263157894738,
669
+ "loss": 0.0001,
670
+ "step": 10500
671
+ },
672
+ {
673
+ "epoch": 40.0,
674
+ "eval_AUC": 0.7087487907126733,
675
+ "eval_F1": 0.7474952792646576,
676
+ "eval_Precision": 0.8987473903966597,
677
+ "eval_Recall": 0.9717832957110609,
678
+ "eval_accuracy": 0.8850141376060321,
679
+ "eval_loss": 0.9953697323799133,
680
+ "eval_runtime": 11.7777,
681
+ "eval_samples_per_second": 90.086,
682
+ "eval_steps_per_second": 5.689,
683
+ "step": 10640
684
+ },
685
+ {
686
+ "epoch": 41.0,
687
+ "eval_AUC": 0.7087487907126733,
688
+ "eval_F1": 0.7474952792646576,
689
+ "eval_Precision": 0.8987473903966597,
690
+ "eval_Recall": 0.9717832957110609,
691
+ "eval_accuracy": 0.8850141376060321,
692
+ "eval_loss": 1.0168683528900146,
693
+ "eval_runtime": 11.7874,
694
+ "eval_samples_per_second": 90.011,
695
+ "eval_steps_per_second": 5.684,
696
+ "step": 10906
697
+ },
698
+ {
699
+ "epoch": 41.35338345864662,
700
+ "grad_norm": 0.000347771099768579,
701
+ "learning_rate": 0.00011729323308270677,
702
+ "loss": 0.0,
703
+ "step": 11000
704
+ },
705
+ {
706
+ "epoch": 42.0,
707
+ "eval_AUC": 0.7093131247984521,
708
+ "eval_F1": 0.7487971197401504,
709
+ "eval_Precision": 0.8988529718456726,
710
+ "eval_Recall": 0.9729119638826185,
711
+ "eval_accuracy": 0.885956644674835,
712
+ "eval_loss": 1.038093090057373,
713
+ "eval_runtime": 11.6656,
714
+ "eval_samples_per_second": 90.951,
715
+ "eval_steps_per_second": 5.743,
716
+ "step": 11172
717
+ },
718
+ {
719
+ "epoch": 43.0,
720
+ "eval_AUC": 0.7093131247984521,
721
+ "eval_F1": 0.7487971197401504,
722
+ "eval_Precision": 0.8988529718456726,
723
+ "eval_Recall": 0.9729119638826185,
724
+ "eval_accuracy": 0.885956644674835,
725
+ "eval_loss": 1.0582064390182495,
726
+ "eval_runtime": 11.7863,
727
+ "eval_samples_per_second": 90.019,
728
+ "eval_steps_per_second": 5.685,
729
+ "step": 11438
730
+ },
731
+ {
732
+ "epoch": 43.233082706766915,
733
+ "grad_norm": 0.00026405107928439975,
734
+ "learning_rate": 0.00011353383458646618,
735
+ "loss": 0.0,
736
+ "step": 11500
737
+ },
738
+ {
739
+ "epoch": 44.0,
740
+ "eval_AUC": 0.7093131247984521,
741
+ "eval_F1": 0.7487971197401504,
742
+ "eval_Precision": 0.8988529718456726,
743
+ "eval_Recall": 0.9729119638826185,
744
+ "eval_accuracy": 0.885956644674835,
745
+ "eval_loss": 1.0762717723846436,
746
+ "eval_runtime": 11.6351,
747
+ "eval_samples_per_second": 91.189,
748
+ "eval_steps_per_second": 5.758,
749
+ "step": 11704
750
+ },
751
+ {
752
+ "epoch": 45.0,
753
+ "eval_AUC": 0.7093131247984521,
754
+ "eval_F1": 0.7487971197401504,
755
+ "eval_Precision": 0.8988529718456726,
756
+ "eval_Recall": 0.9729119638826185,
757
+ "eval_accuracy": 0.885956644674835,
758
+ "eval_loss": 1.0936743021011353,
759
+ "eval_runtime": 11.9085,
760
+ "eval_samples_per_second": 89.096,
761
+ "eval_steps_per_second": 5.626,
762
+ "step": 11970
763
+ },
764
+ {
765
+ "epoch": 45.11278195488722,
766
+ "grad_norm": 0.00022154749603942037,
767
+ "learning_rate": 0.00010977443609022557,
768
+ "loss": 0.0,
769
+ "step": 12000
770
+ },
771
+ {
772
+ "epoch": 46.0,
773
+ "eval_AUC": 0.7150274105127379,
774
+ "eval_F1": 0.7544540322094451,
775
+ "eval_Precision": 0.9007314524555904,
776
+ "eval_Recall": 0.9729119638826185,
777
+ "eval_accuracy": 0.8878416588124411,
778
+ "eval_loss": 1.1094835996627808,
779
+ "eval_runtime": 11.7724,
780
+ "eval_samples_per_second": 90.126,
781
+ "eval_steps_per_second": 5.691,
782
+ "step": 12236
783
+ },
784
+ {
785
+ "epoch": 46.99248120300752,
786
+ "grad_norm": 0.00019688473548740149,
787
+ "learning_rate": 0.00010601503759398497,
788
+ "loss": 0.0,
789
+ "step": 12500
790
+ },
791
+ {
792
+ "epoch": 47.0,
793
+ "eval_AUC": 0.7150274105127379,
794
+ "eval_F1": 0.7544540322094451,
795
+ "eval_Precision": 0.9007314524555904,
796
+ "eval_Recall": 0.9729119638826185,
797
+ "eval_accuracy": 0.8878416588124411,
798
+ "eval_loss": 1.1262503862380981,
799
+ "eval_runtime": 11.5453,
800
+ "eval_samples_per_second": 91.899,
801
+ "eval_steps_per_second": 5.803,
802
+ "step": 12502
803
+ },
804
+ {
805
+ "epoch": 48.0,
806
+ "eval_AUC": 0.7150274105127379,
807
+ "eval_F1": 0.7544540322094451,
808
+ "eval_Precision": 0.9007314524555904,
809
+ "eval_Recall": 0.9729119638826185,
810
+ "eval_accuracy": 0.8878416588124411,
811
+ "eval_loss": 1.1426563262939453,
812
+ "eval_runtime": 11.6837,
813
+ "eval_samples_per_second": 90.81,
814
+ "eval_steps_per_second": 5.734,
815
+ "step": 12768
816
+ },
817
+ {
818
+ "epoch": 48.87218045112782,
819
+ "grad_norm": 0.0001134950143750757,
820
+ "learning_rate": 0.00010225563909774436,
821
+ "loss": 0.0,
822
+ "step": 13000
823
+ },
824
+ {
825
+ "epoch": 49.0,
826
+ "eval_AUC": 0.7150274105127379,
827
+ "eval_F1": 0.7544540322094451,
828
+ "eval_Precision": 0.9007314524555904,
829
+ "eval_Recall": 0.9729119638826185,
830
+ "eval_accuracy": 0.8878416588124411,
831
+ "eval_loss": 1.1587177515029907,
832
+ "eval_runtime": 11.7191,
833
+ "eval_samples_per_second": 90.536,
834
+ "eval_steps_per_second": 5.717,
835
+ "step": 13034
836
+ },
837
+ {
838
+ "epoch": 50.0,
839
+ "eval_AUC": 0.7150274105127379,
840
+ "eval_F1": 0.7544540322094451,
841
+ "eval_Precision": 0.9007314524555904,
842
+ "eval_Recall": 0.9729119638826185,
843
+ "eval_accuracy": 0.8878416588124411,
844
+ "eval_loss": 1.174465537071228,
845
+ "eval_runtime": 11.8222,
846
+ "eval_samples_per_second": 89.747,
847
+ "eval_steps_per_second": 5.667,
848
+ "step": 13300
849
+ },
850
+ {
851
+ "epoch": 50.75187969924812,
852
+ "grad_norm": 9.584094368619844e-05,
853
+ "learning_rate": 9.849624060150377e-05,
854
+ "loss": 0.0,
855
+ "step": 13500
856
+ },
857
+ {
858
+ "epoch": 51.0,
859
+ "eval_AUC": 0.7150274105127379,
860
+ "eval_F1": 0.7544540322094451,
861
+ "eval_Precision": 0.9007314524555904,
862
+ "eval_Recall": 0.9729119638826185,
863
+ "eval_accuracy": 0.8878416588124411,
864
+ "eval_loss": 1.1900520324707031,
865
+ "eval_runtime": 11.7601,
866
+ "eval_samples_per_second": 90.22,
867
+ "eval_steps_per_second": 5.697,
868
+ "step": 13566
869
+ },
870
+ {
871
+ "epoch": 52.0,
872
+ "eval_AUC": 0.7178845533698807,
873
+ "eval_F1": 0.7572553125484722,
874
+ "eval_Precision": 0.9016736401673641,
875
+ "eval_Recall": 0.9729119638826185,
876
+ "eval_accuracy": 0.8887841658812441,
877
+ "eval_loss": 1.2051938772201538,
878
+ "eval_runtime": 11.9347,
879
+ "eval_samples_per_second": 88.901,
880
+ "eval_steps_per_second": 5.614,
881
+ "step": 13832
882
+ },
883
+ {
884
+ "epoch": 52.63157894736842,
885
+ "grad_norm": 7.240776903927326e-05,
886
+ "learning_rate": 9.473684210526316e-05,
887
+ "loss": 0.0,
888
+ "step": 14000
889
+ },
890
+ {
891
+ "epoch": 53.0,
892
+ "eval_AUC": 0.7178845533698807,
893
+ "eval_F1": 0.7572553125484722,
894
+ "eval_Precision": 0.9016736401673641,
895
+ "eval_Recall": 0.9729119638826185,
896
+ "eval_accuracy": 0.8887841658812441,
897
+ "eval_loss": 1.2201390266418457,
898
+ "eval_runtime": 11.8013,
899
+ "eval_samples_per_second": 89.905,
900
+ "eval_steps_per_second": 5.677,
901
+ "step": 14098
902
+ },
903
+ {
904
+ "epoch": 54.0,
905
+ "eval_AUC": 0.7178845533698807,
906
+ "eval_F1": 0.7572553125484722,
907
+ "eval_Precision": 0.9016736401673641,
908
+ "eval_Recall": 0.9729119638826185,
909
+ "eval_accuracy": 0.8887841658812441,
910
+ "eval_loss": 1.2349706888198853,
911
+ "eval_runtime": 11.8152,
912
+ "eval_samples_per_second": 89.8,
913
+ "eval_steps_per_second": 5.671,
914
+ "step": 14364
915
+ },
916
+ {
917
+ "epoch": 54.51127819548872,
918
+ "grad_norm": 4.7142420953605324e-05,
919
+ "learning_rate": 9.097744360902256e-05,
920
+ "loss": 0.0,
921
+ "step": 14500
922
+ },
923
+ {
924
+ "epoch": 55.0,
925
+ "eval_AUC": 0.7178845533698807,
926
+ "eval_F1": 0.7572553125484722,
927
+ "eval_Precision": 0.9016736401673641,
928
+ "eval_Recall": 0.9729119638826185,
929
+ "eval_accuracy": 0.8887841658812441,
930
+ "eval_loss": 1.249691367149353,
931
+ "eval_runtime": 11.9642,
932
+ "eval_samples_per_second": 88.682,
933
+ "eval_steps_per_second": 5.6,
934
+ "step": 14630
935
+ },
936
+ {
937
+ "epoch": 56.0,
938
+ "eval_AUC": 0.7178845533698807,
939
+ "eval_F1": 0.7572553125484722,
940
+ "eval_Precision": 0.9016736401673641,
941
+ "eval_Recall": 0.9729119638826185,
942
+ "eval_accuracy": 0.8887841658812441,
943
+ "eval_loss": 1.2640849351882935,
944
+ "eval_runtime": 11.9363,
945
+ "eval_samples_per_second": 88.889,
946
+ "eval_steps_per_second": 5.613,
947
+ "step": 14896
948
+ },
949
+ {
950
+ "epoch": 56.390977443609025,
951
+ "grad_norm": 4.406652442412451e-05,
952
+ "learning_rate": 8.721804511278195e-05,
953
+ "loss": 0.0,
954
+ "step": 15000
955
+ },
956
+ {
957
+ "epoch": 57.0,
958
+ "eval_AUC": 0.7178845533698807,
959
+ "eval_F1": 0.7572553125484722,
960
+ "eval_Precision": 0.9016736401673641,
961
+ "eval_Recall": 0.9729119638826185,
962
+ "eval_accuracy": 0.8887841658812441,
963
+ "eval_loss": 1.2785232067108154,
964
+ "eval_runtime": 11.9798,
965
+ "eval_samples_per_second": 88.566,
966
+ "eval_steps_per_second": 5.593,
967
+ "step": 15162
968
+ },
969
+ {
970
+ "epoch": 58.0,
971
+ "eval_AUC": 0.7178845533698807,
972
+ "eval_F1": 0.7572553125484722,
973
+ "eval_Precision": 0.9016736401673641,
974
+ "eval_Recall": 0.9729119638826185,
975
+ "eval_accuracy": 0.8887841658812441,
976
+ "eval_loss": 1.2925220727920532,
977
+ "eval_runtime": 11.9038,
978
+ "eval_samples_per_second": 89.131,
979
+ "eval_steps_per_second": 5.628,
980
+ "step": 15428
981
+ },
982
+ {
983
+ "epoch": 58.27067669172932,
984
+ "grad_norm": 2.6122717827092856e-05,
985
+ "learning_rate": 8.345864661654136e-05,
986
+ "loss": 0.0,
987
+ "step": 15500
988
+ },
989
+ {
990
+ "epoch": 59.0,
991
+ "eval_AUC": 0.7178845533698807,
992
+ "eval_F1": 0.7572553125484722,
993
+ "eval_Precision": 0.9016736401673641,
994
+ "eval_Recall": 0.9729119638826185,
995
+ "eval_accuracy": 0.8887841658812441,
996
+ "eval_loss": 1.3067699670791626,
997
+ "eval_runtime": 11.8267,
998
+ "eval_samples_per_second": 89.713,
999
+ "eval_steps_per_second": 5.665,
1000
+ "step": 15694
1001
+ },
1002
+ {
1003
+ "epoch": 60.0,
1004
+ "eval_AUC": 0.7178845533698807,
1005
+ "eval_F1": 0.7572553125484722,
1006
+ "eval_Precision": 0.9016736401673641,
1007
+ "eval_Recall": 0.9729119638826185,
1008
+ "eval_accuracy": 0.8887841658812441,
1009
+ "eval_loss": 1.3207120895385742,
1010
+ "eval_runtime": 11.6077,
1011
+ "eval_samples_per_second": 91.405,
1012
+ "eval_steps_per_second": 5.772,
1013
+ "step": 15960
1014
+ },
1015
+ {
1016
+ "epoch": 60.150375939849624,
1017
+ "grad_norm": 2.532277903810609e-05,
1018
+ "learning_rate": 7.969924812030075e-05,
1019
+ "loss": 0.0,
1020
+ "step": 16000
1021
+ },
1022
+ {
1023
+ "epoch": 61.0,
1024
+ "eval_AUC": 0.7178845533698807,
1025
+ "eval_F1": 0.7572553125484722,
1026
+ "eval_Precision": 0.9016736401673641,
1027
+ "eval_Recall": 0.9729119638826185,
1028
+ "eval_accuracy": 0.8887841658812441,
1029
+ "eval_loss": 1.3345941305160522,
1030
+ "eval_runtime": 11.8443,
1031
+ "eval_samples_per_second": 89.579,
1032
+ "eval_steps_per_second": 5.657,
1033
+ "step": 16226
1034
+ },
1035
+ {
1036
+ "epoch": 62.0,
1037
+ "eval_AUC": 0.7178845533698807,
1038
+ "eval_F1": 0.7572553125484722,
1039
+ "eval_Precision": 0.9016736401673641,
1040
+ "eval_Recall": 0.9729119638826185,
1041
+ "eval_accuracy": 0.8887841658812441,
1042
+ "eval_loss": 1.3484621047973633,
1043
+ "eval_runtime": 11.6458,
1044
+ "eval_samples_per_second": 91.106,
1045
+ "eval_steps_per_second": 5.753,
1046
+ "step": 16492
1047
+ },
1048
+ {
1049
+ "epoch": 62.03007518796993,
1050
+ "grad_norm": 1.7661703168414533e-05,
1051
+ "learning_rate": 7.593984962406016e-05,
1052
+ "loss": 0.0,
1053
+ "step": 16500
1054
+ },
1055
+ {
1056
+ "epoch": 63.0,
1057
+ "eval_AUC": 0.7178845533698807,
1058
+ "eval_F1": 0.7572553125484722,
1059
+ "eval_Precision": 0.9016736401673641,
1060
+ "eval_Recall": 0.9729119638826185,
1061
+ "eval_accuracy": 0.8887841658812441,
1062
+ "eval_loss": 1.3622149229049683,
1063
+ "eval_runtime": 11.7437,
1064
+ "eval_samples_per_second": 90.346,
1065
+ "eval_steps_per_second": 5.705,
1066
+ "step": 16758
1067
+ },
1068
+ {
1069
+ "epoch": 63.909774436090224,
1070
+ "grad_norm": 1.633859210414812e-05,
1071
+ "learning_rate": 7.218045112781955e-05,
1072
+ "loss": 0.0,
1073
+ "step": 17000
1074
+ },
1075
+ {
1076
+ "epoch": 64.0,
1077
+ "eval_AUC": 0.7178845533698807,
1078
+ "eval_F1": 0.7572553125484722,
1079
+ "eval_Precision": 0.9016736401673641,
1080
+ "eval_Recall": 0.9729119638826185,
1081
+ "eval_accuracy": 0.8887841658812441,
1082
+ "eval_loss": 1.3757728338241577,
1083
+ "eval_runtime": 11.6035,
1084
+ "eval_samples_per_second": 91.438,
1085
+ "eval_steps_per_second": 5.774,
1086
+ "step": 17024
1087
+ },
1088
+ {
1089
+ "epoch": 65.0,
1090
+ "eval_AUC": 0.7178845533698807,
1091
+ "eval_F1": 0.7572553125484722,
1092
+ "eval_Precision": 0.9016736401673641,
1093
+ "eval_Recall": 0.9729119638826185,
1094
+ "eval_accuracy": 0.8887841658812441,
1095
+ "eval_loss": 1.3893355131149292,
1096
+ "eval_runtime": 11.8424,
1097
+ "eval_samples_per_second": 89.593,
1098
+ "eval_steps_per_second": 5.658,
1099
+ "step": 17290
1100
+ },
1101
+ {
1102
+ "epoch": 65.78947368421052,
1103
+ "grad_norm": 1.2574956599564757e-05,
1104
+ "learning_rate": 6.842105263157895e-05,
1105
+ "loss": 0.0,
1106
+ "step": 17500
1107
+ },
1108
+ {
1109
+ "epoch": 66.0,
1110
+ "eval_AUC": 0.7178845533698807,
1111
+ "eval_F1": 0.7572553125484722,
1112
+ "eval_Precision": 0.9016736401673641,
1113
+ "eval_Recall": 0.9729119638826185,
1114
+ "eval_accuracy": 0.8887841658812441,
1115
+ "eval_loss": 1.40289306640625,
1116
+ "eval_runtime": 11.9699,
1117
+ "eval_samples_per_second": 88.639,
1118
+ "eval_steps_per_second": 5.597,
1119
+ "step": 17556
1120
+ },
1121
+ {
1122
+ "epoch": 67.0,
1123
+ "eval_AUC": 0.7178845533698807,
1124
+ "eval_F1": 0.7572553125484722,
1125
+ "eval_Precision": 0.9016736401673641,
1126
+ "eval_Recall": 0.9729119638826185,
1127
+ "eval_accuracy": 0.8887841658812441,
1128
+ "eval_loss": 1.4165505170822144,
1129
+ "eval_runtime": 11.6917,
1130
+ "eval_samples_per_second": 90.748,
1131
+ "eval_steps_per_second": 5.731,
1132
+ "step": 17822
1133
+ },
1134
+ {
1135
+ "epoch": 67.66917293233082,
1136
+ "grad_norm": 1.0964651664835401e-05,
1137
+ "learning_rate": 6.466165413533834e-05,
1138
+ "loss": 0.0,
1139
+ "step": 18000
1140
+ },
1141
+ {
1142
+ "epoch": 68.0,
1143
+ "eval_AUC": 0.7178845533698807,
1144
+ "eval_F1": 0.7572553125484722,
1145
+ "eval_Precision": 0.9016736401673641,
1146
+ "eval_Recall": 0.9729119638826185,
1147
+ "eval_accuracy": 0.8887841658812441,
1148
+ "eval_loss": 1.4297924041748047,
1149
+ "eval_runtime": 11.881,
1150
+ "eval_samples_per_second": 89.302,
1151
+ "eval_steps_per_second": 5.639,
1152
+ "step": 18088
1153
+ },
1154
+ {
1155
+ "epoch": 69.0,
1156
+ "eval_AUC": 0.7178845533698807,
1157
+ "eval_F1": 0.7572553125484722,
1158
+ "eval_Precision": 0.9016736401673641,
1159
+ "eval_Recall": 0.9729119638826185,
1160
+ "eval_accuracy": 0.8887841658812441,
1161
+ "eval_loss": 1.4431047439575195,
1162
+ "eval_runtime": 11.6298,
1163
+ "eval_samples_per_second": 91.231,
1164
+ "eval_steps_per_second": 5.761,
1165
+ "step": 18354
1166
+ },
1167
+ {
1168
+ "epoch": 69.54887218045113,
1169
+ "grad_norm": 8.276247172034346e-06,
1170
+ "learning_rate": 6.090225563909775e-05,
1171
+ "loss": 0.0,
1172
+ "step": 18500
1173
+ },
1174
+ {
1175
+ "epoch": 70.0,
1176
+ "eval_AUC": 0.7178845533698807,
1177
+ "eval_F1": 0.7572553125484722,
1178
+ "eval_Precision": 0.9016736401673641,
1179
+ "eval_Recall": 0.9729119638826185,
1180
+ "eval_accuracy": 0.8887841658812441,
1181
+ "eval_loss": 1.4565781354904175,
1182
+ "eval_runtime": 11.6654,
1183
+ "eval_samples_per_second": 90.952,
1184
+ "eval_steps_per_second": 5.743,
1185
+ "step": 18620
1186
+ },
1187
+ {
1188
+ "epoch": 71.0,
1189
+ "eval_AUC": 0.7178845533698807,
1190
+ "eval_F1": 0.7572553125484722,
1191
+ "eval_Precision": 0.9016736401673641,
1192
+ "eval_Recall": 0.9729119638826185,
1193
+ "eval_accuracy": 0.8887841658812441,
1194
+ "eval_loss": 1.4694792032241821,
1195
+ "eval_runtime": 12.0384,
1196
+ "eval_samples_per_second": 88.134,
1197
+ "eval_steps_per_second": 5.566,
1198
+ "step": 18886
1199
+ },
1200
+ {
1201
+ "epoch": 71.42857142857143,
1202
+ "grad_norm": 7.255929176608333e-06,
1203
+ "learning_rate": 5.714285714285714e-05,
1204
+ "loss": 0.0,
1205
+ "step": 19000
1206
+ },
1207
+ {
1208
+ "epoch": 72.0,
1209
+ "eval_AUC": 0.7178845533698807,
1210
+ "eval_F1": 0.7572553125484722,
1211
+ "eval_Precision": 0.9016736401673641,
1212
+ "eval_Recall": 0.9729119638826185,
1213
+ "eval_accuracy": 0.8887841658812441,
1214
+ "eval_loss": 1.482446551322937,
1215
+ "eval_runtime": 11.6854,
1216
+ "eval_samples_per_second": 90.797,
1217
+ "eval_steps_per_second": 5.734,
1218
+ "step": 19152
1219
+ },
1220
+ {
1221
+ "epoch": 73.0,
1222
+ "eval_AUC": 0.7178845533698807,
1223
+ "eval_F1": 0.7572553125484722,
1224
+ "eval_Precision": 0.9016736401673641,
1225
+ "eval_Recall": 0.9729119638826185,
1226
+ "eval_accuracy": 0.8887841658812441,
1227
+ "eval_loss": 1.4949710369110107,
1228
+ "eval_runtime": 11.7291,
1229
+ "eval_samples_per_second": 90.459,
1230
+ "eval_steps_per_second": 5.712,
1231
+ "step": 19418
1232
+ },
1233
+ {
1234
+ "epoch": 73.30827067669173,
1235
+ "grad_norm": 5.73582974539022e-06,
1236
+ "learning_rate": 5.338345864661655e-05,
1237
+ "loss": 0.0,
1238
+ "step": 19500
1239
+ },
1240
+ {
1241
+ "epoch": 74.0,
1242
+ "eval_AUC": 0.7178845533698807,
1243
+ "eval_F1": 0.7572553125484722,
1244
+ "eval_Precision": 0.9016736401673641,
1245
+ "eval_Recall": 0.9729119638826185,
1246
+ "eval_accuracy": 0.8887841658812441,
1247
+ "eval_loss": 1.50760817527771,
1248
+ "eval_runtime": 11.6937,
1249
+ "eval_samples_per_second": 90.732,
1250
+ "eval_steps_per_second": 5.73,
1251
+ "step": 19684
1252
+ },
1253
+ {
1254
+ "epoch": 75.0,
1255
+ "eval_AUC": 0.7178845533698807,
1256
+ "eval_F1": 0.7572553125484722,
1257
+ "eval_Precision": 0.9016736401673641,
1258
+ "eval_Recall": 0.9729119638826185,
1259
+ "eval_accuracy": 0.8887841658812441,
1260
+ "eval_loss": 1.5201044082641602,
1261
+ "eval_runtime": 11.779,
1262
+ "eval_samples_per_second": 90.075,
1263
+ "eval_steps_per_second": 5.688,
1264
+ "step": 19950
1265
+ },
1266
+ {
1267
+ "epoch": 75.18796992481202,
1268
+ "grad_norm": 3.381761189302779e-06,
1269
+ "learning_rate": 4.9624060150375936e-05,
1270
+ "loss": 0.0,
1271
+ "step": 20000
1272
+ },
1273
+ {
1274
+ "epoch": 76.0,
1275
+ "eval_AUC": 0.7178845533698807,
1276
+ "eval_F1": 0.7572553125484722,
1277
+ "eval_Precision": 0.9016736401673641,
1278
+ "eval_Recall": 0.9729119638826185,
1279
+ "eval_accuracy": 0.8887841658812441,
1280
+ "eval_loss": 1.5320940017700195,
1281
+ "eval_runtime": 11.7044,
1282
+ "eval_samples_per_second": 90.65,
1283
+ "eval_steps_per_second": 5.724,
1284
+ "step": 20216
1285
+ },
1286
+ {
1287
+ "epoch": 77.0,
1288
+ "eval_AUC": 0.7178845533698807,
1289
+ "eval_F1": 0.7572553125484722,
1290
+ "eval_Precision": 0.9016736401673641,
1291
+ "eval_Recall": 0.9729119638826185,
1292
+ "eval_accuracy": 0.8887841658812441,
1293
+ "eval_loss": 1.5440773963928223,
1294
+ "eval_runtime": 11.8766,
1295
+ "eval_samples_per_second": 89.335,
1296
+ "eval_steps_per_second": 5.641,
1297
+ "step": 20482
1298
+ },
1299
+ {
1300
+ "epoch": 77.06766917293233,
1301
+ "grad_norm": 4.261892627255293e-06,
1302
+ "learning_rate": 4.586466165413534e-05,
1303
+ "loss": 0.0,
1304
+ "step": 20500
1305
+ },
1306
+ {
1307
+ "epoch": 78.0,
1308
+ "eval_AUC": 0.7178845533698807,
1309
+ "eval_F1": 0.7572553125484722,
1310
+ "eval_Precision": 0.9016736401673641,
1311
+ "eval_Recall": 0.9729119638826185,
1312
+ "eval_accuracy": 0.8887841658812441,
1313
+ "eval_loss": 1.5564316511154175,
1314
+ "eval_runtime": 11.9626,
1315
+ "eval_samples_per_second": 88.693,
1316
+ "eval_steps_per_second": 5.601,
1317
+ "step": 20748
1318
+ },
1319
+ {
1320
+ "epoch": 78.94736842105263,
1321
+ "grad_norm": 2.5668264242995065e-06,
1322
+ "learning_rate": 4.210526315789474e-05,
1323
+ "loss": 0.0,
1324
+ "step": 21000
1325
+ },
1326
+ {
1327
+ "epoch": 79.0,
1328
+ "eval_AUC": 0.7178845533698807,
1329
+ "eval_F1": 0.7572553125484722,
1330
+ "eval_Precision": 0.9016736401673641,
1331
+ "eval_Recall": 0.9729119638826185,
1332
+ "eval_accuracy": 0.8887841658812441,
1333
+ "eval_loss": 1.5691113471984863,
1334
+ "eval_runtime": 11.9711,
1335
+ "eval_samples_per_second": 88.63,
1336
+ "eval_steps_per_second": 5.597,
1337
+ "step": 21014
1338
+ },
1339
+ {
1340
+ "epoch": 80.0,
1341
+ "eval_AUC": 0.7178845533698807,
1342
+ "eval_F1": 0.7572553125484722,
1343
+ "eval_Precision": 0.9016736401673641,
1344
+ "eval_Recall": 0.9729119638826185,
1345
+ "eval_accuracy": 0.8887841658812441,
1346
+ "eval_loss": 1.5799812078475952,
1347
+ "eval_runtime": 11.8905,
1348
+ "eval_samples_per_second": 89.231,
1349
+ "eval_steps_per_second": 5.635,
1350
+ "step": 21280
1351
+ },
1352
+ {
1353
+ "epoch": 80.82706766917293,
1354
+ "grad_norm": 1.7882749716591206e-06,
1355
+ "learning_rate": 3.834586466165413e-05,
1356
+ "loss": 0.0,
1357
+ "step": 21500
1358
+ },
1359
+ {
1360
+ "epoch": 81.0,
1361
+ "eval_AUC": 0.7178845533698807,
1362
+ "eval_F1": 0.7572553125484722,
1363
+ "eval_Precision": 0.9016736401673641,
1364
+ "eval_Recall": 0.9729119638826185,
1365
+ "eval_accuracy": 0.8887841658812441,
1366
+ "eval_loss": 1.5909699201583862,
1367
+ "eval_runtime": 11.8739,
1368
+ "eval_samples_per_second": 89.355,
1369
+ "eval_steps_per_second": 5.643,
1370
+ "step": 21546
1371
+ },
1372
+ {
1373
+ "epoch": 82.0,
1374
+ "eval_AUC": 0.7178845533698807,
1375
+ "eval_F1": 0.7572553125484722,
1376
+ "eval_Precision": 0.9016736401673641,
1377
+ "eval_Recall": 0.9729119638826185,
1378
+ "eval_accuracy": 0.8887841658812441,
1379
+ "eval_loss": 1.6020997762680054,
1380
+ "eval_runtime": 11.7593,
1381
+ "eval_samples_per_second": 90.226,
1382
+ "eval_steps_per_second": 5.698,
1383
+ "step": 21812
1384
+ },
1385
+ {
1386
+ "epoch": 82.70676691729324,
1387
+ "grad_norm": 3.307637371108285e-06,
1388
+ "learning_rate": 3.458646616541353e-05,
1389
+ "loss": 0.0,
1390
+ "step": 22000
1391
+ },
1392
+ {
1393
+ "epoch": 83.0,
1394
+ "eval_AUC": 0.7178845533698807,
1395
+ "eval_F1": 0.7572553125484722,
1396
+ "eval_Precision": 0.9016736401673641,
1397
+ "eval_Recall": 0.9729119638826185,
1398
+ "eval_accuracy": 0.8887841658812441,
1399
+ "eval_loss": 1.6133201122283936,
1400
+ "eval_runtime": 11.7049,
1401
+ "eval_samples_per_second": 90.645,
1402
+ "eval_steps_per_second": 5.724,
1403
+ "step": 22078
1404
+ },
1405
+ {
1406
+ "epoch": 84.0,
1407
+ "eval_AUC": 0.7178845533698807,
1408
+ "eval_F1": 0.7572553125484722,
1409
+ "eval_Precision": 0.9016736401673641,
1410
+ "eval_Recall": 0.9729119638826185,
1411
+ "eval_accuracy": 0.8887841658812441,
1412
+ "eval_loss": 1.6243833303451538,
1413
+ "eval_runtime": 11.7522,
1414
+ "eval_samples_per_second": 90.281,
1415
+ "eval_steps_per_second": 5.701,
1416
+ "step": 22344
1417
+ },
1418
+ {
1419
+ "epoch": 84.58646616541354,
1420
+ "grad_norm": 2.0808365661650896e-06,
1421
+ "learning_rate": 3.082706766917293e-05,
1422
+ "loss": 0.0,
1423
+ "step": 22500
1424
+ },
1425
+ {
1426
+ "epoch": 85.0,
1427
+ "eval_AUC": 0.7178845533698807,
1428
+ "eval_F1": 0.7572553125484722,
1429
+ "eval_Precision": 0.9016736401673641,
1430
+ "eval_Recall": 0.9729119638826185,
1431
+ "eval_accuracy": 0.8887841658812441,
1432
+ "eval_loss": 1.6356879472732544,
1433
+ "eval_runtime": 11.8494,
1434
+ "eval_samples_per_second": 89.54,
1435
+ "eval_steps_per_second": 5.654,
1436
+ "step": 22610
1437
+ },
1438
+ {
1439
+ "epoch": 86.0,
1440
+ "eval_AUC": 0.7178845533698807,
1441
+ "eval_F1": 0.7572553125484722,
1442
+ "eval_Precision": 0.9016736401673641,
1443
+ "eval_Recall": 0.9729119638826185,
1444
+ "eval_accuracy": 0.8887841658812441,
1445
+ "eval_loss": 1.646845817565918,
1446
+ "eval_runtime": 11.9457,
1447
+ "eval_samples_per_second": 88.819,
1448
+ "eval_steps_per_second": 5.609,
1449
+ "step": 22876
1450
+ },
1451
+ {
1452
+ "epoch": 86.46616541353383,
1453
+ "grad_norm": 1.6075608755272697e-06,
1454
+ "learning_rate": 2.706766917293233e-05,
1455
+ "loss": 0.0,
1456
+ "step": 23000
1457
+ },
1458
+ {
1459
+ "epoch": 87.0,
1460
+ "eval_AUC": 0.7178845533698807,
1461
+ "eval_F1": 0.7572553125484722,
1462
+ "eval_Precision": 0.9016736401673641,
1463
+ "eval_Recall": 0.9729119638826185,
1464
+ "eval_accuracy": 0.8887841658812441,
1465
+ "eval_loss": 1.6580337285995483,
1466
+ "eval_runtime": 11.988,
1467
+ "eval_samples_per_second": 88.505,
1468
+ "eval_steps_per_second": 5.589,
1469
+ "step": 23142
1470
+ },
1471
+ {
1472
+ "epoch": 88.0,
1473
+ "eval_AUC": 0.7178845533698807,
1474
+ "eval_F1": 0.7572553125484722,
1475
+ "eval_Precision": 0.9016736401673641,
1476
+ "eval_Recall": 0.9729119638826185,
1477
+ "eval_accuracy": 0.8887841658812441,
1478
+ "eval_loss": 1.6693716049194336,
1479
+ "eval_runtime": 11.563,
1480
+ "eval_samples_per_second": 91.758,
1481
+ "eval_steps_per_second": 5.794,
1482
+ "step": 23408
1483
+ },
1484
+ {
1485
+ "epoch": 88.34586466165413,
1486
+ "grad_norm": 1.4785607618250651e-06,
1487
+ "learning_rate": 2.3308270676691728e-05,
1488
+ "loss": 0.0,
1489
+ "step": 23500
1490
+ },
1491
+ {
1492
+ "epoch": 89.0,
1493
+ "eval_AUC": 0.7178845533698807,
1494
+ "eval_F1": 0.7572553125484722,
1495
+ "eval_Precision": 0.9016736401673641,
1496
+ "eval_Recall": 0.9729119638826185,
1497
+ "eval_accuracy": 0.8887841658812441,
1498
+ "eval_loss": 1.6805604696273804,
1499
+ "eval_runtime": 11.7292,
1500
+ "eval_samples_per_second": 90.458,
1501
+ "eval_steps_per_second": 5.712,
1502
+ "step": 23674
1503
+ },
1504
+ {
1505
+ "epoch": 90.0,
1506
+ "eval_AUC": 0.7178845533698807,
1507
+ "eval_F1": 0.7572553125484722,
1508
+ "eval_Precision": 0.9016736401673641,
1509
+ "eval_Recall": 0.9729119638826185,
1510
+ "eval_accuracy": 0.8887841658812441,
1511
+ "eval_loss": 1.6876078844070435,
1512
+ "eval_runtime": 11.6884,
1513
+ "eval_samples_per_second": 90.774,
1514
+ "eval_steps_per_second": 5.732,
1515
+ "step": 23940
1516
+ },
1517
+ {
1518
+ "epoch": 90.22556390977444,
1519
+ "grad_norm": 7.791019811520528e-07,
1520
+ "learning_rate": 1.954887218045113e-05,
1521
+ "loss": 0.0,
1522
+ "step": 24000
1523
+ },
1524
+ {
1525
+ "epoch": 91.0,
1526
+ "eval_AUC": 0.7178845533698807,
1527
+ "eval_F1": 0.7572553125484722,
1528
+ "eval_Precision": 0.9016736401673641,
1529
+ "eval_Recall": 0.9729119638826185,
1530
+ "eval_accuracy": 0.8887841658812441,
1531
+ "eval_loss": 1.6937507390975952,
1532
+ "eval_runtime": 11.7263,
1533
+ "eval_samples_per_second": 90.481,
1534
+ "eval_steps_per_second": 5.714,
1535
+ "step": 24206
1536
+ },
1537
+ {
1538
+ "epoch": 92.0,
1539
+ "eval_AUC": 0.7178845533698807,
1540
+ "eval_F1": 0.7572553125484722,
1541
+ "eval_Precision": 0.9016736401673641,
1542
+ "eval_Recall": 0.9729119638826185,
1543
+ "eval_accuracy": 0.8887841658812441,
1544
+ "eval_loss": 1.6996102333068848,
1545
+ "eval_runtime": 11.8252,
1546
+ "eval_samples_per_second": 89.723,
1547
+ "eval_steps_per_second": 5.666,
1548
+ "step": 24472
1549
+ },
1550
+ {
1551
+ "epoch": 92.10526315789474,
1552
+ "grad_norm": 8.590963034293964e-07,
1553
+ "learning_rate": 1.5789473684210526e-05,
1554
+ "loss": 0.0,
1555
+ "step": 24500
1556
+ },
1557
+ {
1558
+ "epoch": 93.0,
1559
+ "eval_AUC": 0.7178845533698807,
1560
+ "eval_F1": 0.7572553125484722,
1561
+ "eval_Precision": 0.9016736401673641,
1562
+ "eval_Recall": 0.9729119638826185,
1563
+ "eval_accuracy": 0.8887841658812441,
1564
+ "eval_loss": 1.705134630203247,
1565
+ "eval_runtime": 12.0159,
1566
+ "eval_samples_per_second": 88.3,
1567
+ "eval_steps_per_second": 5.576,
1568
+ "step": 24738
1569
+ },
1570
+ {
1571
+ "epoch": 93.98496240601504,
1572
+ "grad_norm": 1.100646727536514e-06,
1573
+ "learning_rate": 1.2030075187969925e-05,
1574
+ "loss": 0.0,
1575
+ "step": 25000
1576
+ },
1577
+ {
1578
+ "epoch": 94.0,
1579
+ "eval_AUC": 0.7178845533698807,
1580
+ "eval_F1": 0.7572553125484722,
1581
+ "eval_Precision": 0.9016736401673641,
1582
+ "eval_Recall": 0.9729119638826185,
1583
+ "eval_accuracy": 0.8887841658812441,
1584
+ "eval_loss": 1.7103519439697266,
1585
+ "eval_runtime": 11.7931,
1586
+ "eval_samples_per_second": 89.968,
1587
+ "eval_steps_per_second": 5.681,
1588
+ "step": 25004
1589
+ },
1590
+ {
1591
+ "epoch": 95.0,
1592
+ "eval_AUC": 0.7178845533698807,
1593
+ "eval_F1": 0.7572553125484722,
1594
+ "eval_Precision": 0.9016736401673641,
1595
+ "eval_Recall": 0.9729119638826185,
1596
+ "eval_accuracy": 0.8887841658812441,
1597
+ "eval_loss": 1.715171456336975,
1598
+ "eval_runtime": 11.6391,
1599
+ "eval_samples_per_second": 91.159,
1600
+ "eval_steps_per_second": 5.756,
1601
+ "step": 25270
1602
+ },
1603
+ {
1604
+ "epoch": 95.86466165413533,
1605
+ "grad_norm": 5.422148774414381e-07,
1606
+ "learning_rate": 8.270676691729324e-06,
1607
+ "loss": 0.0,
1608
+ "step": 25500
1609
+ },
1610
+ {
1611
+ "epoch": 96.0,
1612
+ "eval_AUC": 0.7178845533698807,
1613
+ "eval_F1": 0.7572553125484722,
1614
+ "eval_Precision": 0.9016736401673641,
1615
+ "eval_Recall": 0.9729119638826185,
1616
+ "eval_accuracy": 0.8887841658812441,
1617
+ "eval_loss": 1.7194596529006958,
1618
+ "eval_runtime": 11.8706,
1619
+ "eval_samples_per_second": 89.38,
1620
+ "eval_steps_per_second": 5.644,
1621
+ "step": 25536
1622
+ },
1623
+ {
1624
+ "epoch": 97.0,
1625
+ "eval_AUC": 0.7178845533698807,
1626
+ "eval_F1": 0.7572553125484722,
1627
+ "eval_Precision": 0.9016736401673641,
1628
+ "eval_Recall": 0.9729119638826185,
1629
+ "eval_accuracy": 0.8887841658812441,
1630
+ "eval_loss": 1.723157286643982,
1631
+ "eval_runtime": 12.0004,
1632
+ "eval_samples_per_second": 88.414,
1633
+ "eval_steps_per_second": 5.583,
1634
+ "step": 25802
1635
+ },
1636
+ {
1637
+ "epoch": 97.74436090225564,
1638
+ "grad_norm": 8.843226737553778e-07,
1639
+ "learning_rate": 4.511278195488722e-06,
1640
+ "loss": 0.0,
1641
+ "step": 26000
1642
+ },
1643
+ {
1644
+ "epoch": 98.0,
1645
+ "eval_AUC": 0.7178845533698807,
1646
+ "eval_F1": 0.7572553125484722,
1647
+ "eval_Precision": 0.9016736401673641,
1648
+ "eval_Recall": 0.9729119638826185,
1649
+ "eval_accuracy": 0.8887841658812441,
1650
+ "eval_loss": 1.7260410785675049,
1651
+ "eval_runtime": 11.8627,
1652
+ "eval_samples_per_second": 89.44,
1653
+ "eval_steps_per_second": 5.648,
1654
+ "step": 26068
1655
+ },
1656
+ {
1657
+ "epoch": 99.0,
1658
+ "eval_AUC": 0.7178845533698807,
1659
+ "eval_F1": 0.7572553125484722,
1660
+ "eval_Precision": 0.9016736401673641,
1661
+ "eval_Recall": 0.9729119638826185,
1662
+ "eval_accuracy": 0.8887841658812441,
1663
+ "eval_loss": 1.7279813289642334,
1664
+ "eval_runtime": 12.0306,
1665
+ "eval_samples_per_second": 88.192,
1666
+ "eval_steps_per_second": 5.569,
1667
+ "step": 26334
1668
+ },
1669
+ {
1670
+ "epoch": 99.62406015037594,
1671
+ "grad_norm": 6.334667546070705e-07,
1672
+ "learning_rate": 7.518796992481203e-07,
1673
+ "loss": 0.0,
1674
+ "step": 26500
1675
+ },
1676
+ {
1677
+ "epoch": 100.0,
1678
+ "eval_AUC": 0.7178845533698807,
1679
+ "eval_F1": 0.7572553125484722,
1680
+ "eval_Precision": 0.9016736401673641,
1681
+ "eval_Recall": 0.9729119638826185,
1682
+ "eval_accuracy": 0.8887841658812441,
1683
+ "eval_loss": 1.7286875247955322,
1684
+ "eval_runtime": 11.7137,
1685
+ "eval_samples_per_second": 90.578,
1686
+ "eval_steps_per_second": 5.72,
1687
+ "step": 26600
1688
+ },
1689
+ {
1690
+ "epoch": 100.0,
1691
+ "step": 26600,
1692
+ "total_flos": 3.2879851193471386e+19,
1693
+ "train_loss": 0.05622970362024654,
1694
+ "train_runtime": 7456.0224,
1695
+ "train_samples_per_second": 56.907,
1696
+ "train_steps_per_second": 3.568
1697
  }
1698
  ],
1699
  "logging_steps": 500,
1700
+ "max_steps": 26600,
1701
  "num_input_tokens_seen": 0,
1702
+ "num_train_epochs": 100,
1703
  "save_steps": 500,
1704
  "stateful_callbacks": {
1705
  "TrainerControl": {
 
1713
  "attributes": {}
1714
  }
1715
  },
1716
+ "total_flos": 3.2879851193471386e+19,
1717
+ "train_batch_size": 16,
1718
  "trial_name": null,
1719
  "trial_params": null
1720
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2083fb31a3069b3585c96d3f3553dcb97dd69189e01137cd1ad0b76d96a137be
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c95be8dfa078f207769e6a45a60c21d63ea3efa725e9cccfa4863abae40541
3
  size 4731