nbroad HF staff committed on
Commit
2b187d2
1 Parent(s): 31862b1

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +19 -0
  2. test_results.json +13 -0
  3. train_results.json +8 -0
  4. trainer_state.json +175 -0
all_results.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_samples": 1233,
4
+ "test_false_f1": 0.789407313997478,
5
+ "test_loss": 0.616358757019043,
6
+ "test_macro_f1": 0.6733562800163174,
7
+ "test_micro_f1": 0.7923763179237632,
8
+ "test_mixture_f1": 0.5283950617283951,
9
+ "test_runtime": 37.1249,
10
+ "test_samples_per_second": 33.212,
11
+ "test_steps_per_second": 1.051,
12
+ "test_true_f1": 0.9064869418702612,
13
+ "test_unproven_f1": 0.46913580246913583,
14
+ "train_loss": 0.49966207906692944,
15
+ "train_runtime": 2323.0594,
16
+ "train_samples": 9804,
17
+ "train_samples_per_second": 12.661,
18
+ "train_steps_per_second": 0.792
19
+ }
test_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_samples": 1233,
3
+ "test_false_f1": 0.789407313997478,
4
+ "test_loss": 0.616358757019043,
5
+ "test_macro_f1": 0.6733562800163174,
6
+ "test_micro_f1": 0.7923763179237632,
7
+ "test_mixture_f1": 0.5283950617283951,
8
+ "test_runtime": 37.1249,
9
+ "test_samples_per_second": 33.212,
10
+ "test_steps_per_second": 1.051,
11
+ "test_true_f1": 0.9064869418702612,
12
+ "test_unproven_f1": 0.46913580246913583
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.49966207906692944,
4
+ "train_runtime": 2323.0594,
5
+ "train_samples": 9804,
6
+ "train_samples_per_second": 12.661,
7
+ "train_steps_per_second": 0.792
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.812191103789127,
3
+ "best_model_checkpoint": "./longformer-base-health-fact2/checkpoint-1839",
4
+ "epoch": 3.0,
5
+ "global_step": 1839,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.16,
12
+ "learning_rate": 1.597826086956522e-05,
13
+ "loss": 1.1814,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.33,
18
+ "learning_rate": 2.9764350453172204e-05,
19
+ "loss": 0.7803,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.49,
24
+ "learning_rate": 2.795166163141994e-05,
25
+ "loss": 0.695,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.65,
30
+ "learning_rate": 2.61570996978852e-05,
31
+ "loss": 0.6093,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.82,
36
+ "learning_rate": 2.434441087613293e-05,
37
+ "loss": 0.5988,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.98,
42
+ "learning_rate": 2.2531722054380667e-05,
43
+ "loss": 0.555,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 1.0,
48
+ "eval_false_f1": 0.7698209718670076,
49
+ "eval_loss": 0.5242577791213989,
50
+ "eval_macro_f1": 0.5534734720356734,
51
+ "eval_micro_f1": 0.7841845140032949,
52
+ "eval_mixture_f1": 0.4169611307420495,
53
+ "eval_runtime": 35.7477,
54
+ "eval_samples_per_second": 33.96,
55
+ "eval_steps_per_second": 1.063,
56
+ "eval_true_f1": 0.8937784522003035,
57
+ "eval_unproven_f1": 0.13333333333333333,
58
+ "step": 613
59
+ },
60
+ {
61
+ "epoch": 1.14,
62
+ "learning_rate": 2.07190332326284e-05,
63
+ "loss": 0.5169,
64
+ "step": 700
65
+ },
66
+ {
67
+ "epoch": 1.31,
68
+ "learning_rate": 1.8906344410876132e-05,
69
+ "loss": 0.4659,
70
+ "step": 800
71
+ },
72
+ {
73
+ "epoch": 1.47,
74
+ "learning_rate": 1.709365558912387e-05,
75
+ "loss": 0.4781,
76
+ "step": 900
77
+ },
78
+ {
79
+ "epoch": 1.63,
80
+ "learning_rate": 1.52809667673716e-05,
81
+ "loss": 0.4591,
82
+ "step": 1000
83
+ },
84
+ {
85
+ "epoch": 1.79,
86
+ "learning_rate": 1.3468277945619335e-05,
87
+ "loss": 0.4591,
88
+ "step": 1100
89
+ },
90
+ {
91
+ "epoch": 1.96,
92
+ "learning_rate": 1.165558912386707e-05,
93
+ "loss": 0.4282,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 2.0,
98
+ "eval_false_f1": 0.7829360100376411,
99
+ "eval_loss": 0.5008112788200378,
100
+ "eval_macro_f1": 0.6393266392992799,
101
+ "eval_micro_f1": 0.8031301482701811,
102
+ "eval_mixture_f1": 0.46052631578947373,
103
+ "eval_runtime": 35.7682,
104
+ "eval_samples_per_second": 33.941,
105
+ "eval_steps_per_second": 1.062,
106
+ "eval_true_f1": 0.9199048374306107,
107
+ "eval_unproven_f1": 0.3939393939393939,
108
+ "step": 1226
109
+ },
110
+ {
111
+ "epoch": 2.12,
112
+ "learning_rate": 9.842900302114804e-06,
113
+ "loss": 0.3421,
114
+ "step": 1300
115
+ },
116
+ {
117
+ "epoch": 2.28,
118
+ "learning_rate": 8.030211480362539e-06,
119
+ "loss": 0.3261,
120
+ "step": 1400
121
+ },
122
+ {
123
+ "epoch": 2.45,
124
+ "learning_rate": 6.217522658610272e-06,
125
+ "loss": 0.3269,
126
+ "step": 1500
127
+ },
128
+ {
129
+ "epoch": 2.61,
130
+ "learning_rate": 4.404833836858006e-06,
131
+ "loss": 0.3102,
132
+ "step": 1600
133
+ },
134
+ {
135
+ "epoch": 2.77,
136
+ "learning_rate": 2.5921450151057403e-06,
137
+ "loss": 0.2872,
138
+ "step": 1700
139
+ },
140
+ {
141
+ "epoch": 2.94,
142
+ "learning_rate": 7.794561933534744e-07,
143
+ "loss": 0.2897,
144
+ "step": 1800
145
+ },
146
+ {
147
+ "epoch": 3.0,
148
+ "eval_false_f1": 0.7941176470588236,
149
+ "eval_loss": 0.5857986211776733,
150
+ "eval_macro_f1": 0.6829679742192657,
151
+ "eval_micro_f1": 0.812191103789127,
152
+ "eval_mixture_f1": 0.5014925373134329,
153
+ "eval_runtime": 35.8572,
154
+ "eval_samples_per_second": 33.857,
155
+ "eval_steps_per_second": 1.06,
156
+ "eval_true_f1": 0.9234411996842935,
157
+ "eval_unproven_f1": 0.5128205128205128,
158
+ "step": 1839
159
+ },
160
+ {
161
+ "epoch": 3.0,
162
+ "step": 1839,
163
+ "total_flos": 2.18669047246848e+16,
164
+ "train_loss": 0.49966207906692944,
165
+ "train_runtime": 2323.0594,
166
+ "train_samples_per_second": 12.661,
167
+ "train_steps_per_second": 0.792
168
+ }
169
+ ],
170
+ "max_steps": 1839,
171
+ "num_train_epochs": 3,
172
+ "total_flos": 2.18669047246848e+16,
173
+ "trial_name": null,
174
+ "trial_params": null
175
+ }