tmnam20 commited on
Commit
caf6650
1 Parent(s): d5f8dba

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +174 -0
trainer_state.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1074,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14,
13
+ "learning_rate": 1.9068901303538176e-05,
14
+ "loss": 0.8387,
15
+ "step": 50
16
+ },
17
+ {
18
+ "epoch": 0.28,
19
+ "learning_rate": 1.813780260707635e-05,
20
+ "loss": 0.4021,
21
+ "step": 100
22
+ },
23
+ {
24
+ "epoch": 0.42,
25
+ "learning_rate": 1.7206703910614527e-05,
26
+ "loss": 0.2884,
27
+ "step": 150
28
+ },
29
+ {
30
+ "epoch": 0.56,
31
+ "learning_rate": 1.62756052141527e-05,
32
+ "loss": 0.2756,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.7,
37
+ "learning_rate": 1.5344506517690876e-05,
38
+ "loss": 0.2636,
39
+ "step": 250
40
+ },
41
+ {
42
+ "epoch": 0.84,
43
+ "learning_rate": 1.4413407821229052e-05,
44
+ "loss": 0.2349,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.98,
49
+ "learning_rate": 1.3482309124767226e-05,
50
+ "loss": 0.2557,
51
+ "step": 350
52
+ },
53
+ {
54
+ "epoch": 1.12,
55
+ "learning_rate": 1.25512104283054e-05,
56
+ "loss": 0.2014,
57
+ "step": 400
58
+ },
59
+ {
60
+ "epoch": 1.26,
61
+ "learning_rate": 1.1620111731843577e-05,
62
+ "loss": 0.1959,
63
+ "step": 450
64
+ },
65
+ {
66
+ "epoch": 1.4,
67
+ "learning_rate": 1.0689013035381753e-05,
68
+ "loss": 0.1964,
69
+ "step": 500
70
+ },
71
+ {
72
+ "epoch": 1.4,
73
+ "eval_accuracy": 0.9355653821857233,
74
+ "eval_loss": 0.2315603792667389,
75
+ "eval_runtime": 3.5804,
76
+ "eval_samples_per_second": 442.134,
77
+ "eval_steps_per_second": 27.651,
78
+ "step": 500
79
+ },
80
+ {
81
+ "epoch": 1.54,
82
+ "learning_rate": 9.757914338919926e-06,
83
+ "loss": 0.2142,
84
+ "step": 550
85
+ },
86
+ {
87
+ "epoch": 1.68,
88
+ "learning_rate": 8.826815642458101e-06,
89
+ "loss": 0.1913,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.82,
94
+ "learning_rate": 7.895716945996277e-06,
95
+ "loss": 0.1534,
96
+ "step": 650
97
+ },
98
+ {
99
+ "epoch": 1.96,
100
+ "learning_rate": 6.964618249534451e-06,
101
+ "loss": 0.1755,
102
+ "step": 700
103
+ },
104
+ {
105
+ "epoch": 2.09,
106
+ "learning_rate": 6.033519553072626e-06,
107
+ "loss": 0.1508,
108
+ "step": 750
109
+ },
110
+ {
111
+ "epoch": 2.23,
112
+ "learning_rate": 5.102420856610801e-06,
113
+ "loss": 0.1244,
114
+ "step": 800
115
+ },
116
+ {
117
+ "epoch": 2.37,
118
+ "learning_rate": 4.171322160148976e-06,
119
+ "loss": 0.1785,
120
+ "step": 850
121
+ },
122
+ {
123
+ "epoch": 2.51,
124
+ "learning_rate": 3.240223463687151e-06,
125
+ "loss": 0.1347,
126
+ "step": 900
127
+ },
128
+ {
129
+ "epoch": 2.65,
130
+ "learning_rate": 2.3091247672253262e-06,
131
+ "loss": 0.132,
132
+ "step": 950
133
+ },
134
+ {
135
+ "epoch": 2.79,
136
+ "learning_rate": 1.378026070763501e-06,
137
+ "loss": 0.1334,
138
+ "step": 1000
139
+ },
140
+ {
141
+ "epoch": 2.79,
142
+ "eval_accuracy": 0.9519898926089703,
143
+ "eval_loss": 0.21129217743873596,
144
+ "eval_runtime": 3.5946,
145
+ "eval_samples_per_second": 440.378,
146
+ "eval_steps_per_second": 27.541,
147
+ "step": 1000
148
+ },
149
+ {
150
+ "epoch": 2.93,
151
+ "learning_rate": 4.46927374301676e-07,
152
+ "loss": 0.1359,
153
+ "step": 1050
154
+ },
155
+ {
156
+ "epoch": 3.0,
157
+ "step": 1074,
158
+ "total_flos": 3337406743067076.0,
159
+ "train_loss": 0.23017916310876885,
160
+ "train_runtime": 431.913,
161
+ "train_samples_per_second": 79.363,
162
+ "train_steps_per_second": 2.487
163
+ }
164
+ ],
165
+ "logging_steps": 50,
166
+ "max_steps": 1074,
167
+ "num_input_tokens_seen": 0,
168
+ "num_train_epochs": 3,
169
+ "save_steps": 500,
170
+ "total_flos": 3337406743067076.0,
171
+ "train_batch_size": 32,
172
+ "trial_name": null,
173
+ "trial_params": null
174
+ }