indiejoseph committed on
Commit
998e451
1 Parent(s): e583239

End of training

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +40 -154
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
@@ -11,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # bert-base-cantonese
13
 
14
- This model was trained from scratch on an unknown dataset.
15
 
16
  ## Model description
17
 
 
1
  ---
2
+ base_model: /notebooks/cantonese/bert-base-cantonese
3
  tags:
4
  - generated_from_trainer
5
  model-index:
 
12
 
13
  # bert-base-cantonese
14
 
15
+ This model is a fine-tuned version of [/notebooks/cantonese/bert-base-cantonese](https://huggingface.co//notebooks/cantonese/bert-base-cantonese) on an unknown dataset.
16
 
17
  ## Model description
18
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.0,
3
- "train_loss": 1.6127097251780127,
4
- "train_runtime": 8839.4407,
5
  "train_samples": 91601,
6
- "train_samples_per_second": 72.539,
7
- "train_steps_per_second": 0.378
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 1.1470813844522174,
4
+ "train_runtime": 3794.0646,
5
  "train_samples": 91601,
6
+ "train_samples_per_second": 72.43,
7
+ "train_steps_per_second": 0.377
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.0,
3
- "train_loss": 1.6127097251780127,
4
- "train_runtime": 8839.4407,
5
  "train_samples": 91601,
6
- "train_samples_per_second": 72.539,
7
- "train_steps_per_second": 0.378
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 1.1470813844522174,
4
+ "train_runtime": 3794.0646,
5
  "train_samples": 91601,
6
+ "train_samples_per_second": 72.43,
7
+ "train_steps_per_second": 0.377
8
  }
trainer_state.json CHANGED
@@ -1,226 +1,112 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.9981660990306525,
5
  "eval_steps": 500,
6
- "global_step": 3339,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.21,
13
- "learning_rate": 4.8502545672356995e-05,
14
- "loss": 4.386,
15
  "step": 100
16
  },
17
  {
18
  "epoch": 0.42,
19
- "learning_rate": 4.700509134471399e-05,
20
- "loss": 2.3961,
21
  "step": 200
22
  },
23
  {
24
  "epoch": 0.63,
25
- "learning_rate": 4.550763701707098e-05,
26
- "loss": 2.0441,
27
  "step": 300
28
  },
29
  {
30
  "epoch": 0.84,
31
- "learning_rate": 4.401018268942798e-05,
32
- "loss": 1.8911,
33
  "step": 400
34
  },
35
  {
36
  "epoch": 1.05,
37
- "learning_rate": 4.251272836178497e-05,
38
- "loss": 1.8026,
39
  "step": 500
40
  },
41
  {
42
  "epoch": 1.26,
43
- "learning_rate": 4.101527403414196e-05,
44
- "loss": 1.7257,
45
  "step": 600
46
  },
47
  {
48
  "epoch": 1.47,
49
- "learning_rate": 3.9517819706498955e-05,
50
- "loss": 1.6764,
51
  "step": 700
52
  },
53
  {
54
  "epoch": 1.68,
55
- "learning_rate": 3.802036537885595e-05,
56
- "loss": 1.6339,
57
  "step": 800
58
  },
59
  {
60
  "epoch": 1.89,
61
- "learning_rate": 3.652291105121294e-05,
62
- "loss": 1.6142,
63
  "step": 900
64
  },
65
  {
66
  "epoch": 2.1,
67
- "learning_rate": 3.502545672356993e-05,
68
- "loss": 1.5706,
69
  "step": 1000
70
  },
71
  {
72
  "epoch": 2.31,
73
- "learning_rate": 3.352800239592693e-05,
74
- "loss": 1.5514,
75
  "step": 1100
76
  },
77
  {
78
  "epoch": 2.52,
79
- "learning_rate": 3.2030548068283916e-05,
80
- "loss": 1.5279,
81
  "step": 1200
82
  },
83
  {
84
  "epoch": 2.72,
85
- "learning_rate": 3.053309374064091e-05,
86
- "loss": 1.521,
87
  "step": 1300
88
  },
89
  {
90
  "epoch": 2.93,
91
- "learning_rate": 2.9035639412997907e-05,
92
- "loss": 1.4941,
93
  "step": 1400
94
  },
95
  {
96
- "epoch": 3.14,
97
- "learning_rate": 2.75381850853549e-05,
98
- "loss": 1.4752,
99
- "step": 1500
100
- },
101
- {
102
- "epoch": 3.35,
103
- "learning_rate": 2.604073075771189e-05,
104
- "loss": 1.4605,
105
- "step": 1600
106
- },
107
- {
108
- "epoch": 3.56,
109
- "learning_rate": 2.4543276430068884e-05,
110
- "loss": 1.4337,
111
- "step": 1700
112
- },
113
- {
114
- "epoch": 3.77,
115
- "learning_rate": 2.3045822102425876e-05,
116
- "loss": 1.4441,
117
- "step": 1800
118
- },
119
- {
120
- "epoch": 3.98,
121
- "learning_rate": 2.154836777478287e-05,
122
- "loss": 1.4323,
123
- "step": 1900
124
- },
125
- {
126
- "epoch": 4.19,
127
- "learning_rate": 2.0050913447139864e-05,
128
- "loss": 1.4108,
129
- "step": 2000
130
- },
131
- {
132
- "epoch": 4.4,
133
- "learning_rate": 1.8553459119496856e-05,
134
- "loss": 1.4184,
135
- "step": 2100
136
- },
137
- {
138
- "epoch": 4.61,
139
- "learning_rate": 1.7056004791853848e-05,
140
- "loss": 1.3959,
141
- "step": 2200
142
- },
143
- {
144
- "epoch": 4.82,
145
- "learning_rate": 1.555855046421084e-05,
146
- "loss": 1.3924,
147
- "step": 2300
148
- },
149
- {
150
- "epoch": 5.03,
151
- "learning_rate": 1.4061096136567836e-05,
152
- "loss": 1.3933,
153
- "step": 2400
154
- },
155
- {
156
- "epoch": 5.24,
157
- "learning_rate": 1.2563641808924828e-05,
158
- "loss": 1.3788,
159
- "step": 2500
160
- },
161
- {
162
- "epoch": 5.45,
163
- "learning_rate": 1.1066187481281822e-05,
164
- "loss": 1.3677,
165
- "step": 2600
166
- },
167
- {
168
- "epoch": 5.66,
169
- "learning_rate": 9.568733153638814e-06,
170
- "loss": 1.3602,
171
- "step": 2700
172
- },
173
- {
174
- "epoch": 5.87,
175
- "learning_rate": 8.071278825995808e-06,
176
- "loss": 1.3592,
177
- "step": 2800
178
- },
179
- {
180
- "epoch": 6.08,
181
- "learning_rate": 6.5738244983528e-06,
182
- "loss": 1.3522,
183
- "step": 2900
184
- },
185
- {
186
- "epoch": 6.29,
187
- "learning_rate": 5.0763701707097935e-06,
188
- "loss": 1.3586,
189
- "step": 3000
190
- },
191
- {
192
- "epoch": 6.5,
193
- "learning_rate": 3.5789158430667866e-06,
194
- "loss": 1.3571,
195
- "step": 3100
196
- },
197
- {
198
- "epoch": 6.71,
199
- "learning_rate": 2.0814615154237796e-06,
200
- "loss": 1.3572,
201
- "step": 3200
202
- },
203
- {
204
- "epoch": 6.92,
205
- "learning_rate": 5.840071877807727e-07,
206
- "loss": 1.3452,
207
- "step": 3300
208
- },
209
- {
210
- "epoch": 7.0,
211
- "step": 3339,
212
- "total_flos": 1.0075939996832611e+17,
213
- "train_loss": 1.6127097251780127,
214
- "train_runtime": 8839.4407,
215
- "train_samples_per_second": 72.539,
216
- "train_steps_per_second": 0.378
217
  }
218
  ],
219
  "logging_steps": 100,
220
- "max_steps": 3339,
221
- "num_train_epochs": 7,
222
  "save_steps": 500,
223
- "total_flos": 1.0075939996832611e+17,
224
  "trial_name": null,
225
  "trial_params": null
226
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.999214042441708,
5
  "eval_steps": 500,
6
+ "global_step": 1431,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.21,
13
+ "learning_rate": 4.650593990216632e-05,
14
+ "loss": 1.3096,
15
  "step": 100
16
  },
17
  {
18
  "epoch": 0.42,
19
+ "learning_rate": 4.301187980433264e-05,
20
+ "loss": 1.2305,
21
  "step": 200
22
  },
23
  {
24
  "epoch": 0.63,
25
+ "learning_rate": 3.9517819706498955e-05,
26
+ "loss": 1.1942,
27
  "step": 300
28
  },
29
  {
30
  "epoch": 0.84,
31
+ "learning_rate": 3.602375960866527e-05,
32
+ "loss": 1.1705,
33
  "step": 400
34
  },
35
  {
36
  "epoch": 1.05,
37
+ "learning_rate": 3.252969951083159e-05,
38
+ "loss": 1.152,
39
  "step": 500
40
  },
41
  {
42
  "epoch": 1.26,
43
+ "learning_rate": 2.9035639412997907e-05,
44
+ "loss": 1.1318,
45
  "step": 600
46
  },
47
  {
48
  "epoch": 1.47,
49
+ "learning_rate": 2.554157931516422e-05,
50
+ "loss": 1.1211,
51
  "step": 700
52
  },
53
  {
54
  "epoch": 1.68,
55
+ "learning_rate": 2.204751921733054e-05,
56
+ "loss": 1.1145,
57
  "step": 800
58
  },
59
  {
60
  "epoch": 1.89,
61
+ "learning_rate": 1.8553459119496856e-05,
62
+ "loss": 1.1184,
63
  "step": 900
64
  },
65
  {
66
  "epoch": 2.1,
67
+ "learning_rate": 1.5059399021663173e-05,
68
+ "loss": 1.1012,
69
  "step": 1000
70
  },
71
  {
72
  "epoch": 2.31,
73
+ "learning_rate": 1.1565338923829489e-05,
74
+ "loss": 1.1053,
75
  "step": 1100
76
  },
77
  {
78
  "epoch": 2.52,
79
+ "learning_rate": 8.071278825995808e-06,
80
+ "loss": 1.1016,
81
  "step": 1200
82
  },
83
  {
84
  "epoch": 2.72,
85
+ "learning_rate": 4.577218728162125e-06,
86
+ "loss": 1.1131,
87
  "step": 1300
88
  },
89
  {
90
  "epoch": 2.93,
91
+ "learning_rate": 1.0831586303284416e-06,
92
+ "loss": 1.1086,
93
  "step": 1400
94
  },
95
  {
96
+ "epoch": 3.0,
97
+ "step": 1431,
98
+ "total_flos": 4.3173778473741864e+16,
99
+ "train_loss": 1.1470813844522174,
100
+ "train_runtime": 3794.0646,
101
+ "train_samples_per_second": 72.43,
102
+ "train_steps_per_second": 0.377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
104
  ],
105
  "logging_steps": 100,
106
+ "max_steps": 1431,
107
+ "num_train_epochs": 3,
108
  "save_steps": 500,
109
+ "total_flos": 4.3173778473741864e+16,
110
  "trial_name": null,
111
  "trial_params": null
112
  }