indiejoseph committed on
Commit
1b05c1d
1 Parent(s): 8876b51

End of training

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +170 -86
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
@@ -11,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # bert-base-cantonese
13
 
14
- This model was trained from scratch on an unknown dataset.
15
 
16
  ## Model description
17
 
 
1
  ---
2
+ base_model: /notebooks/cantonese/bert-base-cantonese
3
  tags:
4
  - generated_from_trainer
5
  model-index:
 
12
 
13
  # bert-base-cantonese
14
 
15
+ This model is a fine-tuned version of `/notebooks/cantonese/bert-base-cantonese` on an unknown dataset.
16
 
17
  ## Model description
18
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 1.327384843945802,
4
- "train_runtime": 12602.7945,
5
- "train_samples": 212813,
6
- "train_samples_per_second": 50.659,
7
- "train_steps_per_second": 0.791
8
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "train_loss": 1.6127097251780127,
4
+ "train_runtime": 8839.4407,
5
+ "train_samples": 91601,
6
+ "train_samples_per_second": 72.539,
7
+ "train_steps_per_second": 0.378
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 1.327384843945802,
4
- "train_runtime": 12602.7945,
5
- "train_samples": 212813,
6
- "train_samples_per_second": 50.659,
7
- "train_steps_per_second": 0.791
8
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "train_loss": 1.6127097251780127,
4
+ "train_runtime": 8839.4407,
5
+ "train_samples": 91601,
6
+ "train_samples_per_second": 72.539,
7
+ "train_steps_per_second": 0.378
8
  }
trainer_state.json CHANGED
@@ -1,142 +1,226 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.999774453048643,
5
  "eval_steps": 500,
6
- "global_step": 9975,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.15,
13
- "learning_rate": 9.49874686716792e-05,
14
- "loss": 2.1878,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.3,
19
- "learning_rate": 8.99749373433584e-05,
20
- "loss": 1.5576,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.45,
25
- "learning_rate": 8.49624060150376e-05,
26
- "loss": 1.4644,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.6,
31
- "learning_rate": 7.994987468671679e-05,
32
- "loss": 1.4131,
33
- "step": 2000
34
  },
35
  {
36
- "epoch": 0.75,
37
- "learning_rate": 7.4937343358396e-05,
38
- "loss": 1.3789,
39
- "step": 2500
40
  },
41
  {
42
- "epoch": 0.9,
43
- "learning_rate": 6.99248120300752e-05,
44
- "loss": 1.3443,
45
- "step": 3000
46
  },
47
  {
48
- "epoch": 1.05,
49
- "learning_rate": 6.49122807017544e-05,
50
- "loss": 1.3263,
51
- "step": 3500
52
  },
53
  {
54
- "epoch": 1.2,
55
- "learning_rate": 5.989974937343359e-05,
56
- "loss": 1.2983,
57
- "step": 4000
58
  },
59
  {
60
- "epoch": 1.35,
61
- "learning_rate": 5.4887218045112786e-05,
62
- "loss": 1.2849,
63
- "step": 4500
64
  },
65
  {
66
- "epoch": 1.5,
67
- "learning_rate": 4.987468671679198e-05,
68
- "loss": 1.2644,
69
- "step": 5000
70
  },
71
  {
72
- "epoch": 1.65,
73
- "learning_rate": 4.486215538847118e-05,
74
- "loss": 1.2527,
75
- "step": 5500
76
  },
77
  {
78
- "epoch": 1.8,
79
- "learning_rate": 3.9849624060150376e-05,
80
- "loss": 1.241,
81
- "step": 6000
82
  },
83
  {
84
- "epoch": 1.95,
85
- "learning_rate": 3.483709273182957e-05,
86
- "loss": 1.2278,
87
- "step": 6500
88
  },
89
  {
90
- "epoch": 2.11,
91
- "learning_rate": 2.9824561403508772e-05,
92
- "loss": 1.2116,
93
- "step": 7000
94
  },
95
  {
96
- "epoch": 2.26,
97
- "learning_rate": 2.4812030075187968e-05,
98
- "loss": 1.1994,
99
- "step": 7500
100
  },
101
  {
102
- "epoch": 2.41,
103
- "learning_rate": 1.9799498746867168e-05,
104
- "loss": 1.1933,
105
- "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
106
  },
107
  {
108
- "epoch": 2.56,
109
- "learning_rate": 1.4786967418546366e-05,
110
- "loss": 1.1796,
111
- "step": 8500
112
  },
113
  {
114
- "epoch": 2.71,
115
- "learning_rate": 9.774436090225564e-06,
116
- "loss": 1.1787,
117
- "step": 9000
118
  },
119
  {
120
- "epoch": 2.86,
121
- "learning_rate": 4.7619047619047615e-06,
122
- "loss": 1.1672,
123
- "step": 9500
124
  },
125
  {
126
- "epoch": 3.0,
127
- "step": 9975,
128
- "total_flos": 1.6801282333309133e+17,
129
- "train_loss": 1.327384843945802,
130
- "train_runtime": 12602.7945,
131
- "train_samples_per_second": 50.659,
132
- "train_steps_per_second": 0.791
133
  }
134
  ],
135
- "logging_steps": 500,
136
- "max_steps": 9975,
137
- "num_train_epochs": 3,
138
  "save_steps": 500,
139
- "total_flos": 1.6801282333309133e+17,
140
  "trial_name": null,
141
  "trial_params": null
142
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.9981660990306525,
5
  "eval_steps": 500,
6
+ "global_step": 3339,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.21,
13
+ "learning_rate": 4.8502545672356995e-05,
14
+ "loss": 4.386,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.42,
19
+ "learning_rate": 4.700509134471399e-05,
20
+ "loss": 2.3961,
21
+ "step": 200
22
+ },
23
+ {
24
+ "epoch": 0.63,
25
+ "learning_rate": 4.550763701707098e-05,
26
+ "loss": 2.0441,
27
+ "step": 300
28
+ },
29
+ {
30
+ "epoch": 0.84,
31
+ "learning_rate": 4.401018268942798e-05,
32
+ "loss": 1.8911,
33
+ "step": 400
34
+ },
35
+ {
36
+ "epoch": 1.05,
37
+ "learning_rate": 4.251272836178497e-05,
38
+ "loss": 1.8026,
39
  "step": 500
40
  },
41
  {
42
+ "epoch": 1.26,
43
+ "learning_rate": 4.101527403414196e-05,
44
+ "loss": 1.7257,
45
+ "step": 600
46
+ },
47
+ {
48
+ "epoch": 1.47,
49
+ "learning_rate": 3.9517819706498955e-05,
50
+ "loss": 1.6764,
51
+ "step": 700
52
+ },
53
+ {
54
+ "epoch": 1.68,
55
+ "learning_rate": 3.802036537885595e-05,
56
+ "loss": 1.6339,
57
+ "step": 800
58
+ },
59
+ {
60
+ "epoch": 1.89,
61
+ "learning_rate": 3.652291105121294e-05,
62
+ "loss": 1.6142,
63
+ "step": 900
64
+ },
65
+ {
66
+ "epoch": 2.1,
67
+ "learning_rate": 3.502545672356993e-05,
68
+ "loss": 1.5706,
69
  "step": 1000
70
  },
71
  {
72
+ "epoch": 2.31,
73
+ "learning_rate": 3.352800239592693e-05,
74
+ "loss": 1.5514,
75
+ "step": 1100
76
+ },
77
+ {
78
+ "epoch": 2.52,
79
+ "learning_rate": 3.2030548068283916e-05,
80
+ "loss": 1.5279,
81
+ "step": 1200
82
+ },
83
+ {
84
+ "epoch": 2.72,
85
+ "learning_rate": 3.053309374064091e-05,
86
+ "loss": 1.521,
87
+ "step": 1300
88
+ },
89
+ {
90
+ "epoch": 2.93,
91
+ "learning_rate": 2.9035639412997907e-05,
92
+ "loss": 1.4941,
93
+ "step": 1400
94
+ },
95
+ {
96
+ "epoch": 3.14,
97
+ "learning_rate": 2.75381850853549e-05,
98
+ "loss": 1.4752,
99
  "step": 1500
100
  },
101
  {
102
+ "epoch": 3.35,
103
+ "learning_rate": 2.604073075771189e-05,
104
+ "loss": 1.4605,
105
+ "step": 1600
106
  },
107
  {
108
+ "epoch": 3.56,
109
+ "learning_rate": 2.4543276430068884e-05,
110
+ "loss": 1.4337,
111
+ "step": 1700
112
  },
113
  {
114
+ "epoch": 3.77,
115
+ "learning_rate": 2.3045822102425876e-05,
116
+ "loss": 1.4441,
117
+ "step": 1800
118
  },
119
  {
120
+ "epoch": 3.98,
121
+ "learning_rate": 2.154836777478287e-05,
122
+ "loss": 1.4323,
123
+ "step": 1900
124
  },
125
  {
126
+ "epoch": 4.19,
127
+ "learning_rate": 2.0050913447139864e-05,
128
+ "loss": 1.4108,
129
+ "step": 2000
130
  },
131
  {
132
+ "epoch": 4.4,
133
+ "learning_rate": 1.8553459119496856e-05,
134
+ "loss": 1.4184,
135
+ "step": 2100
136
  },
137
  {
138
+ "epoch": 4.61,
139
+ "learning_rate": 1.7056004791853848e-05,
140
+ "loss": 1.3959,
141
+ "step": 2200
142
  },
143
  {
144
+ "epoch": 4.82,
145
+ "learning_rate": 1.555855046421084e-05,
146
+ "loss": 1.3924,
147
+ "step": 2300
148
  },
149
  {
150
+ "epoch": 5.03,
151
+ "learning_rate": 1.4061096136567836e-05,
152
+ "loss": 1.3933,
153
+ "step": 2400
154
  },
155
  {
156
+ "epoch": 5.24,
157
+ "learning_rate": 1.2563641808924828e-05,
158
+ "loss": 1.3788,
159
+ "step": 2500
160
  },
161
  {
162
+ "epoch": 5.45,
163
+ "learning_rate": 1.1066187481281822e-05,
164
+ "loss": 1.3677,
165
+ "step": 2600
166
  },
167
  {
168
+ "epoch": 5.66,
169
+ "learning_rate": 9.568733153638814e-06,
170
+ "loss": 1.3602,
171
+ "step": 2700
172
  },
173
  {
174
+ "epoch": 5.87,
175
+ "learning_rate": 8.071278825995808e-06,
176
+ "loss": 1.3592,
177
+ "step": 2800
178
+ },
179
+ {
180
+ "epoch": 6.08,
181
+ "learning_rate": 6.5738244983528e-06,
182
+ "loss": 1.3522,
183
+ "step": 2900
184
+ },
185
+ {
186
+ "epoch": 6.29,
187
+ "learning_rate": 5.0763701707097935e-06,
188
+ "loss": 1.3586,
189
+ "step": 3000
190
  },
191
  {
192
+ "epoch": 6.5,
193
+ "learning_rate": 3.5789158430667866e-06,
194
+ "loss": 1.3571,
195
+ "step": 3100
196
  },
197
  {
198
+ "epoch": 6.71,
199
+ "learning_rate": 2.0814615154237796e-06,
200
+ "loss": 1.3572,
201
+ "step": 3200
202
  },
203
  {
204
+ "epoch": 6.92,
205
+ "learning_rate": 5.840071877807727e-07,
206
+ "loss": 1.3452,
207
+ "step": 3300
208
  },
209
  {
210
+ "epoch": 7.0,
211
+ "step": 3339,
212
+ "total_flos": 1.0075939996832611e+17,
213
+ "train_loss": 1.6127097251780127,
214
+ "train_runtime": 8839.4407,
215
+ "train_samples_per_second": 72.539,
216
+ "train_steps_per_second": 0.378
217
  }
218
  ],
219
+ "logging_steps": 100,
220
+ "max_steps": 3339,
221
+ "num_train_epochs": 7,
222
  "save_steps": 500,
223
+ "total_flos": 1.0075939996832611e+17,
224
  "trial_name": null,
225
  "trial_params": null
226
  }