indiejoseph committed on
Commit
6f3590e
1 Parent(s): e6baee5

End of training

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +37 -163
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
@@ -11,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # bert-base-cantonese
13
 
14
- This model was trained from scratch on an unknown dataset.
15
 
16
  ## Model description
17
 
 
1
  ---
2
+ base_model: /notebooks/cantonese/bert-base-cantonese
3
  tags:
4
  - generated_from_trainer
5
  model-index:
 
12
 
13
  # bert-base-cantonese
14
 
15
+ This model is a fine-tuned version of [/notebooks/cantonese/bert-base-cantonese](https://huggingface.co//notebooks/cantonese/bert-base-cantonese) on an unknown dataset.
16
 
17
  ## Model description
18
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.29,
3
- "train_loss": 0.0,
4
- "train_runtime": 0.0679,
5
  "train_samples": 12403,
6
- "train_samples_per_second": 2740728.613,
7
- "train_steps_per_second": 14142.275
8
  }
 
1
  {
2
+ "epoch": 14.92,
3
+ "train_loss": 0.7002776622772217,
4
+ "train_runtime": 2199.0947,
5
  "train_samples": 12403,
6
+ "train_samples_per_second": 84.601,
7
+ "train_steps_per_second": 0.437
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.29,
3
- "train_loss": 0.0,
4
- "train_runtime": 0.0679,
5
  "train_samples": 12403,
6
- "train_samples_per_second": 2740728.613,
7
- "train_steps_per_second": 14142.275
8
  }
 
1
  {
2
+ "epoch": 14.92,
3
+ "train_loss": 0.7002776622772217,
4
+ "train_runtime": 2199.0947,
5
  "train_samples": 12403,
6
+ "train_samples_per_second": 84.601,
7
+ "train_steps_per_second": 0.437
8
  }
trainer_state.json CHANGED
@@ -1,208 +1,82 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.287660466334818,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.21,
13
- "learning_rate": 4.8502545672356995e-05,
14
- "loss": 4.386,
15
  "step": 100
16
  },
17
  {
18
- "epoch": 0.42,
19
- "learning_rate": 4.700509134471399e-05,
20
- "loss": 2.3961,
21
  "step": 200
22
  },
23
  {
24
- "epoch": 0.63,
25
- "learning_rate": 4.550763701707098e-05,
26
- "loss": 2.0441,
27
  "step": 300
28
  },
29
  {
30
- "epoch": 0.84,
31
- "learning_rate": 4.401018268942798e-05,
32
- "loss": 1.8911,
33
  "step": 400
34
  },
35
  {
36
- "epoch": 1.05,
37
- "learning_rate": 4.251272836178497e-05,
38
- "loss": 1.8026,
39
  "step": 500
40
  },
41
  {
42
- "epoch": 1.26,
43
- "learning_rate": 4.101527403414196e-05,
44
- "loss": 1.7257,
45
  "step": 600
46
  },
47
  {
48
- "epoch": 1.47,
49
- "learning_rate": 3.9517819706498955e-05,
50
- "loss": 1.6764,
51
  "step": 700
52
  },
53
  {
54
- "epoch": 1.68,
55
- "learning_rate": 3.802036537885595e-05,
56
- "loss": 1.6339,
57
  "step": 800
58
  },
59
  {
60
- "epoch": 1.89,
61
- "learning_rate": 3.652291105121294e-05,
62
- "loss": 1.6142,
63
  "step": 900
64
  },
65
  {
66
- "epoch": 2.1,
67
- "learning_rate": 3.502545672356993e-05,
68
- "loss": 1.5706,
69
- "step": 1000
70
- },
71
- {
72
- "epoch": 2.31,
73
- "learning_rate": 3.352800239592693e-05,
74
- "loss": 1.5514,
75
- "step": 1100
76
- },
77
- {
78
- "epoch": 2.52,
79
- "learning_rate": 3.2030548068283916e-05,
80
- "loss": 1.5279,
81
- "step": 1200
82
- },
83
- {
84
- "epoch": 2.72,
85
- "learning_rate": 3.053309374064091e-05,
86
- "loss": 1.521,
87
- "step": 1300
88
- },
89
- {
90
- "epoch": 2.93,
91
- "learning_rate": 2.9035639412997907e-05,
92
- "loss": 1.4941,
93
- "step": 1400
94
- },
95
- {
96
- "epoch": 3.14,
97
- "learning_rate": 2.75381850853549e-05,
98
- "loss": 1.4752,
99
- "step": 1500
100
- },
101
- {
102
- "epoch": 3.35,
103
- "learning_rate": 2.604073075771189e-05,
104
- "loss": 1.4605,
105
- "step": 1600
106
- },
107
- {
108
- "epoch": 3.56,
109
- "learning_rate": 2.4543276430068884e-05,
110
- "loss": 1.4337,
111
- "step": 1700
112
- },
113
- {
114
- "epoch": 3.77,
115
- "learning_rate": 2.3045822102425876e-05,
116
- "loss": 1.4441,
117
- "step": 1800
118
- },
119
- {
120
- "epoch": 3.98,
121
- "learning_rate": 2.154836777478287e-05,
122
- "loss": 1.4323,
123
- "step": 1900
124
- },
125
- {
126
- "epoch": 4.19,
127
- "learning_rate": 2.0050913447139864e-05,
128
- "loss": 1.4108,
129
- "step": 2000
130
- },
131
- {
132
- "epoch": 4.4,
133
- "learning_rate": 1.8553459119496856e-05,
134
- "loss": 1.4184,
135
- "step": 2100
136
- },
137
- {
138
- "epoch": 4.61,
139
- "learning_rate": 1.7056004791853848e-05,
140
- "loss": 1.3959,
141
- "step": 2200
142
- },
143
- {
144
- "epoch": 4.82,
145
- "learning_rate": 1.555855046421084e-05,
146
- "loss": 1.3924,
147
- "step": 2300
148
- },
149
- {
150
- "epoch": 5.03,
151
- "learning_rate": 1.4061096136567836e-05,
152
- "loss": 1.3933,
153
- "step": 2400
154
- },
155
- {
156
- "epoch": 5.24,
157
- "learning_rate": 1.2563641808924828e-05,
158
- "loss": 1.3788,
159
- "step": 2500
160
- },
161
- {
162
- "epoch": 5.45,
163
- "learning_rate": 1.1066187481281822e-05,
164
- "loss": 1.3677,
165
- "step": 2600
166
- },
167
- {
168
- "epoch": 5.66,
169
- "learning_rate": 9.568733153638814e-06,
170
- "loss": 1.3602,
171
- "step": 2700
172
- },
173
- {
174
- "epoch": 5.87,
175
- "learning_rate": 8.071278825995808e-06,
176
- "loss": 1.3592,
177
- "step": 2800
178
- },
179
- {
180
- "epoch": 6.08,
181
- "learning_rate": 6.5738244983528e-06,
182
- "loss": 1.3522,
183
- "step": 2900
184
- },
185
- {
186
- "epoch": 6.29,
187
- "learning_rate": 5.0763701707097935e-06,
188
- "loss": 1.3586,
189
- "step": 3000
190
- },
191
- {
192
- "epoch": 6.29,
193
- "step": 3000,
194
- "total_flos": 9.05720378183041e+16,
195
- "train_loss": 0.0,
196
- "train_runtime": 0.0679,
197
- "train_samples_per_second": 2740728.613,
198
- "train_steps_per_second": 14142.275
199
  }
200
  ],
201
  "logging_steps": 100,
202
  "max_steps": 960,
203
  "num_train_epochs": 15,
204
  "save_steps": 500,
205
- "total_flos": 9.05720378183041e+16,
206
  "trial_name": null,
207
  "trial_params": null
208
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.922630560928432,
5
  "eval_steps": 500,
6
+ "global_step": 960,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.55,
13
+ "learning_rate": 4.21875e-05,
14
+ "loss": 4.1586,
15
  "step": 100
16
  },
17
  {
18
+ "epoch": 3.09,
19
+ "learning_rate": 3.4375e-05,
20
+ "loss": 2.0648,
21
  "step": 200
22
  },
23
  {
24
+ "epoch": 4.64,
25
+ "learning_rate": 2.6562500000000002e-05,
26
+ "loss": 1.7655,
27
  "step": 300
28
  },
29
  {
30
+ "epoch": 6.19,
31
+ "learning_rate": 1.8750000000000002e-05,
32
+ "loss": 1.6324,
33
  "step": 400
34
  },
35
  {
36
+ "epoch": 7.74,
37
+ "learning_rate": 1.09375e-05,
38
+ "loss": 1.5631,
39
  "step": 500
40
  },
41
  {
42
+ "epoch": 9.35,
43
+ "learning_rate": 1.8750000000000002e-05,
44
+ "loss": 1.5197,
45
  "step": 600
46
  },
47
  {
48
+ "epoch": 10.9,
49
+ "learning_rate": 1.3541666666666666e-05,
50
+ "loss": 1.4763,
51
  "step": 700
52
  },
53
  {
54
+ "epoch": 12.45,
55
+ "learning_rate": 8.333333333333334e-06,
56
+ "loss": 1.4491,
57
  "step": 800
58
  },
59
  {
60
+ "epoch": 13.99,
61
+ "learning_rate": 3.125e-06,
62
+ "loss": 1.4228,
63
  "step": 900
64
  },
65
  {
66
+ "epoch": 14.92,
67
+ "step": 960,
68
+ "total_flos": 4.8490452612096e+16,
69
+ "train_loss": 0.7002776622772217,
70
+ "train_runtime": 2199.0947,
71
+ "train_samples_per_second": 84.601,
72
+ "train_steps_per_second": 0.437
73
  }
74
  ],
75
  "logging_steps": 100,
76
  "max_steps": 960,
77
  "num_train_epochs": 15,
78
  "save_steps": 500,
79
+ "total_flos": 4.8490452612096e+16,
80
  "trial_name": null,
81
  "trial_params": null
82
  }