akahana committed on
Commit
fab5b35
1 Parent(s): 81122b9

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: smallbert-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # smallbert-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: smallbert-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.1417211592798902
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # smallbert-javanese
28
 
29
+ This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 6.2400
32
+ - Accuracy: 0.1417
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 0.1432211125795332,
4
- "eval_loss": 6.276556968688965,
5
- "eval_runtime": 5.1628,
6
  "eval_samples": 963,
7
- "eval_samples_per_second": 186.528,
8
- "eval_steps_per_second": 11.815,
9
- "perplexity": 531.9539730039461,
10
- "total_flos": 1.513744687742976e+16,
11
- "train_loss": 3.172654545207319,
12
- "train_runtime": 2061.7586,
13
  "train_samples": 19092,
14
- "train_samples_per_second": 185.201,
15
- "train_steps_per_second": 5.791
16
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.1417211592798902,
4
+ "eval_loss": 6.239955902099609,
5
+ "eval_runtime": 5.4847,
6
  "eval_samples": 963,
7
+ "eval_samples_per_second": 175.578,
8
+ "eval_steps_per_second": 11.122,
9
+ "perplexity": 512.8358954579503,
10
+ "total_flos": 1.89218085967872e+16,
11
+ "train_loss": 1.2342067862914834,
12
+ "train_runtime": 1036.8459,
13
  "train_samples": 19092,
14
+ "train_samples_per_second": 460.338,
15
+ "train_steps_per_second": 14.395
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 0.1432211125795332,
4
- "eval_loss": 6.276556968688965,
5
- "eval_runtime": 5.1628,
6
  "eval_samples": 963,
7
- "eval_samples_per_second": 186.528,
8
- "eval_steps_per_second": 11.815,
9
- "perplexity": 531.9539730039461
10
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "eval_accuracy": 0.1417211592798902,
4
+ "eval_loss": 6.239955902099609,
5
+ "eval_runtime": 5.4847,
6
  "eval_samples": 963,
7
+ "eval_samples_per_second": 175.578,
8
+ "eval_steps_per_second": 11.122,
9
+ "perplexity": 512.8358954579503
10
  }
runs/Jul31_02-24-15_390be55cffea/events.out.tfevents.1722393739.390be55cffea.22163.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b41a4f4e583400700c1fa1321ba7dd972a51cb5198615d670957efefefc505fd
3
+ size 411
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 20.0,
3
- "total_flos": 1.513744687742976e+16,
4
- "train_loss": 3.172654545207319,
5
- "train_runtime": 2061.7586,
6
  "train_samples": 19092,
7
- "train_samples_per_second": 185.201,
8
- "train_steps_per_second": 5.791
9
  }
 
1
  {
2
+ "epoch": 25.0,
3
+ "total_flos": 1.89218085967872e+16,
4
+ "train_loss": 1.2342067862914834,
5
+ "train_runtime": 1036.8459,
6
  "train_samples": 19092,
7
+ "train_samples_per_second": 460.338,
8
+ "train_steps_per_second": 14.395
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.0,
5
  "eval_steps": 500,
6
- "global_step": 11940,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -186,12 +186,63 @@
186
  "train_runtime": 2061.7586,
187
  "train_samples_per_second": 185.201,
188
  "train_steps_per_second": 5.791
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
  ],
191
  "logging_steps": 500,
192
- "max_steps": 11940,
193
  "num_input_tokens_seen": 0,
194
- "num_train_epochs": 20,
195
  "save_steps": 1000,
196
  "stateful_callbacks": {
197
  "TrainerControl": {
@@ -205,7 +256,7 @@
205
  "attributes": {}
206
  }
207
  },
208
- "total_flos": 1.513744687742976e+16,
209
  "train_batch_size": 32,
210
  "trial_name": null,
211
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 25.0,
5
  "eval_steps": 500,
6
+ "global_step": 14925,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
186
  "train_runtime": 2061.7586,
187
  "train_samples_per_second": 185.201,
188
  "train_steps_per_second": 5.791
189
+ },
190
+ {
191
+ "epoch": 20.100502512562816,
192
+ "grad_norm": 1.9149502515792847,
193
+ "learning_rate": 4.9798994974874375e-05,
194
+ "loss": 6.2133,
195
+ "step": 12000
196
+ },
197
+ {
198
+ "epoch": 20.938023450586265,
199
+ "grad_norm": 1.8770983219146729,
200
+ "learning_rate": 4.8123953098827474e-05,
201
+ "loss": 6.222,
202
+ "step": 12500
203
+ },
204
+ {
205
+ "epoch": 21.775544388609717,
206
+ "grad_norm": 1.8764543533325195,
207
+ "learning_rate": 4.6448911222780573e-05,
208
+ "loss": 6.1987,
209
+ "step": 13000
210
+ },
211
+ {
212
+ "epoch": 22.613065326633166,
213
+ "grad_norm": 1.928965449333191,
214
+ "learning_rate": 4.477386934673367e-05,
215
+ "loss": 6.1787,
216
+ "step": 13500
217
+ },
218
+ {
219
+ "epoch": 23.450586264656618,
220
+ "grad_norm": 1.6476266384124756,
221
+ "learning_rate": 4.309882747068677e-05,
222
+ "loss": 6.1524,
223
+ "step": 14000
224
+ },
225
+ {
226
+ "epoch": 24.288107202680067,
227
+ "grad_norm": 1.7504838705062866,
228
+ "learning_rate": 4.142378559463987e-05,
229
+ "loss": 6.1409,
230
+ "step": 14500
231
+ },
232
+ {
233
+ "epoch": 25.0,
234
+ "step": 14925,
235
+ "total_flos": 1.89218085967872e+16,
236
+ "train_loss": 1.2342067862914834,
237
+ "train_runtime": 1036.8459,
238
+ "train_samples_per_second": 460.338,
239
+ "train_steps_per_second": 14.395
240
  }
241
  ],
242
  "logging_steps": 500,
243
+ "max_steps": 14925,
244
  "num_input_tokens_seen": 0,
245
+ "num_train_epochs": 25,
246
  "save_steps": 1000,
247
  "stateful_callbacks": {
248
  "TrainerControl": {
 
256
  "attributes": {}
257
  }
258
  },
259
+ "total_flos": 1.89218085967872e+16,
260
  "train_batch_size": 32,
261
  "trial_name": null,
262
  "trial_params": null