nlparabic committed
Commit 70ccabd
1 Parent(s): aa08640

End of training

README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 2.0654
- - Bleu: 0.3119
- - Rouge1: 0.5862
- - Rouge2: 0.3489
- - Rougel: 0.5479
+ - Loss: 2.1411
+ - Bleu: 0.2987
+ - Rouge1: 0.5831
+ - Rouge2: 0.3405
+ - Rougel: 0.5413
 
 ## Model description
 
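For context, the loss reported in the README is the mean causal-LM cross-entropy on the held-out set. Below is a minimal sketch of that kind of evaluation with the transformers API; it is not this repo's actual evaluation script, the checkpoint id shown is the base model from the README (swap in the fine-tuned checkpoint path), and the evaluation text is a placeholder since the dataset is listed as unknown. Loading this specific model may need extra tokenizer setup.

```python
# Minimal sketch (assumed workflow, not the repo's evaluation script):
# compute eval loss and perplexity for a causal LM.
import math

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "riotu-lab/ArabianGPT-01B"  # base model; replace with the fine-tuned checkpoint dir
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()

sample = "..."  # placeholder: one held-out evaluation example
inputs = tokenizer(sample, return_tensors="pt")

with torch.no_grad():
    out = model(**inputs, labels=inputs["input_ids"])  # causal-LM loss over the sample

print(f"eval loss:  {out.loss.item():.4f}")
print(f"perplexity: {math.exp(out.loss.item()):.4f}")
```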
all_results.json CHANGED
@@ -1,19 +1,19 @@
 {
- "epoch": 18.0,
- "eval_bleu": 0.31303204778954513,
- "eval_loss": 1.9793106317520142,
- "eval_rouge1": 0.5832240471437244,
- "eval_rouge2": 0.34612838297153165,
- "eval_rougeL": 0.545438606875621,
- "eval_runtime": 29.6378,
- "eval_samples": 925,
- "eval_samples_per_second": 31.21,
- "eval_steps_per_second": 3.914,
- "perplexity": 7.237751788702562,
- "total_flos": 2.5969031184384e+16,
- "train_loss": 1.6592627337077177,
- "train_runtime": 3469.9556,
- "train_samples": 3681,
- "train_samples_per_second": 21.216,
- "train_steps_per_second": 2.657
+ "epoch": 20.0,
+ "eval_bleu": 0.2986862166187925,
+ "eval_loss": 2.1411283016204834,
+ "eval_rouge1": 0.5830931277059221,
+ "eval_rouge2": 0.3405124611999597,
+ "eval_rougeL": 0.5412563338799627,
+ "eval_runtime": 29.7624,
+ "eval_samples": 884,
+ "eval_samples_per_second": 29.702,
+ "eval_steps_per_second": 3.73,
+ "perplexity": 8.50903297033769,
+ "total_flos": 2.7664555180032e+16,
+ "train_loss": 0.09870077279897836,
+ "train_runtime": 270.9621,
+ "train_samples": 3531,
+ "train_samples_per_second": 260.627,
+ "train_steps_per_second": 32.624
 }
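One consistency check worth noting: on both sides of this diff the perplexity field equals exp(eval_loss), i.e. perplexity derived from the mean cross-entropy loss.

```python
# Perplexity in all_results.json / eval_results.json matches exp(eval_loss).
import math

print(math.exp(1.9793106317520142))  # ~7.2378 (old side, epoch 18.0)
print(math.exp(2.1411283016204834))  # ~8.5090 (new side, epoch 20.0)
```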
egy_training_log.txt CHANGED
@@ -472,3 +472,5 @@ WARNING:root:Epoch 19.0: No losses recorded yet.
 INFO:absl:Using default tokenizer.
 INFO:root:Epoch 20.0: Train Loss = 1.6189, Eval Loss = 2.0766849517822266
 INFO:absl:Using default tokenizer.
+ INFO:__main__:*** Evaluate ***
+ INFO:absl:Using default tokenizer.
eval_results.json CHANGED
@@ -1,13 +1,13 @@
 {
- "epoch": 18.0,
- "eval_bleu": 0.31303204778954513,
- "eval_loss": 1.9793106317520142,
- "eval_rouge1": 0.5832240471437244,
- "eval_rouge2": 0.34612838297153165,
- "eval_rougeL": 0.545438606875621,
- "eval_runtime": 29.6378,
- "eval_samples": 925,
- "eval_samples_per_second": 31.21,
- "eval_steps_per_second": 3.914,
- "perplexity": 7.237751788702562
+ "epoch": 20.0,
+ "eval_bleu": 0.2986862166187925,
+ "eval_loss": 2.1411283016204834,
+ "eval_rouge1": 0.5830931277059221,
+ "eval_rouge2": 0.3405124611999597,
+ "eval_rougeL": 0.5412563338799627,
+ "eval_runtime": 29.7624,
+ "eval_samples": 884,
+ "eval_samples_per_second": 29.702,
+ "eval_steps_per_second": 3.73,
+ "perplexity": 8.50903297033769
 }
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
- "epoch": 18.0,
- "total_flos": 2.5969031184384e+16,
- "train_loss": 1.6592627337077177,
- "train_runtime": 3469.9556,
- "train_samples": 3681,
- "train_samples_per_second": 21.216,
- "train_steps_per_second": 2.657
+ "epoch": 20.0,
+ "total_flos": 2.7664555180032e+16,
+ "train_loss": 0.09870077279897836,
+ "train_runtime": 270.9621,
+ "train_samples": 3531,
+ "train_samples_per_second": 260.627,
+ "train_steps_per_second": 32.624
 }
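The new training aggregates (train_loss 0.0987, 260+ samples/s, a 271 s runtime) look inconsistent with the earlier run at first glance. A plausible reading, given that trainer_state.json below moves global_step from 8298 to 8840 while best_model_checkpoint is unchanged, is that this run resumed from the epoch-18 checkpoint and only executed the final steps, while the reported rates divide the full 20-epoch totals by the short resumed runtime. The arithmetic is consistent with that assumption:

```python
# Sanity check of the reported training rates, assuming a resumed run:
# runtime covers only the resumed steps, but the sample/step totals span
# all 20 epochs, which inflates the per-second figures.
train_samples = 3531
num_train_epochs = 20
max_steps = 8840          # from trainer_state.json
train_runtime = 270.9621  # seconds, this run only

print(train_samples * num_train_epochs / train_runtime)  # ~260.6 (reported 260.627)
print(max_steps / train_runtime)                          # ~32.6  (reported 32.624)
```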
train_vs_val_loss.png CHANGED
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 1.9793106317520142,
 "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results_fixed/checkpoint-6915",
- "epoch": 18.0,
+ "epoch": 20.0,
 "eval_steps": 500,
- "global_step": 8298,
+ "global_step": 8840,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -337,17 +337,48 @@
 "step": 8298
 },
 {
- "epoch": 18.0,
- "step": 8298,
- "total_flos": 2.5969031184384e+16,
- "train_loss": 1.6592627337077177,
- "train_runtime": 3469.9556,
- "train_samples_per_second": 21.216,
- "train_steps_per_second": 2.657
+ "epoch": 19.0,
+ "eval_bleu": 0.3098880521487277,
+ "eval_loss": 2.0766849517822266,
+ "eval_rouge1": 0.5857850781975715,
+ "eval_rouge2": 0.3476439342876294,
+ "eval_rougeL": 0.5470857900583828,
+ "eval_runtime": 29.9167,
+ "eval_samples_per_second": 29.549,
+ "eval_steps_per_second": 3.71,
+ "step": 8398
+ },
+ {
+ "epoch": 19.23076923076923,
+ "grad_norm": 1.234198808670044,
+ "learning_rate": 2.0383693045563552e-06,
+ "loss": 1.6189,
+ "step": 8500
+ },
+ {
+ "epoch": 20.0,
+ "eval_bleu": 0.31187064199371684,
+ "eval_loss": 2.06535267829895,
+ "eval_rouge1": 0.5861980524100046,
+ "eval_rouge2": 0.3488620525891558,
+ "eval_rougeL": 0.5478910457529194,
+ "eval_runtime": 29.8775,
+ "eval_samples_per_second": 29.588,
+ "eval_steps_per_second": 3.715,
+ "step": 8840
+ },
+ {
+ "epoch": 20.0,
+ "step": 8840,
+ "total_flos": 2.7664555180032e+16,
+ "train_loss": 0.09870077279897836,
+ "train_runtime": 270.9621,
+ "train_samples_per_second": 260.627,
+ "train_steps_per_second": 32.624
 }
 ],
 "logging_steps": 500,
- "max_steps": 9220,
+ "max_steps": 8840,
 "num_input_tokens_seen": 0,
 "num_train_epochs": 20,
 "save_steps": 500,
@@ -372,7 +403,7 @@
 "attributes": {}
 }
 },
- "total_flos": 2.5969031184384e+16,
+ "total_flos": 2.7664555180032e+16,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null