anas-awadalla committed on
Commit
54b0145
1 Parent(s): 1c795fb

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_exact_match": 73.6802270577105,
4
- "eval_f1": 83.81913172157984,
5
  "eval_samples": 10790,
6
- "train_loss": 0.12154395081276118,
7
- "train_runtime": 262.3624,
8
  "train_samples": 1024,
9
- "train_samples_per_second": 78.06,
10
- "train_steps_per_second": 6.556
11
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_exact_match": 69.46073793755913,
4
+ "eval_f1": 81.37817461204958,
5
  "eval_samples": 10790,
6
+ "train_loss": 2.5731575012207033,
7
+ "train_runtime": 296.5753,
8
  "train_samples": 1024,
9
+ "train_samples_per_second": 69.055,
10
+ "train_steps_per_second": 1.079
11
  }
eval_nbest_predictions.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c62aef08bf8f69e0d83cd5da4f847b47690e7c4ca60d1337590ab7ae1644be
3
- size 49075762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d028f460cdc9e6f7c4f5fbd13ca760928d008a2d85be87a3a4f0e850cf79eed5
3
+ size 48796622
eval_predictions.json CHANGED
The diff for this file is too large to render. See raw diff
 
eval_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_exact_match": 73.6802270577105,
4
- "eval_f1": 83.81913172157984,
5
  "eval_samples": 10790
6
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_exact_match": 69.46073793755913,
4
+ "eval_f1": 81.37817461204958,
5
  "eval_samples": 10790
6
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.12154395081276118,
4
- "train_runtime": 262.3624,
5
  "train_samples": 1024,
6
- "train_samples_per_second": 78.06,
7
- "train_steps_per_second": 6.556
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 2.5731575012207033,
4
+ "train_runtime": 296.5753,
5
  "train_samples": 1024,
6
+ "train_samples_per_second": 69.055,
7
+ "train_steps_per_second": 1.079
8
  }
trainer_state.json CHANGED
@@ -2,42 +2,24 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 20.0,
5
- "global_step": 1720,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 5.81,
12
- "learning_rate": 7.881136950904393e-05,
13
- "loss": 2.578,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 11.63,
18
- "learning_rate": 4.651162790697675e-05,
19
- "loss": 0.4809,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 17.44,
24
- "learning_rate": 1.421188630490956e-05,
25
- "loss": 0.3023,
26
- "step": 1500
27
- },
28
  {
29
  "epoch": 20.0,
30
- "step": 1720,
31
- "total_flos": 1.456427177607168e+16,
32
- "train_loss": 0.12154395081276118,
33
- "train_runtime": 262.3624,
34
- "train_samples_per_second": 78.06,
35
- "train_steps_per_second": 6.556
36
  }
37
  ],
38
- "max_steps": 1720,
39
  "num_train_epochs": 20,
40
- "total_flos": 1.456427177607168e+16,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 20.0,
5
+ "global_step": 320,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 20.0,
12
+ "step": 320,
13
+ "total_flos": 1.456427168169984e+16,
14
+ "train_loss": 2.5731575012207033,
15
+ "train_runtime": 296.5753,
16
+ "train_samples_per_second": 69.055,
17
+ "train_steps_per_second": 1.079
18
  }
19
  ],
20
+ "max_steps": 320,
21
  "num_train_epochs": 20,
22
+ "total_flos": 1.456427168169984e+16,
23
  "trial_name": null,
24
  "trial_params": null
25
  }