navjordj committed on
Commit
3a19d11
1 Parent(s): e861a61

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +7 -7
  3. train_results.json +5 -5
  4. trainer_state.json +32 -14
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_bleu": 3.1263,
4
- "eval_gen_len": 101.3243,
5
- "eval_loss": 2.2739031314849854,
6
- "eval_runtime": 177.4487,
7
  "eval_samples": 12422,
8
- "eval_samples_per_second": 70.003,
9
- "eval_steps_per_second": 0.552,
10
- "train_loss": 2.8308688191229425,
11
- "train_runtime": 860.89,
12
  "train_samples": 62107,
13
- "train_samples_per_second": 216.428,
14
- "train_steps_per_second": 0.847
15
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_bleu": 10.9236,
4
+ "eval_gen_len": 79.2493,
5
+ "eval_loss": 1.7087136507034302,
6
+ "eval_runtime": 177.7595,
7
  "eval_samples": 12422,
8
+ "eval_samples_per_second": 69.881,
9
+ "eval_steps_per_second": 0.551,
10
+ "train_loss": 2.27133990001286,
11
+ "train_runtime": 2909.2317,
12
  "train_samples": 62107,
13
+ "train_samples_per_second": 213.482,
14
+ "train_steps_per_second": 0.835
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_bleu": 3.1263,
4
- "eval_gen_len": 101.3243,
5
- "eval_loss": 2.2739031314849854,
6
- "eval_runtime": 177.4487,
7
  "eval_samples": 12422,
8
- "eval_samples_per_second": 70.003,
9
- "eval_steps_per_second": 0.552
10
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_bleu": 10.9236,
4
+ "eval_gen_len": 79.2493,
5
+ "eval_loss": 1.7087136507034302,
6
+ "eval_runtime": 177.7595,
7
  "eval_samples": 12422,
8
+ "eval_samples_per_second": 69.881,
9
+ "eval_steps_per_second": 0.551
10
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 2.8308688191229425,
4
- "train_runtime": 860.89,
5
  "train_samples": 62107,
6
- "train_samples_per_second": 216.428,
7
- "train_steps_per_second": 0.847
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 2.27133990001286,
4
+ "train_runtime": 2909.2317,
5
  "train_samples": 62107,
6
+ "train_samples_per_second": 213.482,
7
+ "train_steps_per_second": 0.835
8
  }
trainer_state.json CHANGED
@@ -1,31 +1,49 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 729,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 2.06,
12
- "learning_rate": 1.570644718792867e-05,
13
- "loss": 2.9439,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 3.0,
18
- "step": 729,
19
- "total_flos": 6806063508836352.0,
20
- "train_loss": 2.8308688191229425,
21
- "train_runtime": 860.89,
22
- "train_samples_per_second": 216.428,
23
- "train_steps_per_second": 0.847
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
- "max_steps": 729,
27
- "num_train_epochs": 3,
28
- "total_flos": 6806063508836352.0,
29
  "trial_name": null,
30
  "trial_params": null
31
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 2430,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 2.06,
12
+ "learning_rate": 3.971193415637861e-05,
13
+ "loss": 2.8803,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 4.12,
18
+ "learning_rate": 2.9423868312757202e-05,
19
+ "loss": 2.3089,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 6.17,
24
+ "learning_rate": 1.91358024691358e-05,
25
+ "loss": 2.116,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 8.23,
30
+ "learning_rate": 8.847736625514404e-06,
31
+ "loss": 2.0249,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 10.0,
36
+ "step": 2430,
37
+ "total_flos": 2.2677509529759744e+16,
38
+ "train_loss": 2.27133990001286,
39
+ "train_runtime": 2909.2317,
40
+ "train_samples_per_second": 213.482,
41
+ "train_steps_per_second": 0.835
42
  }
43
  ],
44
+ "max_steps": 2430,
45
+ "num_train_epochs": 10,
46
+ "total_flos": 2.2677509529759744e+16,
47
  "trial_name": null,
48
  "trial_params": null
49
  }