navjordj committed on
Commit
6ebd099
1 Parent(s): 298b895

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +12 -12
  2. eval_results.json +8 -8
  3. train_results.json +5 -5
  4. trainer_state.json +18 -12
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 0.01,
3
- "eval_bleu": 0.0331,
4
- "eval_gen_len": 124.5707,
5
- "eval_loss": 3.317145347595215,
6
- "eval_runtime": 75.5891,
7
- "eval_samples": 622,
8
- "eval_samples_per_second": 8.229,
9
- "eval_steps_per_second": 1.032,
10
- "train_loss": 3.6924142456054687,
11
- "train_runtime": 9.393,
12
  "train_samples": 62107,
13
- "train_samples_per_second": 85.17,
14
- "train_steps_per_second": 10.646
15
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_bleu": 3.1263,
4
+ "eval_gen_len": 101.3243,
5
+ "eval_loss": 2.2739031314849854,
6
+ "eval_runtime": 177.4487,
7
+ "eval_samples": 12422,
8
+ "eval_samples_per_second": 70.003,
9
+ "eval_steps_per_second": 0.552,
10
+ "train_loss": 2.8308688191229425,
11
+ "train_runtime": 860.89,
12
  "train_samples": 62107,
13
+ "train_samples_per_second": 216.428,
14
+ "train_steps_per_second": 0.847
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 0.01,
3
- "eval_bleu": 0.0331,
4
- "eval_gen_len": 124.5707,
5
- "eval_loss": 3.317145347595215,
6
- "eval_runtime": 75.5891,
7
- "eval_samples": 622,
8
- "eval_samples_per_second": 8.229,
9
- "eval_steps_per_second": 1.032
10
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_bleu": 3.1263,
4
+ "eval_gen_len": 101.3243,
5
+ "eval_loss": 2.2739031314849854,
6
+ "eval_runtime": 177.4487,
7
+ "eval_samples": 12422,
8
+ "eval_samples_per_second": 70.003,
9
+ "eval_steps_per_second": 0.552
10
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.01,
3
- "train_loss": 3.6924142456054687,
4
- "train_runtime": 9.393,
5
  "train_samples": 62107,
6
- "train_samples_per_second": 85.17,
7
- "train_steps_per_second": 10.646
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 2.8308688191229425,
4
+ "train_runtime": 860.89,
5
  "train_samples": 62107,
6
+ "train_samples_per_second": 216.428,
7
+ "train_steps_per_second": 0.847
8
  }
trainer_state.json CHANGED
@@ -1,25 +1,31 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.01287995878413189,
5
- "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
12
- "step": 100,
13
- "total_flos": 19701488959488.0,
14
- "train_loss": 3.6924142456054687,
15
- "train_runtime": 9.393,
16
- "train_samples_per_second": 85.17,
17
- "train_steps_per_second": 10.646
 
 
 
 
 
 
18
  }
19
  ],
20
- "max_steps": 100,
21
- "num_train_epochs": 1,
22
- "total_flos": 19701488959488.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 729,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.06,
12
+ "learning_rate": 1.570644718792867e-05,
13
+ "loss": 2.9439,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 3.0,
18
+ "step": 729,
19
+ "total_flos": 6806063508836352.0,
20
+ "train_loss": 2.8308688191229425,
21
+ "train_runtime": 860.89,
22
+ "train_samples_per_second": 216.428,
23
+ "train_steps_per_second": 0.847
24
  }
25
  ],
26
+ "max_steps": 729,
27
+ "num_train_epochs": 3,
28
+ "total_flos": 6806063508836352.0,
29
  "trial_name": null,
30
  "trial_params": null
31
  }