mekjr1 committed on
Commit
d870ccc
1 Parent(s): 0151eb2

End of training

Browse files
all_results.json CHANGED
@@ -3,21 +3,21 @@
3
  "eval_bleu": 0.0,
4
  "eval_gen_len": 9.0,
5
  "eval_loss": 0.734420120716095,
6
- "eval_runtime": 13.5697,
7
  "eval_samples": 793,
8
- "eval_samples_per_second": 58.439,
9
- "eval_steps_per_second": 3.685,
10
  "predict_bleu": 0.0,
11
  "predict_gen_len": 9.0,
12
  "predict_loss": 0.7520648837089539,
13
- "predict_runtime": 13.5032,
14
  "predict_samples": 794,
15
- "predict_samples_per_second": 58.801,
16
- "predict_steps_per_second": 3.703,
17
  "total_flos": 2212105531392000.0,
18
  "train_loss": 1.0583241114388184,
19
- "train_runtime": 487.1376,
20
  "train_samples": 6345,
21
- "train_samples_per_second": 130.251,
22
- "train_steps_per_second": 8.15
23
  }
 
3
  "eval_bleu": 0.0,
4
  "eval_gen_len": 9.0,
5
  "eval_loss": 0.734420120716095,
6
+ "eval_runtime": 14.2391,
7
  "eval_samples": 793,
8
+ "eval_samples_per_second": 55.692,
9
+ "eval_steps_per_second": 3.511,
10
  "predict_bleu": 0.0,
11
  "predict_gen_len": 9.0,
12
  "predict_loss": 0.7520648837089539,
13
+ "predict_runtime": 14.2551,
14
  "predict_samples": 794,
15
+ "predict_samples_per_second": 55.699,
16
+ "predict_steps_per_second": 3.508,
17
  "total_flos": 2212105531392000.0,
18
  "train_loss": 1.0583241114388184,
19
+ "train_runtime": 517.4021,
20
  "train_samples": 6345,
21
+ "train_samples_per_second": 122.632,
22
+ "train_steps_per_second": 7.673
23
  }
eval_results.json CHANGED
@@ -3,8 +3,8 @@
3
  "eval_bleu": 0.0,
4
  "eval_gen_len": 9.0,
5
  "eval_loss": 0.734420120716095,
6
- "eval_runtime": 13.5697,
7
  "eval_samples": 793,
8
- "eval_samples_per_second": 58.439,
9
- "eval_steps_per_second": 3.685
10
  }
 
3
  "eval_bleu": 0.0,
4
  "eval_gen_len": 9.0,
5
  "eval_loss": 0.734420120716095,
6
+ "eval_runtime": 14.2391,
7
  "eval_samples": 793,
8
+ "eval_samples_per_second": 55.692,
9
+ "eval_steps_per_second": 3.511
10
  }
generation_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
  "eos_token_id": 1,
5
  "pad_token_id": 0,
 
1
  {
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
predict_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "predict_bleu": 0.0,
3
  "predict_gen_len": 9.0,
4
  "predict_loss": 0.7520648837089539,
5
- "predict_runtime": 13.5032,
6
  "predict_samples": 794,
7
- "predict_samples_per_second": 58.801,
8
- "predict_steps_per_second": 3.703
9
  }
 
2
  "predict_bleu": 0.0,
3
  "predict_gen_len": 9.0,
4
  "predict_loss": 0.7520648837089539,
5
+ "predict_runtime": 14.2551,
6
  "predict_samples": 794,
7
+ "predict_samples_per_second": 55.699,
8
+ "predict_steps_per_second": 3.508
9
  }
runs/Apr12_23-41-18_ea8d7db94b39/events.out.tfevents.1681343939.ea8d7db94b39.1070.17 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d69f7ef0b101b6cf3e5b7dcc9b40f13edf5f39fe8a7a5b99e1a16384118ee4d0
3
+ size 458
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 10.0,
3
  "total_flos": 2212105531392000.0,
4
  "train_loss": 1.0583241114388184,
5
- "train_runtime": 487.1376,
6
  "train_samples": 6345,
7
- "train_samples_per_second": 130.251,
8
- "train_steps_per_second": 8.15
9
  }
 
2
  "epoch": 10.0,
3
  "total_flos": 2212105531392000.0,
4
  "train_loss": 1.0583241114388184,
5
+ "train_runtime": 517.4021,
6
  "train_samples": 6345,
7
+ "train_samples_per_second": 122.632,
8
+ "train_steps_per_second": 7.673
9
  }
trainer_state.json CHANGED
@@ -54,9 +54,9 @@
54
  "step": 3970,
55
  "total_flos": 2212105531392000.0,
56
  "train_loss": 1.0583241114388184,
57
- "train_runtime": 487.1376,
58
- "train_samples_per_second": 130.251,
59
- "train_steps_per_second": 8.15
60
  }
61
  ],
62
  "max_steps": 3970,
 
54
  "step": 3970,
55
  "total_flos": 2212105531392000.0,
56
  "train_loss": 1.0583241114388184,
57
+ "train_runtime": 517.4021,
58
+ "train_samples_per_second": 122.632,
59
+ "train_steps_per_second": 7.673
60
  }
61
  ],
62
  "max_steps": 3970,