dq158 committed on
Commit
f1e3bc2
1 Parent(s): 059506a

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/global_step1581/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc367dbb9740751e85fd2958b4867e548bfad92bc52db0c411c1d02943344cf
3
- size 56626640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91eacaa84680aa82b20a3df8182dc93d0ce047ed089f25b4731f846316837cb6
3
+ size 28315088
last-checkpoint/global_step1581/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9f583059aebf5c348baa213b71780e3beb0ed5bea73b3eaaae551a3ee1fa3b6
3
- size 56626640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c297b2e76489c10915e73396d64e91d8ba6c2dd74c95b2df3b59bea16e5b8948
3
+ size 28315088
last-checkpoint/global_step1581/zero_pp_rank_0_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5d9a7306e11004f5b07be5adcf241a253323781fe1b13c03e09f6e9c51b1a4f
3
- size 11136132566
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cfb139e4c8441e6ab6518b594be2262638a6362ae882d8fa66311577c967c5
3
+ size 2850543502
last-checkpoint/global_step1581/zero_pp_rank_1_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a71dd45f808f455f2743152cc5c2c5389f3f01dc02239d02a8986dbc29229ccf
3
- size 11136132374
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a28252787a7c5302116de0d3ed69eaebf466c0d5f045b2ab3ef114b4d7e96000
3
+ size 2850543310
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step790
 
1
+ global_step1581
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16a7aa1548464e6c880ebd42aba4c07ee57fb38feb25979f4c82c0f0bec9f0e6
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08f5222c04ba8a25b70996b406c1e95dc834c73f15f3e7248da94c55fa3d6d3
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38d40d71f540bdd7fdb4af7e756f13010eb6a96ab531b88e35903b066788ad63
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:707f46a874ccec694d9a9ebdb8230159a79dc68e5bca12742f90f6e6d892b27e
3
  size 14512
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.392251491546631,
3
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-790",
4
- "epoch": 0.9993674889310563,
5
  "eval_steps": 500,
6
- "global_step": 790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -32,13 +32,44 @@
32
  "eval_steps_per_second": 0.126,
33
  "eval_translation_length": 53098,
34
  "step": 790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
  ],
37
  "logging_steps": 500,
38
  "max_steps": 23700,
39
  "num_train_epochs": 30,
40
  "save_steps": 500,
41
- "total_flos": 385850466631680.0,
42
  "trial_name": null,
43
  "trial_params": null
44
  }
 
1
  {
2
+ "best_metric": 2.288722038269043,
3
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-1581",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 1581,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
32
  "eval_steps_per_second": 0.126,
33
  "eval_translation_length": 53098,
34
  "step": 790
35
+ },
36
+ {
37
+ "epoch": 1.27,
38
+ "learning_rate": 9.445572420019074e-05,
39
+ "loss": 2.5652,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 1.9,
44
+ "learning_rate": 9.999999999999999e-05,
45
+ "loss": 2.468,
46
+ "step": 1500
47
+ },
48
+ {
49
+ "epoch": 2.0,
50
+ "eval_bleu": 1.0,
51
+ "eval_brevity_penalty": 1.0,
52
+ "eval_length_ratio": 1.0,
53
+ "eval_loss": 2.288722038269043,
54
+ "eval_precisions": [
55
+ 1.0,
56
+ 1.0,
57
+ 1.0,
58
+ 1.0
59
+ ],
60
+ "eval_reference_length": 53391,
61
+ "eval_runtime": 1340.9293,
62
+ "eval_samples_per_second": 2.096,
63
+ "eval_steps_per_second": 0.131,
64
+ "eval_translation_length": 53391,
65
+ "step": 1581
66
  }
67
  ],
68
  "logging_steps": 500,
69
  "max_steps": 23700,
70
  "num_train_epochs": 30,
71
  "save_steps": 500,
72
+ "total_flos": 771945142419456.0,
73
  "trial_name": null,
74
  "trial_params": null
75
  }