dq158 committed on
Commit
927cc30
1 Parent(s): c7656e4

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7eaa51169fb5f5f33b328f26090dcd19a47bde0a9efe64e78856dbbe04a07e7a
3
- size 888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9389791a3d0c9a79fbca9528f0f75ee24f377d83500dbcb49237253a3e3582
3
+ size 18980874
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:904b02e1e6c26215ea888d3f8f6d7aafb77206ed1937def3937a940637da0f1c
3
- size 2372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026c40db959a890a16600f2337a05279f713e0057d5184b566a36aa8e88d4325
3
+ size 37990394
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07f9db347331ffb79c2e2cf5b87f5ee3885eb3ce501d58242d83ec8e3cae758d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f8922c352ab63d1593bcb944e77d461472df192dcdc1202984e9e08f16a111
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae3274533ddd7159e50e851656ea6e92a09b25d95e547026de312a25757234d5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf928e904a1da7eec5bc881ec510beaeb0bdf78d28158a3fd93dd9f7b26f2b8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,100 +1,110 @@
1
  {
2
- "best_metric": 2.2401010990142822,
3
- "best_model_checkpoint": "dq158/pingusPongus/checkpoint-2371",
4
- "epoch": 2.9993674889310564,
5
  "eval_steps": 500,
6
- "global_step": 2371,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.63,
13
- "learning_rate": 8.49777221180502e-05,
14
- "loss": 2.8794,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 1.0,
19
- "eval_bleu": 1.0,
20
- "eval_brevity_penalty": 1.0,
21
- "eval_length_ratio": 1.0,
22
- "eval_loss": 2.392251491546631,
23
- "eval_precisions": [
24
- 1.0,
25
- 1.0,
26
- 1.0,
27
- 1.0
28
- ],
29
- "eval_reference_length": 53098,
30
- "eval_runtime": 1401.0268,
31
- "eval_samples_per_second": 2.006,
32
- "eval_steps_per_second": 0.126,
33
- "eval_translation_length": 53098,
34
- "step": 790
35
- },
36
- {
37
- "epoch": 1.27,
38
- "learning_rate": 9.445572420019074e-05,
39
- "loss": 2.5652,
40
  "step": 1000
41
  },
42
  {
43
- "epoch": 1.9,
44
- "learning_rate": 9.999999999999999e-05,
45
- "loss": 2.468,
46
  "step": 1500
47
  },
48
  {
49
- "epoch": 2.0,
50
- "eval_bleu": 1.0,
51
- "eval_brevity_penalty": 1.0,
52
- "eval_length_ratio": 1.0,
53
- "eval_loss": 2.288722038269043,
54
- "eval_precisions": [
55
- 1.0,
56
- 1.0,
57
- 1.0,
58
- 1.0
59
- ],
60
- "eval_reference_length": 53391,
61
- "eval_runtime": 1340.9293,
62
- "eval_samples_per_second": 2.096,
63
- "eval_steps_per_second": 0.131,
64
- "eval_translation_length": 53391,
65
- "step": 1581
66
  },
67
  {
68
- "epoch": 2.53,
69
- "learning_rate": 0.0001,
70
- "loss": 2.4165,
71
- "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  },
73
  {
74
- "epoch": 3.0,
 
 
 
 
 
 
 
 
 
 
 
 
75
  "eval_bleu": 1.0,
76
  "eval_brevity_penalty": 1.0,
77
  "eval_length_ratio": 1.0,
78
- "eval_loss": 2.2401010990142822,
79
  "eval_precisions": [
80
  1.0,
81
  1.0,
82
  1.0,
83
  1.0
84
  ],
85
- "eval_reference_length": 53401,
86
- "eval_runtime": 1342.5303,
87
- "eval_samples_per_second": 2.094,
88
- "eval_steps_per_second": 0.131,
89
- "eval_translation_length": 53401,
90
- "step": 2371
91
  }
92
  ],
93
  "logging_steps": 500,
94
- "max_steps": 23700,
95
  "num_train_epochs": 30,
96
  "save_steps": 500,
97
- "total_flos": 1158039818207232.0,
98
  "trial_name": null,
99
  "trial_params": null
100
  }
 
1
  {
2
+ "best_metric": 2.4926838874816895,
3
+ "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 6323,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.08,
13
+ "learning_rate": 0.0003333333333333333,
14
+ "loss": 3.14,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.16,
19
+ "learning_rate": 0.0006666666666666666,
20
+ "loss": 2.6622,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.24,
25
+ "learning_rate": 0.001,
26
+ "loss": 2.7054,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 0.32,
31
+ "learning_rate": 0.0009999825825666724,
32
+ "loss": 2.6612,
33
+ "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  },
35
  {
36
+ "epoch": 0.4,
37
+ "learning_rate": 0.0009999303314801573,
38
+ "loss": 2.612,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.47,
43
+ "learning_rate": 0.000999843250380774,
44
+ "loss": 2.6543,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.55,
49
+ "learning_rate": 0.0009997213453354398,
50
+ "loss": 2.6882,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.63,
55
+ "learning_rate": 0.000999564624837246,
56
+ "loss": 2.6739,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.71,
61
+ "learning_rate": 0.0009993730998048686,
62
+ "loss": 2.6112,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.79,
67
+ "learning_rate": 0.0009991467835818054,
68
+ "loss": 2.5963,
69
+ "step": 5000
70
  },
71
  {
72
+ "epoch": 0.87,
73
+ "learning_rate": 0.000998885691935447,
74
+ "loss": 2.5945,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.95,
79
+ "learning_rate": 0.000998589843055979,
80
+ "loss": 2.5457,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 1.0,
85
  "eval_bleu": 1.0,
86
  "eval_brevity_penalty": 1.0,
87
  "eval_length_ratio": 1.0,
88
+ "eval_loss": 2.4926838874816895,
89
  "eval_precisions": [
90
  1.0,
91
  1.0,
92
  1.0,
93
  1.0
94
  ],
95
+ "eval_reference_length": 50598,
96
+ "eval_runtime": 1532.5219,
97
+ "eval_samples_per_second": 1.834,
98
+ "eval_steps_per_second": 0.917,
99
+ "eval_translation_length": 50598,
100
+ "step": 6323
101
  }
102
  ],
103
  "logging_steps": 500,
104
+ "max_steps": 189690,
105
  "num_train_epochs": 30,
106
  "save_steps": 500,
107
+ "total_flos": 2.1666322696686797e+17,
108
  "trial_name": null,
109
  "trial_params": null
110
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7619c25d1f961a939120b81d521c14f9d13e3f5fcb5ceba87a0c9e7d8c62891c
3
- size 6648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6b5c798b857e4f110f443e0cb3a626e3d708db5518d90ee31d9206407bec991
3
+ size 4664