daydrill committed on
Commit
88625f1
1 Parent(s): d559279

Training in progress, step 5000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20ab356fa18f110500a5c92dc47dd1e4511f4ece5e195bf6f39801135def9e7a
3
  size 943333453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2815fe401ec6ed13fa8496b8a130981165e9740f25a9aa0f5046eb298d76e89d
3
  size 943333453
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1aace261b8e6cad923ee6661d1c7271017bc8d22f018c02ff43351ce6bc4e31
3
  size 471708325
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d467d75f3fdef469e5bc56ca1fac1d83e9042d52441105a2d20418fb1a0436
3
  size 471708325
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bc19aba973831004a77f20c88b8e3e066a8a55403bc0ad8b246efb14f309ecd
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2124064e0e4e5ab159e152ae1f2b839ea342c8916f3940e7326b5af6d56d60f7
3
+ size 14503
last-checkpoint/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b3fad2f6fce6a53bcc536267fb713fc98b77d91952cd1de74b585c2aab05034
3
+ size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:586f9b073107d2287ab918b8e3a6c9ec40a7767baa6b65a691f978a7a7ba2ab5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:856d2b6c50571997ea86916bdeeedeb8e6b238155372c64b4e024237aa1cc790
3
  size 623
last-checkpoint/tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 4096,
6
  "strategy": "OnlySecond",
7
  "stride": 128
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 4096
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 2048,
6
  "strategy": "OnlySecond",
7
  "stride": 128
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 2048
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
last-checkpoint/trainer_state.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.008156606851549755,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
12
- "learning_rate": 2.9918433931484505e-05,
13
- "loss": 3.6749,
14
- "step": 500
15
  },
16
  {
17
- "epoch": 0.01,
18
- "eval_exact_match": 25.56077203964528,
19
- "eval_f1": 30.48757258245567,
20
- "eval_loss": 2.4391441345214844,
21
- "eval_runtime": 1683.1549,
22
- "eval_samples_per_second": 6.834,
23
- "eval_steps_per_second": 6.834,
24
- "step": 500
25
  }
26
  ],
27
- "max_steps": 183900,
28
  "num_train_epochs": 3,
29
- "total_flos": 1103235145728000.0,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.16288767266093301,
5
+ "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.16,
12
+ "learning_rate": 2.8373729476153247e-05,
13
+ "loss": 1.6126,
14
+ "step": 5000
15
  },
16
  {
17
+ "epoch": 0.16,
18
+ "eval_exact_match": 68.15336463223787,
19
+ "eval_f1": 73.98256617857639,
20
+ "eval_loss": 1.189887523651123,
21
+ "eval_runtime": 1542.7266,
22
+ "eval_samples_per_second": 7.463,
23
+ "eval_steps_per_second": 3.732,
24
+ "step": 5000
25
  }
26
  ],
27
+ "max_steps": 92088,
28
  "num_train_epochs": 3,
29
+ "total_flos": 1.103235145728e+16,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce3df7dacb746d38927d7b4b5c96e23b1180109e65ab3c5eda16560beab01da4
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a307dd73542378086e35083774070bcc9254069583647323f4d6017ba44c0d
3
  size 3439
nbest_predictions.json CHANGED
The diff for this file is too large to render. See raw diff
 
predictions.json CHANGED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1aace261b8e6cad923ee6661d1c7271017bc8d22f018c02ff43351ce6bc4e31
3
  size 471708325
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d467d75f3fdef469e5bc56ca1fac1d83e9042d52441105a2d20418fb1a0436
3
  size 471708325
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 4096,
6
  "strategy": "OnlySecond",
7
  "stride": 128
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 4096
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 2048,
6
  "strategy": "OnlySecond",
7
  "stride": 128
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 2048
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce3df7dacb746d38927d7b4b5c96e23b1180109e65ab3c5eda16560beab01da4
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a307dd73542378086e35083774070bcc9254069583647323f4d6017ba44c0d
3
  size 3439