maesneako committed
Commit ff69ed6
Parent: f576a03

Training in progress, step 4000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:047d719f73cd36ac9f58a2203fa5e0c98b8b1d4370508542a1a633fb075d3753
+ oid sha256:c18ec9ae19fb67dde669cb881dc949405131436f9cc1761e33a79f52d52e874a
 size 995604017
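These checkpoint binaries are tracked with Git LFS, so the diffs above and below only touch the pointer files: the `oid sha256:` line changes while the `size` stays the same, because the new optimizer state has the same byte length as the old one. As a minimal sketch, assuming a local working copy of the file, the pointer fields could be recomputed like this (the path is just the repo-relative name used above):

```python
import hashlib
import os

def lfs_pointer(path: str) -> str:
    """Recompute the Git LFS pointer fields (oid + size) for a local file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
    return (
        "version https://git-lfs.github.com/spec/v1\n"
        f"oid sha256:{h.hexdigest()}\n"
        f"size {os.path.getsize(path)}\n"
    )

# Should print the new pointer shown above when run against the updated file.
print(lfs_pointer("last-checkpoint/optimizer.pt"))
```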
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:9d90bbdc010d82ccf4c49b054ebb3c8c9600f1f85cdd8548e8b1cf667068e70a
+ oid sha256:f42fe0c2ec41d656453cec73abd968b21f89daa58209662a04075026845f4c5d
 size 510396521
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a7d28830c94d92b6d19326a0184936f2862087877e8af1a427622e66aefb96fa
+ oid sha256:430f1248620a7a9f128f21dc3d873f638e60a994e72fbe17888d55a4b3b61863
 size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:360bfe5d7b0b6d589270f3932ba49712e88c4c1d12dc82eb69c2d144a105fa45
+ oid sha256:b94329f00f4cdc9834d8c689e9e9178c26b5c51fa127b64c75940601b3f9f205
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-   "best_metric": 4.06580114364624,
-   "best_model_checkpoint": "./ES_corlec/checkpoint-2000",
-   "epoch": 0.3965107057890563,
-   "global_step": 2000,
+   "best_metric": 4.000585079193115,
+   "best_model_checkpoint": "./ES_corlec/checkpoint-4000",
+   "epoch": 0.7930214115781126,
+   "global_step": 4000,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -38,11 +38,43 @@
      "eval_samples_per_second": 42.075,
      "eval_steps_per_second": 2.631,
      "step": 2000
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 9.344878660134443e-07,
+      "loss": 4.0586,
+      "step": 2500
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 9.202460977554973e-07,
+      "loss": 4.022,
+      "step": 3000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 9.060043294975503e-07,
+      "loss": 4.0038,
+      "step": 3500
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 8.917625612396035e-07,
+      "loss": 3.9852,
+      "step": 4000
+    },
+    {
+      "epoch": 0.79,
+      "eval_loss": 4.000585079193115,
+      "eval_runtime": 962.3175,
+      "eval_samples_per_second": 42.066,
+      "eval_steps_per_second": 2.63,
+      "step": 4000
     }
   ],
   "max_steps": 35308,
   "num_train_epochs": 7,
-  "total_flos": 2449612800000000.0,
+  "total_flos": 4899225600000000.0,
   "trial_name": null,
   "trial_params": null
 }
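The new trainer_state.json values are internally consistent, which is worth a quick check: with max_steps 35308 over 7 epochs there are 5044 optimizer steps per epoch, so step 4000 lands at epoch ≈ 0.793; total_flos at step 4000 is exactly twice the value recorded at step 2000; and the logged learning rates drop by a constant amount per step, i.e. a linear decay. A small throwaway script (not part of the repo) that verifies this:

```python
# Consistency check of the values shown in trainer_state.json above.
max_steps, num_epochs, global_step = 35308, 7, 4000

steps_per_epoch = max_steps / num_epochs           # 5044.0
print(global_step / steps_per_epoch)               # 0.7930214... matches "epoch"

# total_flos grows linearly with steps: twice the value recorded at step 2000.
print(2 * 2449612800000000.0 == 4899225600000000.0)  # True

# Logged learning rates fall by a constant amount per step -> linear decay.
lrs = {2500: 9.344878660134443e-07, 3000: 9.202460977554973e-07,
       3500: 9.060043294975503e-07, 4000: 8.917625612396035e-07}
per_step = (lrs[2500] - lrs[4000]) / 1500
print(per_step)  # ~2.85e-11, i.e. a ~1e-6 peak LR spread over ~35k steps
```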
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:9d90bbdc010d82ccf4c49b054ebb3c8c9600f1f85cdd8548e8b1cf667068e70a
+ oid sha256:f42fe0c2ec41d656453cec73abd968b21f89daa58209662a04075026845f4c5d
 size 510396521
stderr.slurm CHANGED
The diff for this file is too large to render. See raw diff.
stdout.slurm CHANGED
@@ -2,3 +2,8 @@
 {'loss': 4.1911, 'learning_rate': 9.77213170787285e-07, 'epoch': 0.2}
 {'loss': 4.1368, 'learning_rate': 9.62971402529338e-07, 'epoch': 0.3}
 {'loss': 4.0957, 'learning_rate': 9.487296342713911e-07, 'epoch': 0.4}
+ {'eval_loss': 4.06580114364624, 'eval_runtime': 962.1135, 'eval_samples_per_second': 42.075, 'eval_steps_per_second': 2.631, 'epoch': 0.4}
+ {'loss': 4.0586, 'learning_rate': 9.344878660134443e-07, 'epoch': 0.5}
+ {'loss': 4.022, 'learning_rate': 9.202460977554973e-07, 'epoch': 0.59}
+ {'loss': 4.0038, 'learning_rate': 9.060043294975503e-07, 'epoch': 0.69}
+ {'loss': 3.9852, 'learning_rate': 8.917625612396035e-07, 'epoch': 0.79}
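For orientation only: the stdout cadence above (a loss line every 500 steps, an eval line every 2000 steps, checkpoints at ./ES_corlec/checkpoint-2000 and checkpoint-4000) is what a Hugging Face Trainer produces with settings along the following lines. None of these arguments are recorded in this commit, so every value below is an inferred assumption, not the run's actual configuration.

```python
from transformers import TrainingArguments

# Inferred from the logs above -- an assumption, not the run's actual config.
args = TrainingArguments(
    output_dir="./ES_corlec",        # matches best_model_checkpoint path
    num_train_epochs=7,              # matches num_train_epochs in trainer_state.json
    learning_rate=1e-6,              # peak LR implied by the linear decay of logged LRs
    warmup_steps=200,                # rough fit to the decay slope; unconfirmed
    logging_steps=500,               # loss lines every 500 steps
    evaluation_strategy="steps",
    eval_steps=2000,                 # eval_loss lines at steps 2000 and 4000
    save_steps=2000,                 # checkpoint-2000, checkpoint-4000
    load_best_model_at_end=True,     # best_metric / best_model_checkpoint are tracked
)
```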