HikasaHana committed on
Commit
81dd747
·
verified ·
1 Parent(s): 9d68399

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01943b36fe2d099072ef77b2811221d172183457a8423ec8462545bbe2d496e3
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa79b65d0ba7529aee0550dc910f9468bf4b0ad08164867f686e834e0e9fd408
3
  size 409103316
run-3/checkpoint-534/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fed076af4c274336a1bbda01e7b9e95519d0bf0a1fe5c7f712970d2aa901f093
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61096730720c51ac6648ccb652d6e8883705568c6be1662245a2d864a0f89450
3
  size 409103316
run-3/checkpoint-534/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2699e5ce5a66cbca0b105d6408b39db28914becd331e717afd744ad8431c462
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01d1170bce624b8918138e8cc63259f0004659314d431d193f9b965250a6158b
3
  size 818327802
run-3/checkpoint-534/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9844fc449bc7d92d10fa8131195695656fc5bae595590761350f9f767121bb98
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a56050495e1938ae70b239ae8ece3c43ac800b58f0a6e02bd7ed6cd1669797d
3
  size 1064
run-3/checkpoint-534/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5784130692481995,
3
  "best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,39 +10,39 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7549295774647887,
14
- "eval_loss": 0.603171169757843,
15
- "eval_runtime": 2.3997,
16
- "eval_samples_per_second": 887.602,
17
- "eval_steps_per_second": 55.84,
18
  "step": 267
19
  },
20
  {
21
  "epoch": 1.87,
22
- "grad_norm": 9.50742244720459,
23
- "learning_rate": 8.206819746059557e-06,
24
- "loss": 0.6057,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.7676056338028169,
30
- "eval_loss": 0.5784130692481995,
31
- "eval_runtime": 2.4488,
32
- "eval_samples_per_second": 869.828,
33
- "eval_steps_per_second": 54.722,
34
  "step": 534
35
  }
36
  ],
37
  "logging_steps": 500,
38
- "max_steps": 1068,
39
  "num_input_tokens_seen": 0,
40
- "num_train_epochs": 4,
41
  "save_steps": 500,
42
  "total_flos": 349221394826640.0,
43
  "train_batch_size": 32,
44
  "trial_name": null,
45
  "trial_params": {
46
- "num_train_epochs": 4
47
  }
48
  }
 
1
  {
2
+ "best_metric": 0.6037538051605225,
3
  "best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7530516431924883,
14
+ "eval_loss": 0.6065873503684998,
15
+ "eval_runtime": 2.5921,
16
+ "eval_samples_per_second": 821.724,
17
+ "eval_steps_per_second": 51.695,
18
  "step": 267
19
  },
20
  {
21
  "epoch": 1.87,
22
+ "grad_norm": 9.447236061096191,
23
+ "learning_rate": 9.825065893169892e-07,
24
+ "loss": 0.6083,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.7615023474178404,
30
+ "eval_loss": 0.6037538051605225,
31
+ "eval_runtime": 2.5455,
32
+ "eval_samples_per_second": 836.779,
33
+ "eval_steps_per_second": 52.642,
34
  "step": 534
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 534,
39
  "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 2,
41
  "save_steps": 500,
42
  "total_flos": 349221394826640.0,
43
  "train_batch_size": 32,
44
  "trial_name": null,
45
  "trial_params": {
46
+ "weight_decay": 0.021167311357718124
47
  }
48
  }
run-3/checkpoint-534/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8c637656beacaec83a46c1d786535828a5c615851f4d21b0069fcadd723fa83
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1238f3e9130ff13042dcb9e0ded88b87b25d59c0eb7bc889417ea846582c3c9b
3
  size 4856
run-4/checkpoint-267/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:709e0174af7e7bfc36b2ca42177cb07663d435c4830e3ef2fd95156cc7684138
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0dc6ea72694ff54a88f458db9aff463c496bd6d83166f6fb57a4e51c5bb172
3
  size 409103316
run-4/checkpoint-267/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52ef4431d2c45c5021200b1e749d9a863141163026b276944e492c1db9ee3f59
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5921b1879136a2de0f32be39d0aa1e2c1df44b7767609fbb8ab7984ca270865
3
  size 818327802
run-4/checkpoint-267/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44c8fc955f6ab66538b705aa40f6ebb08474ec445464a7a19ca29971b9fc42d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3be8be9037b2d6a9348611f95c19fb1d17ca7cb1da92486df22b7e0a3c655a
3
  size 1064
run-4/checkpoint-267/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6081846952438354,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-267",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,23 +10,23 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7577464788732394,
14
- "eval_loss": 0.6081846952438354,
15
- "eval_runtime": 2.4392,
16
- "eval_samples_per_second": 873.231,
17
- "eval_steps_per_second": 54.936,
18
  "step": 267
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 1335,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 5,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
  "train_batch_size": 32,
28
  "trial_name": null,
29
  "trial_params": {
30
- "num_train_epochs": 5
31
  }
32
  }
 
1
  {
2
+ "best_metric": 0.6065725684165955,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-267",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7530516431924883,
14
+ "eval_loss": 0.6065725684165955,
15
+ "eval_runtime": 2.5417,
16
+ "eval_samples_per_second": 838.036,
17
+ "eval_steps_per_second": 52.722,
18
  "step": 267
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 534,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
  "train_batch_size": 32,
28
  "trial_name": null,
29
  "trial_params": {
30
+ "weight_decay": 4.018191065683781e-05
31
  }
32
  }
run-4/checkpoint-267/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42e5e5ec734e47895bff309ce0f98b97c457fc4dc71b42d53c94016f0c7a855
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f2b8ff5812e9a8e681e14d8d4a502beb7efad66683c16ee39765361b35b768
3
  size 4856
run-4/checkpoint-534/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:976557a36bef1a4a492485f3f6cb8948205b7b3a9ea05ce8ce0eb8982fcc1d9e
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5b3ada415323df4681659f8a3c19121e9379a2b0c7922cade957c3cc9526017
3
  size 409103316
run-4/checkpoint-534/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:890b6f5cc14af75c85002de6e2745609f1bf02f4a33a71cd0af847d31243fe8a
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:492785e0b2d4bc2fddbf03fc7aa5070a9d5992be16a4b8b568d67b7ad8b49cd5
3
  size 818327802
run-4/checkpoint-534/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e2aa06609570d504e0cfaf6c3bd801e1681706921a7512c6ccb9eded784901f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a56050495e1938ae70b239ae8ece3c43ac800b58f0a6e02bd7ed6cd1669797d
3
  size 1064
run-4/checkpoint-534/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5776590704917908,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,39 +10,39 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7577464788732394,
14
- "eval_loss": 0.6081846952438354,
15
- "eval_runtime": 2.4392,
16
- "eval_samples_per_second": 873.231,
17
- "eval_steps_per_second": 54.936,
18
  "step": 267
19
  },
20
  {
21
  "epoch": 1.87,
22
- "grad_norm": 9.532360076904297,
23
- "learning_rate": 9.65168237740807e-06,
24
- "loss": 0.6057,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.7657276995305164,
30
- "eval_loss": 0.5776590704917908,
31
- "eval_runtime": 2.4594,
32
- "eval_samples_per_second": 866.069,
33
- "eval_steps_per_second": 54.485,
34
  "step": 534
35
  }
36
  ],
37
  "logging_steps": 500,
38
- "max_steps": 1335,
39
  "num_input_tokens_seen": 0,
40
- "num_train_epochs": 5,
41
  "save_steps": 500,
42
  "total_flos": 349221394826640.0,
43
  "train_batch_size": 32,
44
  "trial_name": null,
45
  "trial_params": {
46
- "num_train_epochs": 5
47
  }
48
  }
 
1
  {
2
+ "best_metric": 0.6037774682044983,
3
  "best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7530516431924883,
14
+ "eval_loss": 0.6065725684165955,
15
+ "eval_runtime": 2.5417,
16
+ "eval_samples_per_second": 838.036,
17
+ "eval_steps_per_second": 52.722,
18
  "step": 267
19
  },
20
  {
21
  "epoch": 1.87,
22
+ "grad_norm": 9.447751998901367,
23
+ "learning_rate": 9.825065893169892e-07,
24
+ "loss": 0.6083,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.7615023474178404,
30
+ "eval_loss": 0.6037774682044983,
31
+ "eval_runtime": 2.5779,
32
+ "eval_samples_per_second": 826.263,
33
+ "eval_steps_per_second": 51.981,
34
  "step": 534
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 534,
39
  "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 2,
41
  "save_steps": 500,
42
  "total_flos": 349221394826640.0,
43
  "train_batch_size": 32,
44
  "trial_name": null,
45
  "trial_params": {
46
+ "weight_decay": 4.018191065683781e-05
47
  }
48
  }
run-4/checkpoint-534/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42e5e5ec734e47895bff309ce0f98b97c457fc4dc71b42d53c94016f0c7a855
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f2b8ff5812e9a8e681e14d8d4a502beb7efad66683c16ee39765361b35b768
3
  size 4856
run-5/checkpoint-267/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:971a3ddf817b712dc07195c844772ef5d1ec4a8fd2faa3cfc511b714130228d0
3
  size 409103316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa79b65d0ba7529aee0550dc910f9468bf4b0ad08164867f686e834e0e9fd408
3
  size 409103316
run-5/checkpoint-267/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8b54fc5e8c9423b8eb78032472c72dce9ff03db39846d1622f31a3d6911c46
3
  size 818327802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47a133704ee5d893b30a7dc35c9b1d607c0d15dd4e32e8a90cf3cdd87699884
3
  size 818327802
run-5/checkpoint-267/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d605219d0cdfe1356d19dfc261c4f045664dbc44ddf04517d0b2ebd1dab264c7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3be8be9037b2d6a9348611f95c19fb1d17ca7cb1da92486df22b7e0a3c655a
3
  size 1064
run-5/checkpoint-267/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6019130349159241,
3
  "best_model_checkpoint": "BERT-WMM/run-5/checkpoint-267",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,23 +10,23 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.7563380281690141,
14
- "eval_loss": 0.6019130349159241,
15
- "eval_runtime": 2.4929,
16
- "eval_samples_per_second": 854.419,
17
- "eval_steps_per_second": 53.752,
18
  "step": 267
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 801,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 3,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
  "train_batch_size": 32,
28
  "trial_name": null,
29
  "trial_params": {
30
- "per_device_train_batch_size": 32
31
  }
32
  }
 
1
  {
2
+ "best_metric": 0.6066888570785522,
3
  "best_model_checkpoint": "BERT-WMM/run-5/checkpoint-267",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7539906103286385,
14
+ "eval_loss": 0.6066888570785522,
15
+ "eval_runtime": 2.5768,
16
+ "eval_samples_per_second": 826.614,
17
+ "eval_steps_per_second": 52.003,
18
  "step": 267
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 534,
23
  "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
  "train_batch_size": 32,
28
  "trial_name": null,
29
  "trial_params": {
30
+ "weight_decay": 0.08935453780478574
31
  }
32
  }
run-5/checkpoint-267/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cc12ec9008fe3ca8c11132361abe51e0525170497895a06dfb1609f916fd77f
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57316b4d399893de9d059984f406888c25d2a6d5ffb20848fef7218735cf9bd
3
  size 4856
runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522305.dcdaa3e6ec43.789.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5908acf12b09c9364f37fdcc1828450a70123a37e71027bfb4acc38d127db0d0
3
- size 5233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5988cc2775814fe44975831f32c32caf7771d6edd2d5bc5052cdfabafbbe56d1
3
+ size 6121
runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522401.dcdaa3e6ec43.789.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba934ae41c63dd7574183a022397f725e91e4705deaa7cfb417b01fd8f39be9
3
+ size 6122
runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522502.dcdaa3e6ec43.789.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb70d05343967559454757ce9e9c2dca96ac89f196cec4d89f868a35bc3f8c21
3
+ size 5766
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1238f3e9130ff13042dcb9e0ded88b87b25d59c0eb7bc889417ea846582c3c9b
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57316b4d399893de9d059984f406888c25d2a6d5ffb20848fef7218735cf9bd
3
  size 4856