Training in progress, epoch 1
Browse files- model.safetensors +1 -1
- run-3/checkpoint-534/model.safetensors +1 -1
- run-3/checkpoint-534/optimizer.pt +1 -1
- run-3/checkpoint-534/scheduler.pt +1 -1
- run-3/checkpoint-534/trainer_state.json +17 -17
- run-3/checkpoint-534/training_args.bin +1 -1
- run-4/checkpoint-267/model.safetensors +1 -1
- run-4/checkpoint-267/optimizer.pt +1 -1
- run-4/checkpoint-267/scheduler.pt +1 -1
- run-4/checkpoint-267/trainer_state.json +9 -9
- run-4/checkpoint-267/training_args.bin +1 -1
- run-4/checkpoint-534/model.safetensors +1 -1
- run-4/checkpoint-534/optimizer.pt +1 -1
- run-4/checkpoint-534/scheduler.pt +1 -1
- run-4/checkpoint-534/trainer_state.json +17 -17
- run-4/checkpoint-534/training_args.bin +1 -1
- run-5/checkpoint-267/model.safetensors +1 -1
- run-5/checkpoint-267/optimizer.pt +1 -1
- run-5/checkpoint-267/scheduler.pt +1 -1
- run-5/checkpoint-267/trainer_state.json +9 -9
- run-5/checkpoint-267/training_args.bin +1 -1
- runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522305.dcdaa3e6ec43.789.3 +2 -2
- runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522401.dcdaa3e6ec43.789.4 +3 -0
- runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522502.dcdaa3e6ec43.789.5 +3 -0
- training_args.bin +1 -1
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa79b65d0ba7529aee0550dc910f9468bf4b0ad08164867f686e834e0e9fd408
|
3 |
size 409103316
|
run-3/checkpoint-534/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61096730720c51ac6648ccb652d6e8883705568c6be1662245a2d864a0f89450
|
3 |
size 409103316
|
run-3/checkpoint-534/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01d1170bce624b8918138e8cc63259f0004659314d431d193f9b965250a6158b
|
3 |
size 818327802
|
run-3/checkpoint-534/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a56050495e1938ae70b239ae8ece3c43ac800b58f0a6e02bd7ed6cd1669797d
|
3 |
size 1064
|
run-3/checkpoint-534/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
@@ -10,39 +10,39 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm": 9.
|
23 |
-
"learning_rate":
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second":
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
-
"max_steps":
|
39 |
"num_input_tokens_seen": 0,
|
40 |
-
"num_train_epochs":
|
41 |
"save_steps": 500,
|
42 |
"total_flos": 349221394826640.0,
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
-
"
|
47 |
}
|
48 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6037538051605225,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7530516431924883,
|
14 |
+
"eval_loss": 0.6065873503684998,
|
15 |
+
"eval_runtime": 2.5921,
|
16 |
+
"eval_samples_per_second": 821.724,
|
17 |
+
"eval_steps_per_second": 51.695,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.447236061096191,
|
23 |
+
"learning_rate": 9.825065893169892e-07,
|
24 |
+
"loss": 0.6083,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7615023474178404,
|
30 |
+
"eval_loss": 0.6037538051605225,
|
31 |
+
"eval_runtime": 2.5455,
|
32 |
+
"eval_samples_per_second": 836.779,
|
33 |
+
"eval_steps_per_second": 52.642,
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
+
"max_steps": 534,
|
39 |
"num_input_tokens_seen": 0,
|
40 |
+
"num_train_epochs": 2,
|
41 |
"save_steps": 500,
|
42 |
"total_flos": 349221394826640.0,
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
+
"weight_decay": 0.021167311357718124
|
47 |
}
|
48 |
}
|
run-3/checkpoint-534/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1238f3e9130ff13042dcb9e0ded88b87b25d59c0eb7bc889417ea846582c3c9b
|
3 |
size 4856
|
run-4/checkpoint-267/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0dc6ea72694ff54a88f458db9aff463c496bd6d83166f6fb57a4e51c5bb172
|
3 |
size 409103316
|
run-4/checkpoint-267/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5921b1879136a2de0f32be39d0aa1e2c1df44b7767609fbb8ab7984ca270865
|
3 |
size 818327802
|
run-4/checkpoint-267/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef3be8be9037b2d6a9348611f95c19fb1d17ca7cb1da92486df22b7e0a3c655a
|
3 |
size 1064
|
run-4/checkpoint-267/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-267",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
@@ -10,23 +10,23 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
}
|
20 |
],
|
21 |
"logging_steps": 500,
|
22 |
-
"max_steps":
|
23 |
"num_input_tokens_seen": 0,
|
24 |
-
"num_train_epochs":
|
25 |
"save_steps": 500,
|
26 |
"total_flos": 0,
|
27 |
"train_batch_size": 32,
|
28 |
"trial_name": null,
|
29 |
"trial_params": {
|
30 |
-
"
|
31 |
}
|
32 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6065725684165955,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-267",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7530516431924883,
|
14 |
+
"eval_loss": 0.6065725684165955,
|
15 |
+
"eval_runtime": 2.5417,
|
16 |
+
"eval_samples_per_second": 838.036,
|
17 |
+
"eval_steps_per_second": 52.722,
|
18 |
"step": 267
|
19 |
}
|
20 |
],
|
21 |
"logging_steps": 500,
|
22 |
+
"max_steps": 534,
|
23 |
"num_input_tokens_seen": 0,
|
24 |
+
"num_train_epochs": 2,
|
25 |
"save_steps": 500,
|
26 |
"total_flos": 0,
|
27 |
"train_batch_size": 32,
|
28 |
"trial_name": null,
|
29 |
"trial_params": {
|
30 |
+
"weight_decay": 4.018191065683781e-05
|
31 |
}
|
32 |
}
|
run-4/checkpoint-267/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61f2b8ff5812e9a8e681e14d8d4a502beb7efad66683c16ee39765361b35b768
|
3 |
size 4856
|
run-4/checkpoint-534/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5b3ada415323df4681659f8a3c19121e9379a2b0c7922cade957c3cc9526017
|
3 |
size 409103316
|
run-4/checkpoint-534/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:492785e0b2d4bc2fddbf03fc7aa5070a9d5992be16a4b8b568d67b7ad8b49cd5
|
3 |
size 818327802
|
run-4/checkpoint-534/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a56050495e1938ae70b239ae8ece3c43ac800b58f0a6e02bd7ed6cd1669797d
|
3 |
size 1064
|
run-4/checkpoint-534/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
@@ -10,39 +10,39 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm": 9.
|
23 |
-
"learning_rate": 9.
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second":
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
-
"max_steps":
|
39 |
"num_input_tokens_seen": 0,
|
40 |
-
"num_train_epochs":
|
41 |
"save_steps": 500,
|
42 |
"total_flos": 349221394826640.0,
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
-
"
|
47 |
}
|
48 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6037774682044983,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7530516431924883,
|
14 |
+
"eval_loss": 0.6065725684165955,
|
15 |
+
"eval_runtime": 2.5417,
|
16 |
+
"eval_samples_per_second": 838.036,
|
17 |
+
"eval_steps_per_second": 52.722,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.447751998901367,
|
23 |
+
"learning_rate": 9.825065893169892e-07,
|
24 |
+
"loss": 0.6083,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7615023474178404,
|
30 |
+
"eval_loss": 0.6037774682044983,
|
31 |
+
"eval_runtime": 2.5779,
|
32 |
+
"eval_samples_per_second": 826.263,
|
33 |
+
"eval_steps_per_second": 51.981,
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
+
"max_steps": 534,
|
39 |
"num_input_tokens_seen": 0,
|
40 |
+
"num_train_epochs": 2,
|
41 |
"save_steps": 500,
|
42 |
"total_flos": 349221394826640.0,
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
+
"weight_decay": 4.018191065683781e-05
|
47 |
}
|
48 |
}
|
run-4/checkpoint-534/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61f2b8ff5812e9a8e681e14d8d4a502beb7efad66683c16ee39765361b35b768
|
3 |
size 4856
|
run-5/checkpoint-267/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa79b65d0ba7529aee0550dc910f9468bf4b0ad08164867f686e834e0e9fd408
|
3 |
size 409103316
|
run-5/checkpoint-267/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d47a133704ee5d893b30a7dc35c9b1d607c0d15dd4e32e8a90cf3cdd87699884
|
3 |
size 818327802
|
run-5/checkpoint-267/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef3be8be9037b2d6a9348611f95c19fb1d17ca7cb1da92486df22b7e0a3c655a
|
3 |
size 1064
|
run-5/checkpoint-267/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-5/checkpoint-267",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
@@ -10,23 +10,23 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
}
|
20 |
],
|
21 |
"logging_steps": 500,
|
22 |
-
"max_steps":
|
23 |
"num_input_tokens_seen": 0,
|
24 |
-
"num_train_epochs":
|
25 |
"save_steps": 500,
|
26 |
"total_flos": 0,
|
27 |
"train_batch_size": 32,
|
28 |
"trial_name": null,
|
29 |
"trial_params": {
|
30 |
-
"
|
31 |
}
|
32 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6066888570785522,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-5/checkpoint-267",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7539906103286385,
|
14 |
+
"eval_loss": 0.6066888570785522,
|
15 |
+
"eval_runtime": 2.5768,
|
16 |
+
"eval_samples_per_second": 826.614,
|
17 |
+
"eval_steps_per_second": 52.003,
|
18 |
"step": 267
|
19 |
}
|
20 |
],
|
21 |
"logging_steps": 500,
|
22 |
+
"max_steps": 534,
|
23 |
"num_input_tokens_seen": 0,
|
24 |
+
"num_train_epochs": 2,
|
25 |
"save_steps": 500,
|
26 |
"total_flos": 0,
|
27 |
"train_batch_size": 32,
|
28 |
"trial_name": null,
|
29 |
"trial_params": {
|
30 |
+
"weight_decay": 0.08935453780478574
|
31 |
}
|
32 |
}
|
run-5/checkpoint-267/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b57316b4d399893de9d059984f406888c25d2a6d5ffb20848fef7218735cf9bd
|
3 |
size 4856
|
runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522305.dcdaa3e6ec43.789.3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5988cc2775814fe44975831f32c32caf7771d6edd2d5bc5052cdfabafbbe56d1
|
3 |
+
size 6121
|
runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522401.dcdaa3e6ec43.789.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fba934ae41c63dd7574183a022397f725e91e4705deaa7cfb417b01fd8f39be9
|
3 |
+
size 6122
|
runs/Apr19_10-19-01_dcdaa3e6ec43/events.out.tfevents.1713522502.dcdaa3e6ec43.789.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb70d05343967559454757ce9e9c2dca96ac89f196cec4d89f868a35bc3f8c21
|
3 |
+
size 5766
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b57316b4d399893de9d059984f406888c25d2a6d5ffb20848fef7218735cf9bd
|
3 |
size 4856
|