HikasaHana
committed on
Commit
•
6b15685
1
Parent(s):
39bc368
Training in progress, epoch 1
Browse files
- model.safetensors +1 -1
- run-3/checkpoint-1068/model.safetensors +1 -1
- run-3/checkpoint-1068/optimizer.pt +1 -1
- run-3/checkpoint-1068/rng_state.pth +1 -1
- run-3/checkpoint-1068/scheduler.pt +1 -1
- run-3/checkpoint-1068/trainer_state.json +28 -31
- run-3/checkpoint-1068/training_args.bin +1 -1
- run-3/checkpoint-534/model.safetensors +1 -1
- run-3/checkpoint-534/optimizer.pt +1 -1
- run-3/checkpoint-534/rng_state.pth +1 -1
- run-3/checkpoint-534/scheduler.pt +1 -1
- run-3/checkpoint-534/trainer_state.json +15 -18
- run-3/checkpoint-534/training_args.bin +1 -1
- run-3/checkpoint-801/model.safetensors +1 -1
- run-3/checkpoint-801/optimizer.pt +1 -1
- run-3/checkpoint-801/rng_state.pth +1 -1
- run-3/checkpoint-801/scheduler.pt +1 -1
- run-3/checkpoint-801/trainer_state.json +20 -23
- run-3/checkpoint-801/training_args.bin +1 -1
- run-4/checkpoint-267/model.safetensors +1 -1
- run-4/checkpoint-267/optimizer.pt +1 -1
- run-4/checkpoint-267/scheduler.pt +1 -1
- run-4/checkpoint-267/trainer_state.json +9 -9
- run-4/checkpoint-267/training_args.bin +1 -1
- run-4/checkpoint-534/model.safetensors +1 -1
- run-4/checkpoint-534/optimizer.pt +1 -1
- run-4/checkpoint-534/scheduler.pt +1 -1
- run-4/checkpoint-534/trainer_state.json +17 -17
- run-4/checkpoint-534/training_args.bin +1 -1
- run-4/checkpoint-801/model.safetensors +1 -1
- run-4/checkpoint-801/optimizer.pt +1 -1
- run-4/checkpoint-801/scheduler.pt +1 -1
- run-4/checkpoint-801/trainer_state.json +22 -22
- run-4/checkpoint-801/training_args.bin +1 -1
- runs/Apr19_08-35-52_21d31c708e60/events.out.tfevents.1713517318.21d31c708e60.946.24 +3 -0
- training_args.bin +1 -1
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:709e0174af7e7bfc36b2ca42177cb07663d435c4830e3ef2fd95156cc7684138
|
3 |
size 409103316
|
run-3/checkpoint-1068/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:638ef2c8256344c11b1952e1c4803f20ccd2b0e0c4c754cc573884f7f3399cea
|
3 |
size 409103316
|
run-3/checkpoint-1068/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bee566473dc2dae2be0d80c7c09cd94e61ef20ac726fbf26c3b5573a0884d8b7
|
3 |
size 818327802
|
run-3/checkpoint-1068/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88efce74fbb2721dc5ac34e297a693ee9d8b613381cc20bc06917c3f551f452b
|
3 |
size 14244
|
run-3/checkpoint-1068/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf5c025422936aa654887c4e9b654e6c7b7b7d0a40a731b6b887b2c5d246abef
|
3 |
size 1064
|
run-3/checkpoint-1068/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 4.0,
|
5 |
"eval_steps": 500,
|
@@ -10,52 +10,52 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm":
|
23 |
-
"learning_rate":
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second":
|
34 |
"step": 534
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
-
"eval_accuracy": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_runtime":
|
41 |
-
"eval_samples_per_second":
|
42 |
-
"eval_steps_per_second":
|
43 |
"step": 801
|
44 |
},
|
45 |
{
|
46 |
"epoch": 3.75,
|
47 |
-
"grad_norm":
|
48 |
-
"learning_rate":
|
49 |
-
"loss": 0.
|
50 |
"step": 1000
|
51 |
},
|
52 |
{
|
53 |
"epoch": 4.0,
|
54 |
-
"eval_accuracy": 0.
|
55 |
-
"eval_loss": 0.
|
56 |
-
"eval_runtime": 2.
|
57 |
-
"eval_samples_per_second":
|
58 |
-
"eval_steps_per_second":
|
59 |
"step": 1068
|
60 |
}
|
61 |
],
|
@@ -68,9 +68,6 @@
|
|
68 |
"train_batch_size": 32,
|
69 |
"trial_name": null,
|
70 |
"trial_params": {
|
71 |
-
"
|
72 |
-
"num_train_epochs": 4,
|
73 |
-
"per_device_train_batch_size": 32,
|
74 |
-
"weight_decay": 1.7684940065509674e-05
|
75 |
}
|
76 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5784130692481995,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 4.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7549295774647887,
|
14 |
+
"eval_loss": 0.603171169757843,
|
15 |
+
"eval_runtime": 2.3997,
|
16 |
+
"eval_samples_per_second": 887.602,
|
17 |
+
"eval_steps_per_second": 55.84,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.50742244720459,
|
23 |
+
"learning_rate": 8.206819746059557e-06,
|
24 |
+
"loss": 0.6057,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7676056338028169,
|
30 |
+
"eval_loss": 0.5784130692481995,
|
31 |
+
"eval_runtime": 2.4488,
|
32 |
+
"eval_samples_per_second": 869.828,
|
33 |
+
"eval_steps_per_second": 54.722,
|
34 |
"step": 534
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
+
"eval_accuracy": 0.7657276995305164,
|
39 |
+
"eval_loss": 0.6564009189605713,
|
40 |
+
"eval_runtime": 2.4626,
|
41 |
+
"eval_samples_per_second": 864.929,
|
42 |
+
"eval_steps_per_second": 54.413,
|
43 |
"step": 801
|
44 |
},
|
45 |
{
|
46 |
"epoch": 3.75,
|
47 |
+
"grad_norm": 23.906795501708984,
|
48 |
+
"learning_rate": 9.825065893169892e-07,
|
49 |
+
"loss": 0.2903,
|
50 |
"step": 1000
|
51 |
},
|
52 |
{
|
53 |
"epoch": 4.0,
|
54 |
+
"eval_accuracy": 0.7544600938967136,
|
55 |
+
"eval_loss": 0.7257726788520813,
|
56 |
+
"eval_runtime": 2.4681,
|
57 |
+
"eval_samples_per_second": 863.014,
|
58 |
+
"eval_steps_per_second": 54.293,
|
59 |
"step": 1068
|
60 |
}
|
61 |
],
|
|
|
68 |
"train_batch_size": 32,
|
69 |
"trial_name": null,
|
70 |
"trial_params": {
|
71 |
+
"num_train_epochs": 4
|
|
|
|
|
|
|
72 |
}
|
73 |
}
|
run-3/checkpoint-1068/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8c637656beacaec83a46c1d786535828a5c615851f4d21b0069fcadd723fa83
|
3 |
size 4856
|
run-3/checkpoint-534/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fed076af4c274336a1bbda01e7b9e95519d0bf0a1fe5c7f712970d2aa901f093
|
3 |
size 409103316
|
run-3/checkpoint-534/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2699e5ce5a66cbca0b105d6408b39db28914becd331e717afd744ad8431c462
|
3 |
size 818327802
|
run-3/checkpoint-534/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50b42ca1a83d9e0487eef7bc4978f59ad97364890e28f585669fc8e766195ed9
|
3 |
size 14244
|
run-3/checkpoint-534/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9844fc449bc7d92d10fa8131195695656fc5bae595590761350f9f767121bb98
|
3 |
size 1064
|
run-3/checkpoint-534/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
@@ -10,27 +10,27 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm":
|
23 |
-
"learning_rate":
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second":
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
@@ -43,9 +43,6 @@
|
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
-
"
|
47 |
-
"num_train_epochs": 4,
|
48 |
-
"per_device_train_batch_size": 32,
|
49 |
-
"weight_decay": 1.7684940065509674e-05
|
50 |
}
|
51 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5784130692481995,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7549295774647887,
|
14 |
+
"eval_loss": 0.603171169757843,
|
15 |
+
"eval_runtime": 2.3997,
|
16 |
+
"eval_samples_per_second": 887.602,
|
17 |
+
"eval_steps_per_second": 55.84,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.50742244720459,
|
23 |
+
"learning_rate": 8.206819746059557e-06,
|
24 |
+
"loss": 0.6057,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7676056338028169,
|
30 |
+
"eval_loss": 0.5784130692481995,
|
31 |
+
"eval_runtime": 2.4488,
|
32 |
+
"eval_samples_per_second": 869.828,
|
33 |
+
"eval_steps_per_second": 54.722,
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
|
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
+
"num_train_epochs": 4
|
|
|
|
|
|
|
47 |
}
|
48 |
}
|
run-3/checkpoint-534/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8c637656beacaec83a46c1d786535828a5c615851f4d21b0069fcadd723fa83
|
3 |
size 4856
|
run-3/checkpoint-801/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abcee7b655d87d819ba748aa8f11b89f8184a75c41ea3f5cc09ed28b8dd04dde
|
3 |
size 409103316
|
run-3/checkpoint-801/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b4d09b23a9d36890a6ed4f07e4aa3231a59c99ec420ee4282010c427b8c1c25
|
3 |
size 818327802
|
run-3/checkpoint-801/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f2e88e818a2ba76e34870352cb30ede96939fd3b25fc6442146490e0caecf29
|
3 |
size 14244
|
run-3/checkpoint-801/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60b556c69d4b9a7a2ce657180051c90827c184737105bee587768464e8ffe74c
|
3 |
size 1064
|
run-3/checkpoint-801/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
@@ -10,36 +10,36 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second":
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm":
|
23 |
-
"learning_rate":
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second":
|
34 |
"step": 534
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
-
"eval_accuracy": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_runtime":
|
41 |
-
"eval_samples_per_second":
|
42 |
-
"eval_steps_per_second":
|
43 |
"step": 801
|
44 |
}
|
45 |
],
|
@@ -52,9 +52,6 @@
|
|
52 |
"train_batch_size": 32,
|
53 |
"trial_name": null,
|
54 |
"trial_params": {
|
55 |
-
"
|
56 |
-
"num_train_epochs": 4,
|
57 |
-
"per_device_train_batch_size": 32,
|
58 |
-
"weight_decay": 1.7684940065509674e-05
|
59 |
}
|
60 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5784130692481995,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-3/checkpoint-534",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7549295774647887,
|
14 |
+
"eval_loss": 0.603171169757843,
|
15 |
+
"eval_runtime": 2.3997,
|
16 |
+
"eval_samples_per_second": 887.602,
|
17 |
+
"eval_steps_per_second": 55.84,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.50742244720459,
|
23 |
+
"learning_rate": 8.206819746059557e-06,
|
24 |
+
"loss": 0.6057,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7676056338028169,
|
30 |
+
"eval_loss": 0.5784130692481995,
|
31 |
+
"eval_runtime": 2.4488,
|
32 |
+
"eval_samples_per_second": 869.828,
|
33 |
+
"eval_steps_per_second": 54.722,
|
34 |
"step": 534
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
+
"eval_accuracy": 0.7657276995305164,
|
39 |
+
"eval_loss": 0.6564009189605713,
|
40 |
+
"eval_runtime": 2.4626,
|
41 |
+
"eval_samples_per_second": 864.929,
|
42 |
+
"eval_steps_per_second": 54.413,
|
43 |
"step": 801
|
44 |
}
|
45 |
],
|
|
|
52 |
"train_batch_size": 32,
|
53 |
"trial_name": null,
|
54 |
"trial_params": {
|
55 |
+
"num_train_epochs": 4
|
|
|
|
|
|
|
56 |
}
|
57 |
}
|
run-3/checkpoint-801/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8c637656beacaec83a46c1d786535828a5c615851f4d21b0069fcadd723fa83
|
3 |
size 4856
|
run-4/checkpoint-267/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:709e0174af7e7bfc36b2ca42177cb07663d435c4830e3ef2fd95156cc7684138
|
3 |
size 409103316
|
run-4/checkpoint-267/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52ef4431d2c45c5021200b1e749d9a863141163026b276944e492c1db9ee3f59
|
3 |
size 818327802
|
run-4/checkpoint-267/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44c8fc955f6ab66538b705aa40f6ebb08474ec445464a7a19ca29971b9fc42d4
|
3 |
size 1064
|
run-4/checkpoint-267/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-267",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
@@ -10,23 +10,23 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second": 54.
|
18 |
"step": 267
|
19 |
}
|
20 |
],
|
21 |
"logging_steps": 500,
|
22 |
-
"max_steps":
|
23 |
"num_input_tokens_seen": 0,
|
24 |
-
"num_train_epochs":
|
25 |
"save_steps": 500,
|
26 |
"total_flos": 0,
|
27 |
"train_batch_size": 32,
|
28 |
"trial_name": null,
|
29 |
"trial_params": {
|
30 |
-
"
|
31 |
}
|
32 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6081846952438354,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-267",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7577464788732394,
|
14 |
+
"eval_loss": 0.6081846952438354,
|
15 |
+
"eval_runtime": 2.4392,
|
16 |
+
"eval_samples_per_second": 873.231,
|
17 |
+
"eval_steps_per_second": 54.936,
|
18 |
"step": 267
|
19 |
}
|
20 |
],
|
21 |
"logging_steps": 500,
|
22 |
+
"max_steps": 1335,
|
23 |
"num_input_tokens_seen": 0,
|
24 |
+
"num_train_epochs": 5,
|
25 |
"save_steps": 500,
|
26 |
"total_flos": 0,
|
27 |
"train_batch_size": 32,
|
28 |
"trial_name": null,
|
29 |
"trial_params": {
|
30 |
+
"num_train_epochs": 5
|
31 |
}
|
32 |
}
|
run-4/checkpoint-267/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d42e5e5ec734e47895bff309ce0f98b97c457fc4dc71b42d53c94016f0c7a855
|
3 |
size 4856
|
run-4/checkpoint-534/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:976557a36bef1a4a492485f3f6cb8948205b7b3a9ea05ce8ce0eb8982fcc1d9e
|
3 |
size 409103316
|
run-4/checkpoint-534/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:890b6f5cc14af75c85002de6e2745609f1bf02f4a33a71cd0af847d31243fe8a
|
3 |
size 818327802
|
run-4/checkpoint-534/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e2aa06609570d504e0cfaf6c3bd801e1681706921a7512c6ccb9eded784901f
|
3 |
size 1064
|
run-4/checkpoint-534/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
@@ -10,39 +10,39 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second": 54.
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm": 9.
|
23 |
-
"learning_rate":
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second": 54.
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
-
"max_steps":
|
39 |
"num_input_tokens_seen": 0,
|
40 |
-
"num_train_epochs":
|
41 |
"save_steps": 500,
|
42 |
"total_flos": 349221394826640.0,
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
-
"
|
47 |
}
|
48 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5776590704917908,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
|
4 |
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7577464788732394,
|
14 |
+
"eval_loss": 0.6081846952438354,
|
15 |
+
"eval_runtime": 2.4392,
|
16 |
+
"eval_samples_per_second": 873.231,
|
17 |
+
"eval_steps_per_second": 54.936,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.532360076904297,
|
23 |
+
"learning_rate": 9.65168237740807e-06,
|
24 |
+
"loss": 0.6057,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7657276995305164,
|
30 |
+
"eval_loss": 0.5776590704917908,
|
31 |
+
"eval_runtime": 2.4594,
|
32 |
+
"eval_samples_per_second": 866.069,
|
33 |
+
"eval_steps_per_second": 54.485,
|
34 |
"step": 534
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
+
"max_steps": 1335,
|
39 |
"num_input_tokens_seen": 0,
|
40 |
+
"num_train_epochs": 5,
|
41 |
"save_steps": 500,
|
42 |
"total_flos": 349221394826640.0,
|
43 |
"train_batch_size": 32,
|
44 |
"trial_name": null,
|
45 |
"trial_params": {
|
46 |
+
"num_train_epochs": 5
|
47 |
}
|
48 |
}
|
run-4/checkpoint-534/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d42e5e5ec734e47895bff309ce0f98b97c457fc4dc71b42d53c94016f0c7a855
|
3 |
size 4856
|
run-4/checkpoint-801/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 409103316
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22b99fd8821bb6a7a58373b3e00909c671d6605dea0dd95a2c9cb3761b3b7f4e
|
3 |
size 409103316
|
run-4/checkpoint-801/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 818327802
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62a624295ad87feed2451e9981cc71ab700084e2468c6ef4e3d6a128b0f68b6c
|
3 |
size 818327802
|
run-4/checkpoint-801/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d4fd24f628450570301e96568f256a50cad9a43cd0a510b6a50e86be9f9a3a5
|
3 |
size 1064
|
run-4/checkpoint-801/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
@@ -10,48 +10,48 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_loss": 0.
|
15 |
-
"eval_runtime": 2.
|
16 |
-
"eval_samples_per_second":
|
17 |
-
"eval_steps_per_second": 54.
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
-
"grad_norm": 9.
|
23 |
-
"learning_rate":
|
24 |
-
"loss": 0.
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
-
"eval_accuracy": 0.
|
30 |
-
"eval_loss": 0.
|
31 |
-
"eval_runtime": 2.
|
32 |
-
"eval_samples_per_second":
|
33 |
-
"eval_steps_per_second": 54.
|
34 |
"step": 534
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
-
"eval_accuracy": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_runtime": 2.
|
41 |
-
"eval_samples_per_second":
|
42 |
-
"eval_steps_per_second":
|
43 |
"step": 801
|
44 |
}
|
45 |
],
|
46 |
"logging_steps": 500,
|
47 |
-
"max_steps":
|
48 |
"num_input_tokens_seen": 0,
|
49 |
-
"num_train_epochs":
|
50 |
"save_steps": 500,
|
51 |
"total_flos": 349221394826640.0,
|
52 |
"train_batch_size": 32,
|
53 |
"trial_name": null,
|
54 |
"trial_params": {
|
55 |
-
"
|
56 |
}
|
57 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.5776590704917908,
|
3 |
"best_model_checkpoint": "BERT-WMM/run-4/checkpoint-534",
|
4 |
"epoch": 3.0,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.7577464788732394,
|
14 |
+
"eval_loss": 0.6081846952438354,
|
15 |
+
"eval_runtime": 2.4392,
|
16 |
+
"eval_samples_per_second": 873.231,
|
17 |
+
"eval_steps_per_second": 54.936,
|
18 |
"step": 267
|
19 |
},
|
20 |
{
|
21 |
"epoch": 1.87,
|
22 |
+
"grad_norm": 9.532360076904297,
|
23 |
+
"learning_rate": 9.65168237740807e-06,
|
24 |
+
"loss": 0.6057,
|
25 |
"step": 500
|
26 |
},
|
27 |
{
|
28 |
"epoch": 2.0,
|
29 |
+
"eval_accuracy": 0.7657276995305164,
|
30 |
+
"eval_loss": 0.5776590704917908,
|
31 |
+
"eval_runtime": 2.4594,
|
32 |
+
"eval_samples_per_second": 866.069,
|
33 |
+
"eval_steps_per_second": 54.485,
|
34 |
"step": 534
|
35 |
},
|
36 |
{
|
37 |
"epoch": 3.0,
|
38 |
+
"eval_accuracy": 0.7690140845070422,
|
39 |
+
"eval_loss": 0.6647635102272034,
|
40 |
+
"eval_runtime": 2.4523,
|
41 |
+
"eval_samples_per_second": 868.556,
|
42 |
+
"eval_steps_per_second": 54.642,
|
43 |
"step": 801
|
44 |
}
|
45 |
],
|
46 |
"logging_steps": 500,
|
47 |
+
"max_steps": 1335,
|
48 |
"num_input_tokens_seen": 0,
|
49 |
+
"num_train_epochs": 5,
|
50 |
"save_steps": 500,
|
51 |
"total_flos": 349221394826640.0,
|
52 |
"train_batch_size": 32,
|
53 |
"trial_name": null,
|
54 |
"trial_params": {
|
55 |
+
"num_train_epochs": 5
|
56 |
}
|
57 |
}
|
run-4/checkpoint-801/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d42e5e5ec734e47895bff309ce0f98b97c457fc4dc71b42d53c94016f0c7a855
|
3 |
size 4856
|
runs/Apr19_08-35-52_21d31c708e60/events.out.tfevents.1713517318.21d31c708e60.946.24
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3dc83e8492bb33036fe1c271a9d1d2b3531582185b77e860f0ebc378b9e5bb4
|
3 |
+
size 7285
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d42e5e5ec734e47895bff309ce0f98b97c457fc4dc71b42d53c94016f0c7a855
|
3 |
size 4856
|