Commit
·
451893a
1
Parent(s):
1718eb7
End of training
Browse files- checkpoint-12838/config.json +31 -0
- checkpoint-12838/optimizer.pt +3 -0
- checkpoint-12838/pytorch_model.bin +3 -0
- checkpoint-12838/rng_state.pth +3 -0
- checkpoint-12838/scaler.pt +3 -0
- checkpoint-12838/scheduler.pt +3 -0
- checkpoint-12838/trainer_state.json +44 -0
- checkpoint-12838/training_args.bin +3 -0
- checkpoint-19257/config.json +31 -0
- checkpoint-19257/optimizer.pt +3 -0
- checkpoint-19257/pytorch_model.bin +3 -0
- checkpoint-19257/rng_state.pth +3 -0
- checkpoint-19257/scaler.pt +3 -0
- checkpoint-19257/scheduler.pt +3 -0
- checkpoint-19257/trainer_state.json +58 -0
- checkpoint-19257/training_args.bin +3 -0
- checkpoint-25676/config.json +31 -0
- checkpoint-25676/optimizer.pt +3 -0
- checkpoint-25676/pytorch_model.bin +3 -0
- checkpoint-25676/rng_state.pth +3 -0
- checkpoint-25676/scaler.pt +3 -0
- checkpoint-25676/scheduler.pt +3 -0
- checkpoint-25676/trainer_state.json +72 -0
- checkpoint-25676/training_args.bin +3 -0
- checkpoint-32095/config.json +31 -0
- checkpoint-32095/optimizer.pt +3 -0
- checkpoint-32095/pytorch_model.bin +3 -0
- checkpoint-32095/rng_state.pth +3 -0
- checkpoint-32095/scaler.pt +3 -0
- checkpoint-32095/scheduler.pt +3 -0
- checkpoint-32095/trainer_state.json +86 -0
- checkpoint-32095/training_args.bin +3 -0
- pytorch_model.bin +1 -1
- runs/May30_14-29-18_9a4805da8a6d/events.out.tfevents.1653920983.9a4805da8a6d.80.3 +2 -2
checkpoint-12838/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/content/models/bert_modif_emb",
|
3 |
+
"architectures": [
|
4 |
+
"BertForMaskedLM"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pooler_fc_size": 768,
|
21 |
+
"pooler_num_attention_heads": 12,
|
22 |
+
"pooler_num_fc_layers": 3,
|
23 |
+
"pooler_size_per_head": 128,
|
24 |
+
"pooler_type": "first_token_transform",
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.19.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 32000
|
31 |
+
}
|
checkpoint-12838/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ea61bf44d61641d59103e829adc93090b8d7eb92f1505e9b815b6731dec3cbd
|
3 |
+
size 885324121
|
checkpoint-12838/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f606c8d4900a8209c79fbcf5235e936287b216774982c26f3f734d30ea91e23
|
3 |
+
size 442675179
|
checkpoint-12838/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccd73439f3d9f0a1b3501ea081c02b7b7471ec025c2fbebecd68b79e7c35bf3a
|
3 |
+
size 14503
|
checkpoint-12838/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6017fdcf5f56dbcc9a92708c8c0ceded87054b03a1ffd0cd7fc66c53ff5ec716
|
3 |
+
size 559
|
checkpoint-12838/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d35205cd0217c54f2c2ba9dcdf2a3d3976b059cc6222cff585d4954618b8d0c4
|
3 |
+
size 623
|
checkpoint-12838/trainer_state.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"global_step": 12838,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 1.0,
|
12 |
+
"learning_rate": 8.889735156655704e-06,
|
13 |
+
"loss": 7.3112,
|
14 |
+
"step": 6419
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.0,
|
18 |
+
"eval_loss": 6.181427955627441,
|
19 |
+
"eval_runtime": 64.709,
|
20 |
+
"eval_samples_per_second": 198.396,
|
21 |
+
"eval_steps_per_second": 9.921,
|
22 |
+
"step": 6419
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"epoch": 2.0,
|
26 |
+
"learning_rate": 6.668513069066991e-06,
|
27 |
+
"loss": 5.8524,
|
28 |
+
"step": 12838
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_loss": 5.407498359680176,
|
33 |
+
"eval_runtime": 64.7053,
|
34 |
+
"eval_samples_per_second": 198.407,
|
35 |
+
"eval_steps_per_second": 9.922,
|
36 |
+
"step": 12838
|
37 |
+
}
|
38 |
+
],
|
39 |
+
"max_steps": 32095,
|
40 |
+
"num_train_epochs": 5,
|
41 |
+
"total_flos": 1.689540838293504e+16,
|
42 |
+
"trial_name": null,
|
43 |
+
"trial_params": null
|
44 |
+
}
|
checkpoint-12838/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc6a6e3558851af45bb5a2bf276230e5bbacabfdee3309629484b8049458d984
|
3 |
+
size 3247
|
checkpoint-19257/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/content/models/bert_modif_emb",
|
3 |
+
"architectures": [
|
4 |
+
"BertForMaskedLM"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pooler_fc_size": 768,
|
21 |
+
"pooler_num_attention_heads": 12,
|
22 |
+
"pooler_num_fc_layers": 3,
|
23 |
+
"pooler_size_per_head": 128,
|
24 |
+
"pooler_type": "first_token_transform",
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.19.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 32000
|
31 |
+
}
|
checkpoint-19257/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fba999c3c1a2f308f80b5d8f367693c35e7c07ebb9d0fcd2e1cc1fa51d831c77
|
3 |
+
size 885324121
|
checkpoint-19257/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e516c7051df0161c608388e3b3c76ac27ad4aa82542e5e94ec299d8824b759e
|
3 |
+
size 442675179
|
checkpoint-19257/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bb60a63e8305a0c2468f008d689d5a382029bb9705d7b6233766a7d33af3c2f
|
3 |
+
size 14503
|
checkpoint-19257/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa387421e8fe914b42d6078556cf371000d33113d1ad9297d06114cb86b119fb
|
3 |
+
size 559
|
checkpoint-19257/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1f5089926f44d1d87946b35c57b32d5688daa313074a5417b78cd3064326b65
|
3 |
+
size 623
|
checkpoint-19257/trainer_state.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0,
|
5 |
+
"global_step": 19257,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 1.0,
|
12 |
+
"learning_rate": 8.889735156655704e-06,
|
13 |
+
"loss": 7.3112,
|
14 |
+
"step": 6419
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.0,
|
18 |
+
"eval_loss": 6.181427955627441,
|
19 |
+
"eval_runtime": 64.709,
|
20 |
+
"eval_samples_per_second": 198.396,
|
21 |
+
"eval_steps_per_second": 9.921,
|
22 |
+
"step": 6419
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"epoch": 2.0,
|
26 |
+
"learning_rate": 6.668513069066991e-06,
|
27 |
+
"loss": 5.8524,
|
28 |
+
"step": 12838
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_loss": 5.407498359680176,
|
33 |
+
"eval_runtime": 64.7053,
|
34 |
+
"eval_samples_per_second": 198.407,
|
35 |
+
"eval_steps_per_second": 9.922,
|
36 |
+
"step": 12838
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 3.0,
|
40 |
+
"learning_rate": 4.446944781028216e-06,
|
41 |
+
"loss": 5.3392,
|
42 |
+
"step": 19257
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"epoch": 3.0,
|
46 |
+
"eval_loss": 5.080959320068359,
|
47 |
+
"eval_runtime": 64.6173,
|
48 |
+
"eval_samples_per_second": 198.677,
|
49 |
+
"eval_steps_per_second": 9.935,
|
50 |
+
"step": 19257
|
51 |
+
}
|
52 |
+
],
|
53 |
+
"max_steps": 32095,
|
54 |
+
"num_train_epochs": 5,
|
55 |
+
"total_flos": 2.534311257440256e+16,
|
56 |
+
"trial_name": null,
|
57 |
+
"trial_params": null
|
58 |
+
}
|
checkpoint-19257/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc6a6e3558851af45bb5a2bf276230e5bbacabfdee3309629484b8049458d984
|
3 |
+
size 3247
|
checkpoint-25676/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/content/models/bert_modif_emb",
|
3 |
+
"architectures": [
|
4 |
+
"BertForMaskedLM"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pooler_fc_size": 768,
|
21 |
+
"pooler_num_attention_heads": 12,
|
22 |
+
"pooler_num_fc_layers": 3,
|
23 |
+
"pooler_size_per_head": 128,
|
24 |
+
"pooler_type": "first_token_transform",
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.19.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 32000
|
31 |
+
}
|
checkpoint-25676/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1783ea3d8ddef597c36cf4796d000e8bc78b7ef67b81447dac38840f4f554da
|
3 |
+
size 885324121
|
checkpoint-25676/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34265a9a39e091e716cb212258982ed86c6fe6a820c6c01fc203b4cd63e01312
|
3 |
+
size 442675179
|
checkpoint-25676/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26102bb97204416066f0b77f4c616d6ed8a4bf9b0b9d81f125050fc47b6422af
|
3 |
+
size 14503
|
checkpoint-25676/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49785070d9351e7bf61535d02deb7a939472be3ed62e579420dd3ca01272af6b
|
3 |
+
size 559
|
checkpoint-25676/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96739ca6df7177c5a0f01b44a659f5de9f9f62a41bc1c71ede2605e338ea6225
|
3 |
+
size 623
|
checkpoint-25676/trainer_state.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.0,
|
5 |
+
"global_step": 25676,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 1.0,
|
12 |
+
"learning_rate": 8.889735156655704e-06,
|
13 |
+
"loss": 7.3112,
|
14 |
+
"step": 6419
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.0,
|
18 |
+
"eval_loss": 6.181427955627441,
|
19 |
+
"eval_runtime": 64.709,
|
20 |
+
"eval_samples_per_second": 198.396,
|
21 |
+
"eval_steps_per_second": 9.921,
|
22 |
+
"step": 6419
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"epoch": 2.0,
|
26 |
+
"learning_rate": 6.668513069066991e-06,
|
27 |
+
"loss": 5.8524,
|
28 |
+
"step": 12838
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_loss": 5.407498359680176,
|
33 |
+
"eval_runtime": 64.7053,
|
34 |
+
"eval_samples_per_second": 198.407,
|
35 |
+
"eval_steps_per_second": 9.922,
|
36 |
+
"step": 12838
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 3.0,
|
40 |
+
"learning_rate": 4.446944781028216e-06,
|
41 |
+
"loss": 5.3392,
|
42 |
+
"step": 19257
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"epoch": 3.0,
|
46 |
+
"eval_loss": 5.080959320068359,
|
47 |
+
"eval_runtime": 64.6173,
|
48 |
+
"eval_samples_per_second": 198.677,
|
49 |
+
"eval_steps_per_second": 9.935,
|
50 |
+
"step": 19257
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"epoch": 4.0,
|
54 |
+
"learning_rate": 2.2257226934395014e-06,
|
55 |
+
"loss": 5.0958,
|
56 |
+
"step": 25676
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 4.0,
|
60 |
+
"eval_loss": 4.901454448699951,
|
61 |
+
"eval_runtime": 64.6233,
|
62 |
+
"eval_samples_per_second": 198.659,
|
63 |
+
"eval_steps_per_second": 9.934,
|
64 |
+
"step": 25676
|
65 |
+
}
|
66 |
+
],
|
67 |
+
"max_steps": 32095,
|
68 |
+
"num_train_epochs": 5,
|
69 |
+
"total_flos": 3.379081676587008e+16,
|
70 |
+
"trial_name": null,
|
71 |
+
"trial_params": null
|
72 |
+
}
|
checkpoint-25676/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc6a6e3558851af45bb5a2bf276230e5bbacabfdee3309629484b8049458d984
|
3 |
+
size 3247
|
checkpoint-32095/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/content/models/bert_modif_emb",
|
3 |
+
"architectures": [
|
4 |
+
"BertForMaskedLM"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pooler_fc_size": 768,
|
21 |
+
"pooler_num_attention_heads": 12,
|
22 |
+
"pooler_num_fc_layers": 3,
|
23 |
+
"pooler_size_per_head": 128,
|
24 |
+
"pooler_type": "first_token_transform",
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.19.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 32000
|
31 |
+
}
|
checkpoint-32095/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaa394a7c8aec3a985b42599594f002ba9e313953930a7c953ae21adf27ef3c8
|
3 |
+
size 885324121
|
checkpoint-32095/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd771f458e46629f7fec5a86b90f3cfffb7e9b86f545ceeb7ce7499afb73ebe1
|
3 |
+
size 442675179
|
checkpoint-32095/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fdcc33a7aaa71308c29d1b50659ebc6a78284fa1a8c59733ef3edc8276bedb0
|
3 |
+
size 14503
|
checkpoint-32095/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d71b5382592d2ff7e32a0603d0e9db0a81425654dc7c7b16b5692a252cf24e3
|
3 |
+
size 559
|
checkpoint-32095/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd5ebce471016eec2f5eba2e97fee128ad5ffece685a5a95de6042eaea5877ae
|
3 |
+
size 623
|
checkpoint-32095/trainer_state.json
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.0,
|
5 |
+
"global_step": 32095,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 1.0,
|
12 |
+
"learning_rate": 8.889735156655704e-06,
|
13 |
+
"loss": 7.3112,
|
14 |
+
"step": 6419
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.0,
|
18 |
+
"eval_loss": 6.181427955627441,
|
19 |
+
"eval_runtime": 64.709,
|
20 |
+
"eval_samples_per_second": 198.396,
|
21 |
+
"eval_steps_per_second": 9.921,
|
22 |
+
"step": 6419
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"epoch": 2.0,
|
26 |
+
"learning_rate": 6.668513069066991e-06,
|
27 |
+
"loss": 5.8524,
|
28 |
+
"step": 12838
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_loss": 5.407498359680176,
|
33 |
+
"eval_runtime": 64.7053,
|
34 |
+
"eval_samples_per_second": 198.407,
|
35 |
+
"eval_steps_per_second": 9.922,
|
36 |
+
"step": 12838
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 3.0,
|
40 |
+
"learning_rate": 4.446944781028216e-06,
|
41 |
+
"loss": 5.3392,
|
42 |
+
"step": 19257
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"epoch": 3.0,
|
46 |
+
"eval_loss": 5.080959320068359,
|
47 |
+
"eval_runtime": 64.6173,
|
48 |
+
"eval_samples_per_second": 198.677,
|
49 |
+
"eval_steps_per_second": 9.935,
|
50 |
+
"step": 19257
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"epoch": 4.0,
|
54 |
+
"learning_rate": 2.2257226934395014e-06,
|
55 |
+
"loss": 5.0958,
|
56 |
+
"step": 25676
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 4.0,
|
60 |
+
"eval_loss": 4.901454448699951,
|
61 |
+
"eval_runtime": 64.6233,
|
62 |
+
"eval_samples_per_second": 198.659,
|
63 |
+
"eval_steps_per_second": 9.934,
|
64 |
+
"step": 25676
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 5.0,
|
68 |
+
"learning_rate": 4.154405400727021e-09,
|
69 |
+
"loss": 4.9897,
|
70 |
+
"step": 32095
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 5.0,
|
74 |
+
"eval_loss": 4.8497138023376465,
|
75 |
+
"eval_runtime": 64.6531,
|
76 |
+
"eval_samples_per_second": 198.568,
|
77 |
+
"eval_steps_per_second": 9.93,
|
78 |
+
"step": 32095
|
79 |
+
}
|
80 |
+
],
|
81 |
+
"max_steps": 32095,
|
82 |
+
"num_train_epochs": 5,
|
83 |
+
"total_flos": 4.22385209573376e+16,
|
84 |
+
"trial_name": null,
|
85 |
+
"trial_params": null
|
86 |
+
}
|
checkpoint-32095/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc6a6e3558851af45bb5a2bf276230e5bbacabfdee3309629484b8049458d984
|
3 |
+
size 3247
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 442675179
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd771f458e46629f7fec5a86b90f3cfffb7e9b86f545ceeb7ce7499afb73ebe1
|
3 |
size 442675179
|
runs/May30_14-29-18_9a4805da8a6d/events.out.tfevents.1653920983.9a4805da8a6d.80.3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96fe8d6bc35252e7b3d6b0d20ee5d996b6ea657f82decf53cd95ab548a54be3d
|
3 |
+
size 6328
|