Training in progress, epoch 8
Browse files- checkpoint-490/added_tokens.json +1 -0
- checkpoint-490/bpe.codes +0 -0
- checkpoint-490/config.json +30 -0
- checkpoint-490/optimizer.pt +3 -0
- checkpoint-490/pytorch_model.bin +3 -0
- checkpoint-490/rng_state.pth +3 -0
- checkpoint-490/scaler.pt +3 -0
- checkpoint-490/scheduler.pt +3 -0
- checkpoint-490/special_tokens_map.json +1 -0
- checkpoint-490/tokenizer_config.json +1 -0
- checkpoint-490/trainer_state.json +128 -0
- checkpoint-490/training_args.bin +3 -0
- checkpoint-490/vocab.txt +0 -0
- checkpoint-560/added_tokens.json +1 -0
- checkpoint-560/bpe.codes +0 -0
- checkpoint-560/config.json +30 -0
- checkpoint-560/optimizer.pt +3 -0
- checkpoint-560/pytorch_model.bin +3 -0
- checkpoint-560/rng_state.pth +3 -0
- checkpoint-560/scaler.pt +3 -0
- checkpoint-560/scheduler.pt +3 -0
- checkpoint-560/special_tokens_map.json +1 -0
- checkpoint-560/tokenizer_config.json +1 -0
- checkpoint-560/trainer_state.json +144 -0
- checkpoint-560/training_args.bin +3 -0
- checkpoint-560/vocab.txt +0 -0
- checkpoint-630/added_tokens.json +1 -0
- checkpoint-630/bpe.codes +0 -0
- checkpoint-630/config.json +30 -0
- checkpoint-630/optimizer.pt +3 -0
- checkpoint-630/pytorch_model.bin +3 -0
- checkpoint-630/rng_state.pth +3 -0
- checkpoint-630/scaler.pt +3 -0
- checkpoint-630/scheduler.pt +3 -0
- checkpoint-630/special_tokens_map.json +1 -0
- checkpoint-630/tokenizer_config.json +1 -0
- checkpoint-630/trainer_state.json +160 -0
- checkpoint-630/training_args.bin +3 -0
- checkpoint-630/vocab.txt +0 -0
- pytorch_model.bin +1 -1
checkpoint-490/added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<mask>": 64000}
|
checkpoint-490/bpe.codes
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-490/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "vinai/bertweet-covid19-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
+
"layer_norm_eps": 1e-05,
|
17 |
+
"max_position_embeddings": 130,
|
18 |
+
"model_type": "roberta",
|
19 |
+
"num_attention_heads": 12,
|
20 |
+
"num_hidden_layers": 12,
|
21 |
+
"pad_token_id": 1,
|
22 |
+
"position_embedding_type": "absolute",
|
23 |
+
"problem_type": "multi_label_classification",
|
24 |
+
"tokenizer_class": "BertweetTokenizer",
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.17.0",
|
27 |
+
"type_vocab_size": 1,
|
28 |
+
"use_cache": true,
|
29 |
+
"vocab_size": 64001
|
30 |
+
}
|
checkpoint-490/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b6cca65e48afd84c968b05ea9d830f649a42c0d8ff900bbfb80f4f13aa085f1
|
3 |
+
size 1079327581
|
checkpoint-490/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a73e0815918bf34d607238230fa7e8c1a6b45e3dd18e4e1e7ff27622f92f55c2
|
3 |
+
size 539689325
|
checkpoint-490/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d83257f141c405f56ebf1fd0c66157688c5100db8f619a195f01d84adae4c732
|
3 |
+
size 14503
|
checkpoint-490/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11979c764c292bfef1356e2956fd3e04a9906c6b0fade7e93fe8270c7a297284
|
3 |
+
size 559
|
checkpoint-490/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:000c8a98942c0dc9ccd77fe00e03af1d353b776ed9e00bef3da51a275dbb6ba9
|
3 |
+
size 623
|
checkpoint-490/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
|
checkpoint-490/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"normalization": true, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "vinai/bertweet-covid19-base-cased", "tokenizer_class": "BertweetTokenizer"}
|
checkpoint-490/trainer_state.json
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.7381492469031885,
|
3 |
+
"best_model_checkpoint": "outputs/bertweet/checkpoint-490",
|
4 |
+
"epoch": 6.99290780141844,
|
5 |
+
"global_step": 490,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.99,
|
12 |
+
"learning_rate": 9.666666666666666e-07,
|
13 |
+
"loss": 0.6925,
|
14 |
+
"step": 70
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.99,
|
18 |
+
"eval_accuracy": 0.573,
|
19 |
+
"eval_f1": 0.3642720915448188,
|
20 |
+
"eval_loss": 0.6826930046081543,
|
21 |
+
"eval_runtime": 2.9877,
|
22 |
+
"eval_samples_per_second": 334.703,
|
23 |
+
"eval_steps_per_second": 41.838,
|
24 |
+
"step": 70
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 1.99,
|
28 |
+
"learning_rate": 9.333333333333333e-07,
|
29 |
+
"loss": 0.6823,
|
30 |
+
"step": 140
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 1.99,
|
34 |
+
"eval_accuracy": 0.573,
|
35 |
+
"eval_f1": 0.3642720915448188,
|
36 |
+
"eval_loss": 0.6736124753952026,
|
37 |
+
"eval_runtime": 3.0497,
|
38 |
+
"eval_samples_per_second": 327.901,
|
39 |
+
"eval_steps_per_second": 40.988,
|
40 |
+
"step": 140
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.99,
|
44 |
+
"learning_rate": 9e-07,
|
45 |
+
"loss": 0.6713,
|
46 |
+
"step": 210
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"epoch": 2.99,
|
50 |
+
"eval_accuracy": 0.587,
|
51 |
+
"eval_f1": 0.39928932873127865,
|
52 |
+
"eval_loss": 0.656767725944519,
|
53 |
+
"eval_runtime": 3.0012,
|
54 |
+
"eval_samples_per_second": 333.2,
|
55 |
+
"eval_steps_per_second": 41.65,
|
56 |
+
"step": 210
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 3.99,
|
60 |
+
"learning_rate": 8.666666666666667e-07,
|
61 |
+
"loss": 0.6468,
|
62 |
+
"step": 280
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 3.99,
|
66 |
+
"eval_accuracy": 0.7,
|
67 |
+
"eval_f1": 0.6707623826267894,
|
68 |
+
"eval_loss": 0.6210379600524902,
|
69 |
+
"eval_runtime": 3.0442,
|
70 |
+
"eval_samples_per_second": 328.492,
|
71 |
+
"eval_steps_per_second": 41.062,
|
72 |
+
"step": 280
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 4.99,
|
76 |
+
"learning_rate": 8.333333333333333e-07,
|
77 |
+
"loss": 0.6047,
|
78 |
+
"step": 350
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 4.99,
|
82 |
+
"eval_accuracy": 0.732,
|
83 |
+
"eval_f1": 0.7285955019767969,
|
84 |
+
"eval_loss": 0.5784569382667542,
|
85 |
+
"eval_runtime": 3.0658,
|
86 |
+
"eval_samples_per_second": 326.18,
|
87 |
+
"eval_steps_per_second": 40.773,
|
88 |
+
"step": 350
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 5.99,
|
92 |
+
"learning_rate": 8e-07,
|
93 |
+
"loss": 0.5648,
|
94 |
+
"step": 420
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"epoch": 5.99,
|
98 |
+
"eval_accuracy": 0.733,
|
99 |
+
"eval_f1": 0.7318671386608421,
|
100 |
+
"eval_loss": 0.5537055730819702,
|
101 |
+
"eval_runtime": 3.061,
|
102 |
+
"eval_samples_per_second": 326.692,
|
103 |
+
"eval_steps_per_second": 40.836,
|
104 |
+
"step": 420
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 6.99,
|
108 |
+
"learning_rate": 7.666666666666667e-07,
|
109 |
+
"loss": 0.536,
|
110 |
+
"step": 490
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 6.99,
|
114 |
+
"eval_accuracy": 0.739,
|
115 |
+
"eval_f1": 0.7381492469031885,
|
116 |
+
"eval_loss": 0.5406165719032288,
|
117 |
+
"eval_runtime": 3.3555,
|
118 |
+
"eval_samples_per_second": 298.016,
|
119 |
+
"eval_steps_per_second": 37.252,
|
120 |
+
"step": 490
|
121 |
+
}
|
122 |
+
],
|
123 |
+
"max_steps": 2100,
|
124 |
+
"num_train_epochs": 30,
|
125 |
+
"total_flos": 6215998682880000.0,
|
126 |
+
"trial_name": null,
|
127 |
+
"trial_params": null
|
128 |
+
}
|
checkpoint-490/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8be60ed58549fe1b3185c4d116e2ed7593eafbe162cc3300999b64c649675d04
|
3 |
+
size 3119
|
checkpoint-490/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-560/added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<mask>": 64000}
|
checkpoint-560/bpe.codes
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-560/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "vinai/bertweet-covid19-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
+
"layer_norm_eps": 1e-05,
|
17 |
+
"max_position_embeddings": 130,
|
18 |
+
"model_type": "roberta",
|
19 |
+
"num_attention_heads": 12,
|
20 |
+
"num_hidden_layers": 12,
|
21 |
+
"pad_token_id": 1,
|
22 |
+
"position_embedding_type": "absolute",
|
23 |
+
"problem_type": "multi_label_classification",
|
24 |
+
"tokenizer_class": "BertweetTokenizer",
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.17.0",
|
27 |
+
"type_vocab_size": 1,
|
28 |
+
"use_cache": true,
|
29 |
+
"vocab_size": 64001
|
30 |
+
}
|
checkpoint-560/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea0af2742714fc9813ca53320a44cae3c405a49e0b36321a924af03f000d43f2
|
3 |
+
size 1079327581
|
checkpoint-560/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3392dc2290586b73cc7916fafb51981e5fb4ecb7cda0a83f08409e2a009f33f1
|
3 |
+
size 539689325
|
checkpoint-560/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ca12a0b1d8dae51670b8290d0295c0b10ee4e96d2d8d15692d929bbab3c8dcf
|
3 |
+
size 14503
|
checkpoint-560/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ca221068257890306847f8d3ee335b25a16ad3c8d4a625f911e0389b9d1b6f
|
3 |
+
size 559
|
checkpoint-560/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:799c60f894579b51eeaadbf08b4332c24ed17d6cd503965f0723d32d97cd600b
|
3 |
+
size 623
|
checkpoint-560/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
|
checkpoint-560/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"normalization": true, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "vinai/bertweet-covid19-base-cased", "tokenizer_class": "BertweetTokenizer"}
|
checkpoint-560/trainer_state.json
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.7430750702529104,
|
3 |
+
"best_model_checkpoint": "outputs/bertweet/checkpoint-560",
|
4 |
+
"epoch": 7.99290780141844,
|
5 |
+
"global_step": 560,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.99,
|
12 |
+
"learning_rate": 9.666666666666666e-07,
|
13 |
+
"loss": 0.6925,
|
14 |
+
"step": 70
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.99,
|
18 |
+
"eval_accuracy": 0.573,
|
19 |
+
"eval_f1": 0.3642720915448188,
|
20 |
+
"eval_loss": 0.6826930046081543,
|
21 |
+
"eval_runtime": 2.9877,
|
22 |
+
"eval_samples_per_second": 334.703,
|
23 |
+
"eval_steps_per_second": 41.838,
|
24 |
+
"step": 70
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 1.99,
|
28 |
+
"learning_rate": 9.333333333333333e-07,
|
29 |
+
"loss": 0.6823,
|
30 |
+
"step": 140
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 1.99,
|
34 |
+
"eval_accuracy": 0.573,
|
35 |
+
"eval_f1": 0.3642720915448188,
|
36 |
+
"eval_loss": 0.6736124753952026,
|
37 |
+
"eval_runtime": 3.0497,
|
38 |
+
"eval_samples_per_second": 327.901,
|
39 |
+
"eval_steps_per_second": 40.988,
|
40 |
+
"step": 140
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.99,
|
44 |
+
"learning_rate": 9e-07,
|
45 |
+
"loss": 0.6713,
|
46 |
+
"step": 210
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"epoch": 2.99,
|
50 |
+
"eval_accuracy": 0.587,
|
51 |
+
"eval_f1": 0.39928932873127865,
|
52 |
+
"eval_loss": 0.656767725944519,
|
53 |
+
"eval_runtime": 3.0012,
|
54 |
+
"eval_samples_per_second": 333.2,
|
55 |
+
"eval_steps_per_second": 41.65,
|
56 |
+
"step": 210
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 3.99,
|
60 |
+
"learning_rate": 8.666666666666667e-07,
|
61 |
+
"loss": 0.6468,
|
62 |
+
"step": 280
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 3.99,
|
66 |
+
"eval_accuracy": 0.7,
|
67 |
+
"eval_f1": 0.6707623826267894,
|
68 |
+
"eval_loss": 0.6210379600524902,
|
69 |
+
"eval_runtime": 3.0442,
|
70 |
+
"eval_samples_per_second": 328.492,
|
71 |
+
"eval_steps_per_second": 41.062,
|
72 |
+
"step": 280
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 4.99,
|
76 |
+
"learning_rate": 8.333333333333333e-07,
|
77 |
+
"loss": 0.6047,
|
78 |
+
"step": 350
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 4.99,
|
82 |
+
"eval_accuracy": 0.732,
|
83 |
+
"eval_f1": 0.7285955019767969,
|
84 |
+
"eval_loss": 0.5784569382667542,
|
85 |
+
"eval_runtime": 3.0658,
|
86 |
+
"eval_samples_per_second": 326.18,
|
87 |
+
"eval_steps_per_second": 40.773,
|
88 |
+
"step": 350
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 5.99,
|
92 |
+
"learning_rate": 8e-07,
|
93 |
+
"loss": 0.5648,
|
94 |
+
"step": 420
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"epoch": 5.99,
|
98 |
+
"eval_accuracy": 0.733,
|
99 |
+
"eval_f1": 0.7318671386608421,
|
100 |
+
"eval_loss": 0.5537055730819702,
|
101 |
+
"eval_runtime": 3.061,
|
102 |
+
"eval_samples_per_second": 326.692,
|
103 |
+
"eval_steps_per_second": 40.836,
|
104 |
+
"step": 420
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 6.99,
|
108 |
+
"learning_rate": 7.666666666666667e-07,
|
109 |
+
"loss": 0.536,
|
110 |
+
"step": 490
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 6.99,
|
114 |
+
"eval_accuracy": 0.739,
|
115 |
+
"eval_f1": 0.7381492469031885,
|
116 |
+
"eval_loss": 0.5406165719032288,
|
117 |
+
"eval_runtime": 3.3555,
|
118 |
+
"eval_samples_per_second": 298.016,
|
119 |
+
"eval_steps_per_second": 37.252,
|
120 |
+
"step": 490
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"epoch": 7.99,
|
124 |
+
"learning_rate": 7.333333333333332e-07,
|
125 |
+
"loss": 0.5175,
|
126 |
+
"step": 560
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 7.99,
|
130 |
+
"eval_accuracy": 0.744,
|
131 |
+
"eval_f1": 0.7430750702529104,
|
132 |
+
"eval_loss": 0.5307690501213074,
|
133 |
+
"eval_runtime": 3.1364,
|
134 |
+
"eval_samples_per_second": 318.842,
|
135 |
+
"eval_steps_per_second": 39.855,
|
136 |
+
"step": 560
|
137 |
+
}
|
138 |
+
],
|
139 |
+
"max_steps": 2100,
|
140 |
+
"num_train_epochs": 30,
|
141 |
+
"total_flos": 7103998494720000.0,
|
142 |
+
"trial_name": null,
|
143 |
+
"trial_params": null
|
144 |
+
}
|
checkpoint-560/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8be60ed58549fe1b3185c4d116e2ed7593eafbe162cc3300999b64c649675d04
|
3 |
+
size 3119
|
checkpoint-560/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-630/added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<mask>": 64000}
|
checkpoint-630/bpe.codes
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-630/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "vinai/bertweet-covid19-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 3072,
|
16 |
+
"layer_norm_eps": 1e-05,
|
17 |
+
"max_position_embeddings": 130,
|
18 |
+
"model_type": "roberta",
|
19 |
+
"num_attention_heads": 12,
|
20 |
+
"num_hidden_layers": 12,
|
21 |
+
"pad_token_id": 1,
|
22 |
+
"position_embedding_type": "absolute",
|
23 |
+
"problem_type": "multi_label_classification",
|
24 |
+
"tokenizer_class": "BertweetTokenizer",
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.17.0",
|
27 |
+
"type_vocab_size": 1,
|
28 |
+
"use_cache": true,
|
29 |
+
"vocab_size": 64001
|
30 |
+
}
|
checkpoint-630/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed3275b6aa834b6e3731e0c09acb3cc7ce4de1843bb8e285c82980e6e8cdfb24
|
3 |
+
size 1079327581
|
checkpoint-630/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:340ff4f94bbd81a9356df698f57832fd0cd040d275e66581a4a35af4c555835f
|
3 |
+
size 539689325
|
checkpoint-630/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf2cef1bd2e8debc8dfd06090f4a9e33703d3bd8a60c5f233ea9bc207d960164
|
3 |
+
size 14503
|
checkpoint-630/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62ca5700851802910eef70fadd0dbe9dc923a65ce51e988971904b33dee70ff3
|
3 |
+
size 559
|
checkpoint-630/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c175b756e1d994b8675da4169311fa522310810e074f17bbabbc64f9f178114d
|
3 |
+
size 623
|
checkpoint-630/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
|
checkpoint-630/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"normalization": true, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "vinai/bertweet-covid19-base-cased", "tokenizer_class": "BertweetTokenizer"}
|
checkpoint-630/trainer_state.json
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.7504007121097755,
|
3 |
+
"best_model_checkpoint": "outputs/bertweet/checkpoint-630",
|
4 |
+
"epoch": 8.99290780141844,
|
5 |
+
"global_step": 630,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.99,
|
12 |
+
"learning_rate": 9.666666666666666e-07,
|
13 |
+
"loss": 0.6925,
|
14 |
+
"step": 70
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.99,
|
18 |
+
"eval_accuracy": 0.573,
|
19 |
+
"eval_f1": 0.3642720915448188,
|
20 |
+
"eval_loss": 0.6826930046081543,
|
21 |
+
"eval_runtime": 2.9877,
|
22 |
+
"eval_samples_per_second": 334.703,
|
23 |
+
"eval_steps_per_second": 41.838,
|
24 |
+
"step": 70
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 1.99,
|
28 |
+
"learning_rate": 9.333333333333333e-07,
|
29 |
+
"loss": 0.6823,
|
30 |
+
"step": 140
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 1.99,
|
34 |
+
"eval_accuracy": 0.573,
|
35 |
+
"eval_f1": 0.3642720915448188,
|
36 |
+
"eval_loss": 0.6736124753952026,
|
37 |
+
"eval_runtime": 3.0497,
|
38 |
+
"eval_samples_per_second": 327.901,
|
39 |
+
"eval_steps_per_second": 40.988,
|
40 |
+
"step": 140
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.99,
|
44 |
+
"learning_rate": 9e-07,
|
45 |
+
"loss": 0.6713,
|
46 |
+
"step": 210
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"epoch": 2.99,
|
50 |
+
"eval_accuracy": 0.587,
|
51 |
+
"eval_f1": 0.39928932873127865,
|
52 |
+
"eval_loss": 0.656767725944519,
|
53 |
+
"eval_runtime": 3.0012,
|
54 |
+
"eval_samples_per_second": 333.2,
|
55 |
+
"eval_steps_per_second": 41.65,
|
56 |
+
"step": 210
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 3.99,
|
60 |
+
"learning_rate": 8.666666666666667e-07,
|
61 |
+
"loss": 0.6468,
|
62 |
+
"step": 280
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 3.99,
|
66 |
+
"eval_accuracy": 0.7,
|
67 |
+
"eval_f1": 0.6707623826267894,
|
68 |
+
"eval_loss": 0.6210379600524902,
|
69 |
+
"eval_runtime": 3.0442,
|
70 |
+
"eval_samples_per_second": 328.492,
|
71 |
+
"eval_steps_per_second": 41.062,
|
72 |
+
"step": 280
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 4.99,
|
76 |
+
"learning_rate": 8.333333333333333e-07,
|
77 |
+
"loss": 0.6047,
|
78 |
+
"step": 350
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 4.99,
|
82 |
+
"eval_accuracy": 0.732,
|
83 |
+
"eval_f1": 0.7285955019767969,
|
84 |
+
"eval_loss": 0.5784569382667542,
|
85 |
+
"eval_runtime": 3.0658,
|
86 |
+
"eval_samples_per_second": 326.18,
|
87 |
+
"eval_steps_per_second": 40.773,
|
88 |
+
"step": 350
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 5.99,
|
92 |
+
"learning_rate": 8e-07,
|
93 |
+
"loss": 0.5648,
|
94 |
+
"step": 420
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"epoch": 5.99,
|
98 |
+
"eval_accuracy": 0.733,
|
99 |
+
"eval_f1": 0.7318671386608421,
|
100 |
+
"eval_loss": 0.5537055730819702,
|
101 |
+
"eval_runtime": 3.061,
|
102 |
+
"eval_samples_per_second": 326.692,
|
103 |
+
"eval_steps_per_second": 40.836,
|
104 |
+
"step": 420
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 6.99,
|
108 |
+
"learning_rate": 7.666666666666667e-07,
|
109 |
+
"loss": 0.536,
|
110 |
+
"step": 490
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 6.99,
|
114 |
+
"eval_accuracy": 0.739,
|
115 |
+
"eval_f1": 0.7381492469031885,
|
116 |
+
"eval_loss": 0.5406165719032288,
|
117 |
+
"eval_runtime": 3.3555,
|
118 |
+
"eval_samples_per_second": 298.016,
|
119 |
+
"eval_steps_per_second": 37.252,
|
120 |
+
"step": 490
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"epoch": 7.99,
|
124 |
+
"learning_rate": 7.333333333333332e-07,
|
125 |
+
"loss": 0.5175,
|
126 |
+
"step": 560
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 7.99,
|
130 |
+
"eval_accuracy": 0.744,
|
131 |
+
"eval_f1": 0.7430750702529104,
|
132 |
+
"eval_loss": 0.5307690501213074,
|
133 |
+
"eval_runtime": 3.1364,
|
134 |
+
"eval_samples_per_second": 318.842,
|
135 |
+
"eval_steps_per_second": 39.855,
|
136 |
+
"step": 560
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"epoch": 8.99,
|
140 |
+
"learning_rate": 7e-07,
|
141 |
+
"loss": 0.5018,
|
142 |
+
"step": 630
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 8.99,
|
146 |
+
"eval_accuracy": 0.751,
|
147 |
+
"eval_f1": 0.7504007121097755,
|
148 |
+
"eval_loss": 0.5235322117805481,
|
149 |
+
"eval_runtime": 2.9996,
|
150 |
+
"eval_samples_per_second": 333.377,
|
151 |
+
"eval_steps_per_second": 41.672,
|
152 |
+
"step": 630
|
153 |
+
}
|
154 |
+
],
|
155 |
+
"max_steps": 2100,
|
156 |
+
"num_train_epochs": 30,
|
157 |
+
"total_flos": 7991998306560000.0,
|
158 |
+
"trial_name": null,
|
159 |
+
"trial_params": null
|
160 |
+
}
|
checkpoint-630/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8be60ed58549fe1b3185c4d116e2ed7593eafbe162cc3300999b64c649675d04
|
3 |
+
size 3119
|
checkpoint-630/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 539689325
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:340ff4f94bbd81a9356df698f57832fd0cd040d275e66581a4a35af4c555835f
|
3 |
size 539689325
|