yoshitomo-matsubara
committed on
Commit
•
209e213
1
Parent(s):
1d587b2
added files
Browse files
- config.json +26 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- training.log +50 -0
- vocab.txt +0 -0
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-large-uncased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"finetuning_task": "qnli",
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 4096,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 24,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"problem_type": "single_label_classification",
|
22 |
+
"transformers_version": "4.6.1",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c56b0e5ff5f633a50e058d03a2f352413dcfaaa5be4340d71ab956d58ab39c1
|
3 |
+
size 1340746825
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_lower": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-large-uncased"}
|
training.log
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-05-22 19:12:22,619 INFO __main__ Namespace(adjust_lr=False, config='torchdistill/configs/sample/glue/qnli/ce/bert_large_uncased.yaml', log='log/glue/qnli/ce/bert_large_uncased.txt', private_output='leaderboard/glue/standard/bert_large_uncased/', seed=None, student_only=False, task_name='qnli', test_only=False, world_size=1)
|
2 |
+
2021-05-22 19:12:22,665 INFO __main__ Distributed environment: NO
|
3 |
+
Num processes: 1
|
4 |
+
Process index: 0
|
5 |
+
Local process index: 0
|
6 |
+
Device: cuda
|
7 |
+
Use FP16 precision: True
|
8 |
+
|
9 |
+
2021-05-22 19:13:15,971 INFO __main__ Start training
|
10 |
+
2021-05-22 19:13:15,972 INFO torchdistill.models.util [student model]
|
11 |
+
2021-05-22 19:13:15,972 INFO torchdistill.models.util Using the original student model
|
12 |
+
2021-05-22 19:13:15,972 INFO torchdistill.core.training Loss = 1.0 * OrgLoss
|
13 |
+
2021-05-22 19:13:21,553 INFO torchdistill.misc.log Epoch: [0] [ 0/3274] eta: 0:20:16 lr: 1.9997963754836084e-05 sample/s: 11.305340672017941 loss: 0.7982 (0.7982) time: 0.3715 data: 0.0177 max mem: 6528
|
14 |
+
2021-05-22 19:15:17,991 INFO torchdistill.misc.log Epoch: [0] [ 500/3274] eta: 0:10:46 lr: 1.8979841172877215e-05 sample/s: 20.366457991155265 loss: 0.3002 (0.4591) time: 0.2386 data: 0.0051 max mem: 12387
|
15 |
+
2021-05-22 19:17:15,461 INFO torchdistill.misc.log Epoch: [0] [1000/3274] eta: 0:08:52 lr: 1.796171859091835e-05 sample/s: 14.86020497731184 loss: 0.2730 (0.3861) time: 0.2408 data: 0.0047 max mem: 12387
|
16 |
+
2021-05-22 19:19:11,888 INFO torchdistill.misc.log Epoch: [0] [1500/3274] eta: 0:06:54 lr: 1.694359600895948e-05 sample/s: 16.38974601447187 loss: 0.2440 (0.3542) time: 0.2363 data: 0.0047 max mem: 12387
|
17 |
+
2021-05-22 19:21:08,394 INFO torchdistill.misc.log Epoch: [0] [2000/3274] eta: 0:04:57 lr: 1.5925473427000613e-05 sample/s: 17.12289296313987 loss: 0.2741 (0.3352) time: 0.2356 data: 0.0047 max mem: 12387
|
18 |
+
2021-05-22 19:23:05,117 INFO torchdistill.misc.log Epoch: [0] [2500/3274] eta: 0:03:00 lr: 1.4907350845041744e-05 sample/s: 21.28394165862149 loss: 0.2282 (0.3246) time: 0.2402 data: 0.0048 max mem: 12387
|
19 |
+
2021-05-22 19:25:01,923 INFO torchdistill.misc.log Epoch: [0] [3000/3274] eta: 0:01:03 lr: 1.3889228263082878e-05 sample/s: 18.714775413316023 loss: 0.2385 (0.3134) time: 0.2286 data: 0.0049 max mem: 12387
|
20 |
+
2021-05-22 19:26:05,878 INFO torchdistill.misc.log Epoch: [0] Total time: 0:12:44
|
21 |
+
2021-05-22 19:26:17,529 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
|
22 |
+
2021-05-22 19:26:17,529 INFO __main__ Validation: accuracy = 0.9150649826102873
|
23 |
+
2021-05-22 19:26:17,529 INFO __main__ Updating ckpt
|
24 |
+
2021-05-22 19:26:22,622 INFO torchdistill.misc.log Epoch: [1] [ 0/3274] eta: 0:11:40 lr: 1.3331297088169417e-05 sample/s: 19.677133558129306 loss: 0.1484 (0.1484) time: 0.2138 data: 0.0105 max mem: 12387
|
25 |
+
2021-05-22 19:28:19,309 INFO torchdistill.misc.log Epoch: [1] [ 500/3274] eta: 0:10:47 lr: 1.2313174506210548e-05 sample/s: 17.8955634464596 loss: 0.0962 (0.1458) time: 0.2408 data: 0.0048 max mem: 12387
|
26 |
+
2021-05-22 19:30:15,867 INFO torchdistill.misc.log Epoch: [1] [1000/3274] eta: 0:08:50 lr: 1.1295051924251682e-05 sample/s: 18.815583413520308 loss: 0.0378 (0.1418) time: 0.2301 data: 0.0046 max mem: 12387
|
27 |
+
2021-05-22 19:32:11,984 INFO torchdistill.misc.log Epoch: [1] [1500/3274] eta: 0:06:53 lr: 1.0276929342292811e-05 sample/s: 16.385696175580676 loss: 0.1266 (0.1432) time: 0.2332 data: 0.0046 max mem: 12387
|
28 |
+
2021-05-22 19:34:09,106 INFO torchdistill.misc.log Epoch: [1] [2000/3274] eta: 0:04:57 lr: 9.258806760333945e-06 sample/s: 20.490180057719105 loss: 0.1234 (0.1409) time: 0.2426 data: 0.0047 max mem: 12387
|
29 |
+
2021-05-22 19:36:05,522 INFO torchdistill.misc.log Epoch: [1] [2500/3274] eta: 0:03:00 lr: 8.240684178375076e-06 sample/s: 18.810963296837595 loss: 0.0786 (0.1388) time: 0.2322 data: 0.0049 max mem: 12387
|
30 |
+
2021-05-22 19:38:03,104 INFO torchdistill.misc.log Epoch: [1] [3000/3274] eta: 0:01:03 lr: 7.222561596416208e-06 sample/s: 17.176536111520633 loss: 0.1368 (0.1376) time: 0.2340 data: 0.0049 max mem: 12387
|
31 |
+
2021-05-22 19:39:06,921 INFO torchdistill.misc.log Epoch: [1] Total time: 0:12:44
|
32 |
+
2021-05-22 19:39:18,573 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
|
33 |
+
2021-05-22 19:39:18,573 INFO __main__ Validation: accuracy = 0.9172615778876075
|
34 |
+
2021-05-22 19:39:18,573 INFO __main__ Updating ckpt
|
35 |
+
2021-05-22 19:39:23,961 INFO torchdistill.misc.log Epoch: [2] [ 0/3274] eta: 0:13:22 lr: 6.6646304215027494e-06 sample/s: 18.064977953408743 loss: 0.0858 (0.0858) time: 0.2453 data: 0.0238 max mem: 12387
|
36 |
+
2021-05-22 19:41:20,819 INFO torchdistill.misc.log Epoch: [2] [ 500/3274] eta: 0:10:48 lr: 5.646507839543881e-06 sample/s: 20.492357451881382 loss: 0.0001 (0.0684) time: 0.2278 data: 0.0046 max mem: 12387
|
37 |
+
2021-05-22 19:43:17,647 INFO torchdistill.misc.log Epoch: [2] [1000/3274] eta: 0:08:51 lr: 4.628385257585013e-06 sample/s: 17.50339693191113 loss: 0.0000 (0.0857) time: 0.2364 data: 0.0047 max mem: 12387
|
38 |
+
2021-05-22 19:45:13,046 INFO torchdistill.misc.log Epoch: [2] [1500/3274] eta: 0:06:52 lr: 3.6102626756261456e-06 sample/s: 15.964860060672729 loss: 0.0000 (0.0954) time: 0.2298 data: 0.0047 max mem: 12387
|
39 |
+
2021-05-22 19:47:08,839 INFO torchdistill.misc.log Epoch: [2] [2000/3274] eta: 0:04:56 lr: 2.5921400936672775e-06 sample/s: 19.177665171901772 loss: 0.0000 (0.0998) time: 0.2280 data: 0.0047 max mem: 12387
|
40 |
+
2021-05-22 19:49:03,179 INFO torchdistill.misc.log Epoch: [2] [2500/3274] eta: 0:02:59 lr: 1.5740175117084096e-06 sample/s: 21.98736368652192 loss: 0.0000 (0.1038) time: 0.2352 data: 0.0047 max mem: 12387
|
41 |
+
2021-05-22 19:50:59,281 INFO torchdistill.misc.log Epoch: [2] [3000/3274] eta: 0:01:03 lr: 5.558949297495419e-07 sample/s: 16.504755496267613 loss: 0.0000 (0.1080) time: 0.2303 data: 0.0047 max mem: 12387
|
42 |
+
2021-05-22 19:52:01,461 INFO torchdistill.misc.log Epoch: [2] Total time: 0:12:37
|
43 |
+
2021-05-22 19:52:13,103 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
|
44 |
+
2021-05-22 19:52:13,104 INFO __main__ Validation: accuracy = 0.9214717188358045
|
45 |
+
2021-05-22 19:52:13,104 INFO __main__ Updating ckpt
|
46 |
+
2021-05-22 19:52:26,136 INFO __main__ [Student: bert-large-uncased]
|
47 |
+
2021-05-22 19:52:37,770 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/qnli/default_experiment-1-0.arrow
|
48 |
+
2021-05-22 19:52:37,771 INFO __main__ Test: accuracy = 0.9214717188358045
|
49 |
+
2021-05-22 19:52:37,771 INFO __main__ Start prediction for private dataset(s)
|
50 |
+
2021-05-22 19:52:37,772 INFO __main__ qnli/test: 5463 samples
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|