yoshitomo-matsubara committed
Commit
5abc4dc
1 Parent(s): e1cc68b

added files

config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "_name_or_path": "bert-large-uncased",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "finetuning_task": "wnli",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "transformers_version": "4.6.1",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 30522
+ }
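
The config above describes a 24-layer, 1024-hidden, 16-head BERT-large classifier fine-tuned for WNLI. Below is a minimal loading sketch using Hugging Face Transformers; the Hub repo id is an assumption inferred from the commit author and task, not stated anywhere in this diff.

from transformers import AutoConfig, AutoModelForSequenceClassification

# Assumed repo id (not given in this commit); replace if it differs.
model_id = "yoshitomo-matsubara/bert-large-uncased-wnli"

config = AutoConfig.from_pretrained(model_id)
# Sanity-check a few fields against the config.json above.
assert config.num_hidden_layers == 24 and config.hidden_size == 1024
assert config.finetuning_task == "wnli"

model = AutoModelForSequenceClassification.from_pretrained(model_id)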
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:856817085b699fd4adb637df47fc40368882e64810ea4160594506261539ccab
+ size 1340746825
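
Git LFS keeps only this three-line pointer in the repository; the ~1.3 GB weight file itself is stored out-of-band and addressed by the SHA-256 oid above. A small verification sketch (the helper verify_lfs_pointer is ours, not part of the repo) that checks a downloaded pytorch_model.bin against the pointer:

import hashlib

def verify_lfs_pointer(path, expected_oid, expected_size):
    # Hash the file in 1 MiB chunks and compare both fields of the pointer.
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

print(verify_lfs_pointer(
    "pytorch_model.bin",
    "856817085b699fd4adb637df47fc40368882e64810ea4160594506261539ccab",
    1340746825,
))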
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_lower": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-large-uncased"}
training.log ADDED
@@ -0,0 +1,44 @@
+ 2021-05-22 19:56:24,477 INFO __main__ Namespace(adjust_lr=False, config='torchdistill/configs/sample/glue/wnli/ce/bert_large_uncased.yaml', log='log/glue/wnli/ce/bert_large_uncased.txt', private_output='leaderboard/glue/standard/bert_large_uncased/', seed=None, student_only=False, task_name='wnli', test_only=False, world_size=1)
+ 2021-05-22 19:56:24,533 INFO __main__ Distributed environment: NO
+ Num processes: 1
+ Process index: 0
+ Local process index: 0
+ Device: cuda
+ Use FP16 precision: True
+
+ 2021-05-22 19:56:48,842 INFO __main__ Start training
+ 2021-05-22 19:56:48,842 INFO torchdistill.models.util [student model]
+ 2021-05-22 19:56:48,843 INFO torchdistill.models.util Using the original student model
+ 2021-05-22 19:56:48,843 INFO torchdistill.core.training Loss = 1.0 * OrgLoss
+ 2021-05-22 19:56:53,550 INFO torchdistill.misc.log Epoch: [0] [ 0/80] eta: 0:00:17 lr: 3.990000000000001e-05 sample/s: 18.767552475088035 loss: 0.6687 (0.6687) time: 0.2162 data: 0.0030 max mem: 5387
+ 2021-05-22 19:57:00,019 INFO torchdistill.misc.log Epoch: [0] [50/80] eta: 0:00:03 lr: 3.490000000000001e-05 sample/s: 30.58460562463654 loss: 0.6938 (0.6991) time: 0.1276 data: 0.0015 max mem: 7070
+ 2021-05-22 19:57:03,804 INFO torchdistill.misc.log Epoch: [0] Total time: 0:00:10
+ 2021-05-22 19:57:03,958 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/wnli/default_experiment-1-0.arrow
+ 2021-05-22 19:57:03,958 INFO __main__ Validation: accuracy = 0.5492957746478874
+ 2021-05-22 19:57:03,958 INFO __main__ Updating ckpt
+ 2021-05-22 19:57:10,821 INFO torchdistill.misc.log Epoch: [1] [ 0/80] eta: 0:00:12 lr: 3.19e-05 sample/s: 29.384120134089486 loss: 0.6909 (0.6909) time: 0.1566 data: 0.0205 max mem: 7186
+ 2021-05-22 19:57:17,272 INFO torchdistill.misc.log Epoch: [1] [50/80] eta: 0:00:03 lr: 2.6900000000000003e-05 sample/s: 32.50642486243509 loss: 0.6932 (0.6934) time: 0.1310 data: 0.0014 max mem: 7188
+ 2021-05-22 19:57:20,973 INFO torchdistill.misc.log Epoch: [1] Total time: 0:00:10
+ 2021-05-22 19:57:21,128 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/wnli/default_experiment-1-0.arrow
+ 2021-05-22 19:57:21,128 INFO __main__ Validation: accuracy = 0.5633802816901409
+ 2021-05-22 19:57:21,128 INFO __main__ Updating ckpt
+ 2021-05-22 19:57:28,122 INFO torchdistill.misc.log Epoch: [2] [ 0/80] eta: 0:00:10 lr: 2.39e-05 sample/s: 30.32127450005874 loss: 0.6824 (0.6824) time: 0.1339 data: 0.0020 max mem: 7188
+ 2021-05-22 19:57:34,515 INFO torchdistill.misc.log Epoch: [2] [50/80] eta: 0:00:03 lr: 1.8900000000000002e-05 sample/s: 31.630715165089583 loss: 0.6937 (0.6948) time: 0.1281 data: 0.0014 max mem: 7188
+ 2021-05-22 19:57:38,235 INFO torchdistill.misc.log Epoch: [2] Total time: 0:00:10
+ 2021-05-22 19:57:38,390 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/wnli/default_experiment-1-0.arrow
+ 2021-05-22 19:57:38,390 INFO __main__ Validation: accuracy = 0.5633802816901409
+ 2021-05-22 19:57:38,521 INFO torchdistill.misc.log Epoch: [3] [ 0/80] eta: 0:00:10 lr: 1.5900000000000004e-05 sample/s: 31.18180821306768 loss: 0.6948 (0.6948) time: 0.1297 data: 0.0014 max mem: 7188
+ 2021-05-22 19:57:44,900 INFO torchdistill.misc.log Epoch: [3] [50/80] eta: 0:00:03 lr: 1.0900000000000002e-05 sample/s: 31.89390759497521 loss: 0.6927 (0.6954) time: 0.1278 data: 0.0014 max mem: 7188
+ 2021-05-22 19:57:48,641 INFO torchdistill.misc.log Epoch: [3] Total time: 0:00:10
+ 2021-05-22 19:57:48,792 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/wnli/default_experiment-1-0.arrow
+ 2021-05-22 19:57:48,793 INFO __main__ Validation: accuracy = 0.5633802816901409
+ 2021-05-22 19:57:48,917 INFO torchdistill.misc.log Epoch: [4] [ 0/80] eta: 0:00:09 lr: 7.9e-06 sample/s: 32.742675586850794 loss: 0.6934 (0.6934) time: 0.1236 data: 0.0014 max mem: 7188
+ 2021-05-22 19:57:55,336 INFO torchdistill.misc.log Epoch: [4] [50/80] eta: 0:00:03 lr: 2.9e-06 sample/s: 31.653093952463426 loss: 0.6926 (0.6938) time: 0.1289 data: 0.0014 max mem: 7188
+ 2021-05-22 19:57:59,112 INFO torchdistill.misc.log Epoch: [4] Total time: 0:00:10
+ 2021-05-22 19:57:59,267 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/wnli/default_experiment-1-0.arrow
+ 2021-05-22 19:57:59,267 INFO __main__ Validation: accuracy = 0.5352112676056338
+ 2021-05-22 19:58:07,747 INFO __main__ [Student: bert-large-uncased]
+ 2021-05-22 19:58:07,913 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/wnli/default_experiment-1-0.arrow
+ 2021-05-22 19:58:07,913 INFO __main__ Test: accuracy = 0.5633802816901409
+ 2021-05-22 19:58:07,913 INFO __main__ Start prediction for private dataset(s)
+ 2021-05-22 19:58:07,914 INFO __main__ wnli/test: 146 samples
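
The log records 5 epochs of 80 steps each under FP16, with a linear learning-rate decay of 1e-07 per step (from ~4e-05 toward 0 over the 400 steps) and the checkpoint updated whenever validation accuracy improves; the best validation accuracy (0.5634, reached at epoch 1) is also the final accuracy reported in the log. A hedged end-to-end inference sketch for the resulting model (repo id assumed as above, example sentences ours):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "yoshitomo-matsubara/bert-large-uncased-wnli"  # assumed id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id).eval()

premise = "I put the cake away in the refrigerator. It has a lot of butter in it."
hypothesis = "The cake has a lot of butter in it."
inputs = tokenizer(premise, hypothesis, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    logits = model(**inputs).logits
# GLUE WNLI label order: 0 = not_entailment, 1 = entailment.
print(logits.softmax(-1))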
vocab.txt ADDED
The diff for this file is too large to render.