yoshitomo-matsubara committed on
Commit
50aa989
1 Parent(s): ec14c64

added files

Browse files
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-large-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "finetuning_task": "mnli",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 4096,
19
+ "label2id": {
20
+ "LABEL_0": 0,
21
+ "LABEL_1": 1,
22
+ "LABEL_2": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 16,
28
+ "num_hidden_layers": 24,
29
+ "pad_token_id": 0,
30
+ "position_embedding_type": "absolute",
31
+ "problem_type": "single_label_classification",
32
+ "transformers_version": "4.6.1",
33
+ "type_vocab_size": 2,
34
+ "use_cache": true,
35
+ "vocab_size": 30522
36
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60e65b82b971489a2d598bc76b7fd2dbc2666b6e36945ec825c85bcb31a7cd4f
3
+ size 1340750921
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "do_lower": true, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-large-uncased"}
training.log ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-05-22 16:51:27,390 INFO __main__ Namespace(adjust_lr=False, config='torchdistill/configs/sample/glue/mnli/ce/bert_large_uncased.yaml', log='log/glue/mnli/ce/bert_large_uncased.txt', private_output='leaderboard/glue/standard/bert_large_uncased/', seed=None, student_only=False, task_name='mnli', test_only=False, world_size=1)
2
+ 2021-05-22 16:51:27,468 INFO __main__ Distributed environment: NO
3
+ Num processes: 1
4
+ Process index: 0
5
+ Local process index: 0
6
+ Device: cuda
7
+ Use FP16 precision: True
8
+
9
+ 2021-05-22 16:51:28,560 INFO filelock Lock 140388380183568 acquired on /root/.cache/huggingface/transformers/1cf090f220f9674b67b3434decfe4d40a6532d7849653eac435ff94d31a4904c.1d03e5e4fa2db2532c517b2cd98290d8444b237619bd3d2039850a6d5e86473d.lock
10
+ 2021-05-22 16:51:29,127 INFO filelock Lock 140388380183568 released on /root/.cache/huggingface/transformers/1cf090f220f9674b67b3434decfe4d40a6532d7849653eac435ff94d31a4904c.1d03e5e4fa2db2532c517b2cd98290d8444b237619bd3d2039850a6d5e86473d.lock
11
+ 2021-05-22 16:51:30,244 INFO filelock Lock 140388420260240 acquired on /root/.cache/huggingface/transformers/e12f02d630da91a0982ce6db1ad595231d155a2b725ab106971898276d842ecc.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
12
+ 2021-05-22 16:51:31,503 INFO filelock Lock 140388420260240 released on /root/.cache/huggingface/transformers/e12f02d630da91a0982ce6db1ad595231d155a2b725ab106971898276d842ecc.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
13
+ 2021-05-22 16:51:32,063 INFO filelock Lock 140388380324432 acquired on /root/.cache/huggingface/transformers/475d46024228961ca8770cead39e1079f135fd2441d14cf216727ffac8d41d78.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
14
+ 2021-05-22 16:51:33,505 INFO filelock Lock 140388380324432 released on /root/.cache/huggingface/transformers/475d46024228961ca8770cead39e1079f135fd2441d14cf216727ffac8d41d78.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
15
+ 2021-05-22 16:51:35,451 INFO filelock Lock 140388380350864 acquired on /root/.cache/huggingface/transformers/300ecd79785b4602752c0085f8a89c3f0232ef367eda291c79a5600f3778b677.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79.lock
16
+ 2021-05-22 16:51:36,009 INFO filelock Lock 140388380350864 released on /root/.cache/huggingface/transformers/300ecd79785b4602752c0085f8a89c3f0232ef367eda291c79a5600f3778b677.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79.lock
17
+ 2021-05-22 16:51:36,585 INFO filelock Lock 140388380327440 acquired on /root/.cache/huggingface/transformers/1d959166dd7e047e57ea1b2d9b7b9669938a7e90c5e37a03961ad9f15eaea17f.fea64cd906e3766b04c92397f9ad3ff45271749cbe49829a079dd84e34c1697d.lock
18
+ 2021-05-22 16:51:59,770 INFO filelock Lock 140388380327440 released on /root/.cache/huggingface/transformers/1d959166dd7e047e57ea1b2d9b7b9669938a7e90c5e37a03961ad9f15eaea17f.fea64cd906e3766b04c92397f9ad3ff45271749cbe49829a079dd84e34c1697d.lock
19
+ 2021-05-22 16:53:50,809 INFO __main__ Start training
20
+ 2021-05-22 16:53:50,810 INFO torchdistill.models.util [student model]
21
+ 2021-05-22 16:53:50,810 INFO torchdistill.models.util Using the original student model
22
+ 2021-05-22 16:53:50,810 INFO torchdistill.core.training Loss = 1.0 * OrgLoss
23
+ 2021-05-22 16:53:58,061 INFO torchdistill.misc.log Epoch: [0] [ 0/12272] eta: 1:16:07 lr: 1.9999456757931336e-05 sample/s: 12.183084487517528 loss: 1.1712 (1.1712) time: 0.3722 data: 0.0438 max mem: 6528
24
+ 2021-05-22 16:57:39,884 INFO torchdistill.misc.log Epoch: [0] [ 1000/12272] eta: 0:41:42 lr: 1.945621468926554e-05 sample/s: 20.478750076289288 loss: 0.5207 (0.7022) time: 0.2147 data: 0.0042 max mem: 12387
25
+ 2021-05-22 17:01:20,141 INFO torchdistill.misc.log Epoch: [0] [ 2000/12272] eta: 0:37:51 lr: 1.891297262059974e-05 sample/s: 15.393974983851077 loss: 0.4313 (0.6009) time: 0.2211 data: 0.0043 max mem: 12387
26
+ 2021-05-22 17:05:02,731 INFO torchdistill.misc.log Epoch: [0] [ 3000/12272] eta: 0:34:14 lr: 1.8369730551933943e-05 sample/s: 18.06303300441961 loss: 0.4815 (0.5557) time: 0.2245 data: 0.0044 max mem: 12387
27
+ 2021-05-22 17:08:46,475 INFO torchdistill.misc.log Epoch: [0] [ 4000/12272] eta: 0:30:37 lr: 1.7826488483268146e-05 sample/s: 21.499356065605703 loss: 0.3788 (0.5245) time: 0.2279 data: 0.0044 max mem: 12387
28
+ 2021-05-22 17:12:27,148 INFO torchdistill.misc.log Epoch: [0] [ 5000/12272] eta: 0:26:53 lr: 1.728324641460235e-05 sample/s: 18.00671446357275 loss: 0.4535 (0.5039) time: 0.2255 data: 0.0045 max mem: 12387
29
+ 2021-05-22 17:16:11,174 INFO torchdistill.misc.log Epoch: [0] [ 6000/12272] eta: 0:23:13 lr: 1.674000434593655e-05 sample/s: 18.482076681398965 loss: 0.3041 (0.4878) time: 0.2298 data: 0.0048 max mem: 12387
30
+ 2021-05-22 17:19:54,854 INFO torchdistill.misc.log Epoch: [0] [ 7000/12272] eta: 0:19:32 lr: 1.6196762277270753e-05 sample/s: 17.582475809604055 loss: 0.4186 (0.4757) time: 0.2196 data: 0.0043 max mem: 12387
31
+ 2021-05-22 17:23:37,409 INFO torchdistill.misc.log Epoch: [0] [ 8000/12272] eta: 0:15:50 lr: 1.5653520208604957e-05 sample/s: 18.052984294095115 loss: 0.3516 (0.4650) time: 0.2145 data: 0.0044 max mem: 12387
32
+ 2021-05-22 17:27:17,991 INFO torchdistill.misc.log Epoch: [0] [ 9000/12272] eta: 0:12:07 lr: 1.5110278139939158e-05 sample/s: 22.68604630722653 loss: 0.3512 (0.4559) time: 0.2295 data: 0.0043 max mem: 12387
33
+ 2021-05-22 17:31:01,464 INFO torchdistill.misc.log Epoch: [0] [10000/12272] eta: 0:08:25 lr: 1.4567036071273362e-05 sample/s: 16.33036813667613 loss: 0.2831 (0.4491) time: 0.2283 data: 0.0043 max mem: 12387
34
+ 2021-05-22 17:34:44,016 INFO torchdistill.misc.log Epoch: [0] [11000/12272] eta: 0:04:42 lr: 1.4023794002607562e-05 sample/s: 20.46775732289036 loss: 0.3536 (0.4437) time: 0.2124 data: 0.0044 max mem: 12387
35
+ 2021-05-22 17:38:24,552 INFO torchdistill.misc.log Epoch: [0] [12000/12272] eta: 0:01:00 lr: 1.3480551933941765e-05 sample/s: 18.079364121485902 loss: 0.4479 (0.4387) time: 0.2239 data: 0.0043 max mem: 12387
36
+ 2021-05-22 17:39:24,832 INFO torchdistill.misc.log Epoch: [0] Total time: 0:45:27
37
+ 2021-05-22 17:39:43,192 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
38
+ 2021-05-22 17:39:43,193 INFO __main__ Validation: accuracy = 0.8611309220580744
39
+ 2021-05-22 17:39:43,193 INFO __main__ Updating ckpt
40
+ 2021-05-22 17:39:48,350 INFO torchdistill.misc.log Epoch: [1] [ 0/12272] eta: 0:46:53 lr: 1.333279009126467e-05 sample/s: 20.607311527884058 loss: 0.2311 (0.2311) time: 0.2293 data: 0.0352 max mem: 12387
41
+ 2021-05-22 17:43:29,458 INFO torchdistill.misc.log Epoch: [1] [ 1000/12272] eta: 0:41:32 lr: 1.2789548022598873e-05 sample/s: 20.56273287053379 loss: 0.1454 (0.2223) time: 0.2243 data: 0.0042 max mem: 12387
42
+ 2021-05-22 17:47:11,803 INFO torchdistill.misc.log Epoch: [1] [ 2000/12272] eta: 0:37:57 lr: 1.2246305953933073e-05 sample/s: 18.59424568869974 loss: 0.2245 (0.2225) time: 0.2319 data: 0.0048 max mem: 12387
43
+ 2021-05-22 17:50:54,138 INFO torchdistill.misc.log Epoch: [1] [ 3000/12272] eta: 0:34:17 lr: 1.1703063885267276e-05 sample/s: 18.018356427434547 loss: 0.2228 (0.2222) time: 0.2174 data: 0.0043 max mem: 12387
44
+ 2021-05-22 17:54:36,556 INFO torchdistill.misc.log Epoch: [1] [ 4000/12272] eta: 0:30:36 lr: 1.115982181660148e-05 sample/s: 20.332667584495162 loss: 0.1867 (0.2216) time: 0.2261 data: 0.0043 max mem: 12387
45
+ 2021-05-22 17:58:20,409 INFO torchdistill.misc.log Epoch: [1] [ 5000/12272] eta: 0:26:57 lr: 1.061657974793568e-05 sample/s: 17.845956815829588 loss: 0.1976 (0.2198) time: 0.2294 data: 0.0042 max mem: 12387
46
+ 2021-05-22 18:02:02,847 INFO torchdistill.misc.log Epoch: [1] [ 6000/12272] eta: 0:23:14 lr: 1.0073337679269883e-05 sample/s: 21.467115188009178 loss: 0.1879 (0.2201) time: 0.2190 data: 0.0042 max mem: 12387
47
+ 2021-05-22 18:05:42,715 INFO torchdistill.misc.log Epoch: [1] [ 7000/12272] eta: 0:19:30 lr: 9.530095610604087e-06 sample/s: 17.160899388528726 loss: 0.2007 (0.2208) time: 0.2206 data: 0.0043 max mem: 12387
48
+ 2021-05-22 18:09:23,036 INFO torchdistill.misc.log Epoch: [1] [ 8000/12272] eta: 0:15:47 lr: 8.986853541938288e-06 sample/s: 18.76536935211822 loss: 0.1739 (0.2222) time: 0.2313 data: 0.0043 max mem: 12387
49
+ 2021-05-22 18:13:05,941 INFO torchdistill.misc.log Epoch: [1] [ 9000/12272] eta: 0:12:06 lr: 8.44361147327249e-06 sample/s: 21.49795107699799 loss: 0.1878 (0.2220) time: 0.2245 data: 0.0043 max mem: 12387
50
+ 2021-05-22 18:16:47,980 INFO torchdistill.misc.log Epoch: [1] [10000/12272] eta: 0:08:24 lr: 7.900369404606693e-06 sample/s: 14.830137577212604 loss: 0.1875 (0.2221) time: 0.2259 data: 0.0043 max mem: 12387
51
+ 2021-05-22 18:20:30,312 INFO torchdistill.misc.log Epoch: [1] [11000/12272] eta: 0:04:42 lr: 7.357127335940896e-06 sample/s: 20.469380509379288 loss: 0.2438 (0.2223) time: 0.2209 data: 0.0042 max mem: 12387
52
+ 2021-05-22 18:24:12,649 INFO torchdistill.misc.log Epoch: [1] [12000/12272] eta: 0:01:00 lr: 6.813885267275099e-06 sample/s: 22.500544501606683 loss: 0.1945 (0.2219) time: 0.2258 data: 0.0043 max mem: 12387
53
+ 2021-05-22 18:25:12,909 INFO torchdistill.misc.log Epoch: [1] Total time: 0:45:24
54
+ 2021-05-22 18:25:31,246 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
55
+ 2021-05-22 18:25:31,247 INFO __main__ Validation: accuracy = 0.8580743759551707
56
+ 2021-05-22 18:25:31,544 INFO torchdistill.misc.log Epoch: [2] [ 0/12272] eta: 1:00:43 lr: 6.666123424598001e-06 sample/s: 14.723753056256177 loss: 0.2830 (0.2830) time: 0.2969 data: 0.0253 max mem: 12387
57
+ 2021-05-22 18:29:12,317 INFO torchdistill.misc.log Epoch: [2] [ 1000/12272] eta: 0:41:29 lr: 6.122881355932204e-06 sample/s: 21.425308023947203 loss: 0.0852 (0.1137) time: 0.2224 data: 0.0042 max mem: 12387
58
+ 2021-05-22 18:32:53,024 INFO torchdistill.misc.log Epoch: [2] [ 2000/12272] eta: 0:37:47 lr: 5.579639287266406e-06 sample/s: 16.40405652223226 loss: 0.0455 (0.1167) time: 0.2192 data: 0.0041 max mem: 12387
59
+ 2021-05-22 18:36:32,375 INFO torchdistill.misc.log Epoch: [2] [ 3000/12272] eta: 0:34:02 lr: 5.0363972186006095e-06 sample/s: 18.737662697375622 loss: 0.1362 (0.1172) time: 0.2145 data: 0.0043 max mem: 12387
60
+ 2021-05-22 18:40:14,688 INFO torchdistill.misc.log Epoch: [2] [ 4000/12272] eta: 0:30:26 lr: 4.493155149934811e-06 sample/s: 18.113464380838863 loss: 0.0532 (0.1177) time: 0.2220 data: 0.0043 max mem: 12387
61
+ 2021-05-22 18:43:57,248 INFO torchdistill.misc.log Epoch: [2] [ 5000/12272] eta: 0:26:48 lr: 3.949913081269014e-06 sample/s: 14.875937326267126 loss: 0.1144 (0.1180) time: 0.2276 data: 0.0043 max mem: 12387
62
+ 2021-05-22 18:47:39,622 INFO torchdistill.misc.log Epoch: [2] [ 6000/12272] eta: 0:23:08 lr: 3.4066710126032164e-06 sample/s: 16.430757056702372 loss: 0.1580 (0.1191) time: 0.2224 data: 0.0044 max mem: 12387
63
+ 2021-05-22 18:51:21,874 INFO torchdistill.misc.log Epoch: [2] [ 7000/12272] eta: 0:19:27 lr: 2.8634289439374186e-06 sample/s: 18.773201563424898 loss: 0.0951 (0.1196) time: 0.2272 data: 0.0042 max mem: 12387
64
+ 2021-05-22 18:55:04,803 INFO torchdistill.misc.log Epoch: [2] [ 8000/12272] eta: 0:15:46 lr: 2.320186875271621e-06 sample/s: 14.910151872387736 loss: 0.0195 (0.1192) time: 0.2168 data: 0.0042 max mem: 12387
65
+ 2021-05-22 18:58:47,324 INFO torchdistill.misc.log Epoch: [2] [ 9000/12272] eta: 0:12:05 lr: 1.7769448066058238e-06 sample/s: 17.93071695225555 loss: 0.0373 (0.1192) time: 0.2179 data: 0.0042 max mem: 12387
66
+ 2021-05-22 19:02:29,181 INFO torchdistill.misc.log Epoch: [2] [10000/12272] eta: 0:08:23 lr: 1.2337027379400262e-06 sample/s: 14.847908469314245 loss: 0.0573 (0.1188) time: 0.2322 data: 0.0042 max mem: 12387
67
+ 2021-05-22 19:06:11,866 INFO torchdistill.misc.log Epoch: [2] [11000/12272] eta: 0:04:42 lr: 6.904606692742287e-07 sample/s: 14.793588146235695 loss: 0.0291 (0.1186) time: 0.2280 data: 0.0043 max mem: 12387
68
+ 2021-05-22 19:09:52,653 INFO torchdistill.misc.log Epoch: [2] [12000/12272] eta: 0:01:00 lr: 1.4721860060843112e-07 sample/s: 20.479150035276856 loss: 0.0900 (0.1190) time: 0.2209 data: 0.0042 max mem: 12387
69
+ 2021-05-22 19:10:51,967 INFO torchdistill.misc.log Epoch: [2] Total time: 0:45:20
70
+ 2021-05-22 19:11:10,326 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
71
+ 2021-05-22 19:11:10,326 INFO __main__ Validation: accuracy = 0.8541008660213958
72
+ 2021-05-22 19:11:18,288 INFO __main__ [Student: bert-large-uncased]
73
+ 2021-05-22 19:11:36,643 INFO /usr/local/lib/python3.7/dist-packages/datasets/metric.py Removing /root/.cache/huggingface/metrics/glue/mnli/default_experiment-1-0.arrow
74
+ 2021-05-22 19:11:36,643 INFO __main__ Test: accuracy = 0.8611309220580744
75
+ 2021-05-22 19:11:36,644 INFO __main__ Start prediction for private dataset(s)
76
+ 2021-05-22 19:11:36,645 INFO __main__ mnli/test_m: 9796 samples
77
+ 2021-05-22 19:11:54,962 INFO __main__ mnli/test_mm: 9847 samples
78
+ 2021-05-22 19:12:13,295 INFO __main__ ax/test_ax: 1104 samples
vocab.txt ADDED
The diff for this file is too large to render. See raw diff