cccjc commited on
Commit
f4fb17d
·
1 Parent(s): 973678d

add weights

Browse files
Files changed (40) hide show
  1. .gitattributes +1 -0
  2. coco_butd_grid_bert/.DS_Store +0 -0
  3. coco_butd_grid_bert/model_best.pth +3 -0
  4. coco_butd_grid_bert/test_log.txt +151 -0
  5. coco_butd_grid_bert/test_log_cxc.txt +62 -0
  6. coco_butd_grid_bert/test_log_ensemble.txt +27 -0
  7. coco_butd_grid_bigru/.DS_Store +0 -0
  8. coco_butd_grid_bigru/model_best.pth +3 -0
  9. coco_butd_grid_bigru/test_log.txt +93 -0
  10. coco_butd_grid_bigru/test_log_ensemble.txt +27 -0
  11. coco_butd_region_bert/.DS_Store +0 -0
  12. coco_butd_region_bert/model_best.pth +3 -0
  13. coco_butd_region_bert/test_log.txt +144 -0
  14. coco_butd_region_bert/test_log_ensemble.txt +5 -0
  15. coco_butd_region_bigru/.DS_Store +0 -0
  16. coco_butd_region_bigru/model_best.pth +3 -0
  17. coco_butd_region_bigru/test_log.txt +82 -0
  18. coco_butd_region_bigru/test_log_ensemble.txt +27 -0
  19. coco_wsl_grid_bert/.DS_Store +0 -0
  20. coco_wsl_grid_bert/model_best.pth +3 -0
  21. coco_wsl_grid_bert/test_log.txt +158 -0
  22. coco_wsl_grid_bert/test_log_cxc.txt +62 -0
  23. coco_wsl_grid_bert/test_log_ensemble.txt +27 -0
  24. f30k_butd_grid_bert/.DS_Store +0 -0
  25. f30k_butd_grid_bert/model_best.pth +3 -0
  26. f30k_butd_grid_bert/test_log.txt +48 -0
  27. f30k_butd_grid_bigru/.DS_Store +0 -0
  28. f30k_butd_grid_bigru/model_best.pth +3 -0
  29. f30k_butd_grid_bigru/test_log.txt +18 -0
  30. f30k_butd_region_bert/.DS_Store +0 -0
  31. f30k_butd_region_bert/model_best.pth +3 -0
  32. f30k_butd_region_bert/test_log.txt +44 -0
  33. f30k_butd_region_bert/test_log_ensemble.txt +5 -0
  34. f30k_butd_region_bigru/.DS_Store +0 -0
  35. f30k_butd_region_bigru/model_best.pth +3 -0
  36. f30k_butd_region_bigru/test_log.txt +14 -0
  37. f30k_wsl_grid_bert/.DS_Store +0 -0
  38. f30k_wsl_grid_bert/model_best.pth +3 -0
  39. f30k_wsl_grid_bert/test_log.txt +50 -0
  40. f30k_wsl_grid_bert/test_log_ensemble.txt +5 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pth filter=lfs diff=lfs merge=lfs -text
coco_butd_grid_bert/.DS_Store ADDED
Binary file (6.15 kB). View file
 
coco_butd_grid_bert/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc2d749ea969fe65145b8864b54470bfd3fea961bcd0738e9f78198a531349d7
3
+ size 628484236
coco_butd_grid_bert/test_log.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-28 11:49:37,019 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2020-09-28 11:49:44,464 Resnet backbone now has fixed blocks 2
3
+ 2020-09-28 11:49:45,925 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
4
+ 2020-09-28 11:49:45,926 Model config {
5
+ "architectures": [
6
+ "BertForMaskedLM"
7
+ ],
8
+ "attention_probs_dropout_prob": 0.1,
9
+ "finetuning_task": null,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-12,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "bert",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "num_labels": 2,
21
+ "output_attentions": false,
22
+ "output_hidden_states": false,
23
+ "pad_token_id": 0,
24
+ "pruned_heads": {},
25
+ "torchscript": false,
26
+ "type_vocab_size": 2,
27
+ "use_bfloat16": false,
28
+ "vocab_size": 30522
29
+ }
30
+
31
+ 2020-09-28 11:49:47,293 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
32
+ 2020-09-28 11:49:49,480 Use adam as the optimizer, with init lr 0.0005
33
+ 2020-09-28 11:49:49,480 Image encoder is data paralleled now.
34
+ 2020-09-28 11:49:49,618 Load full model with backbone
35
+ 2020-09-28 11:49:49,620 Loading dataset
36
+ 2020-09-28 11:49:54,581 Input mode small: scaled by factor 2.0
37
+ 2020-09-28 11:49:59,061 Computing results...
38
+ 2020-09-28 11:50:56,509 Test: [0/196] Le 63.2128 (63.2128) Time 57.444 (0.000)
39
+ 2020-09-28 11:51:01,949 Test: [10/196] Le 61.8044 (62.3709) Time 0.613 (0.000)
40
+ 2020-09-28 11:51:12,583 Test: [20/196] Le 65.8550 (62.5176) Time 2.779 (0.000)
41
+ 2020-09-28 11:51:19,107 Test: [30/196] Le 63.9750 (62.6710) Time 1.101 (0.000)
42
+ 2020-09-28 11:51:26,753 Test: [40/196] Le 62.3553 (62.8770) Time 0.599 (0.000)
43
+ 2020-09-28 11:51:33,178 Test: [50/196] Le 64.0036 (62.9363) Time 1.053 (0.000)
44
+ 2020-09-28 11:51:41,781 Test: [60/196] Le 60.9549 (62.7791) Time 0.520 (0.000)
45
+ 2020-09-28 11:51:47,991 Test: [70/196] Le 60.1947 (62.6902) Time 0.582 (0.000)
46
+ 2020-09-28 11:51:55,115 Test: [80/196] Le 64.8011 (62.8592) Time 0.521 (0.000)
47
+ 2020-09-28 11:52:02,522 Test: [90/196] Le 63.6653 (62.8890) Time 0.523 (0.000)
48
+ 2020-09-28 11:52:11,548 Test: [100/196] Le 63.3472 (62.8601) Time 0.593 (0.000)
49
+ 2020-09-28 11:52:18,060 Test: [110/196] Le 59.5976 (62.7785) Time 0.535 (0.000)
50
+ 2020-09-28 11:52:26,472 Test: [120/196] Le 64.8909 (62.8633) Time 0.524 (0.000)
51
+ 2020-09-28 11:52:32,687 Test: [130/196] Le 64.0768 (62.8651) Time 0.542 (0.000)
52
+ 2020-09-28 11:52:39,706 Test: [140/196] Le 66.7743 (62.8628) Time 0.532 (0.000)
53
+ 2020-09-28 11:52:45,961 Test: [150/196] Le 63.4051 (62.8979) Time 0.518 (0.000)
54
+ 2020-09-28 11:52:53,645 Test: [160/196] Le 62.4264 (62.9269) Time 0.519 (0.000)
55
+ 2020-09-28 11:53:00,141 Test: [170/196] Le 64.1213 (62.9178) Time 0.513 (0.000)
56
+ 2020-09-28 11:53:07,404 Test: [180/196] Le 61.0356 (62.9458) Time 0.516 (0.000)
57
+ 2020-09-28 11:53:13,988 Test: [190/196] Le 61.7361 (62.9015) Time 0.515 (0.000)
58
+ 2020-09-28 11:53:19,655 Images: 5000, Captions: 25000
59
+ 2020-09-28 11:53:50,253 Align loss: 0.9592935465926018
60
+ 2020-09-28 11:53:50,253 Image uniform loss: -3.825332749718092
61
+ 2020-09-28 11:53:50,253 Text uniform loss: -3.885177468724295
62
+ 2020-09-28 11:53:50,294 calculate similarity time:
63
+ 2020-09-28 11:53:50,633 Image to text: 82.9, 98.0, 99.7, 1.0, 1.4
64
+ 2020-09-28 11:53:50,921 Text to image: 67.8, 92.7, 96.7, 1.0, 3.8
65
+ 2020-09-28 11:53:50,922 rsum: 537.8 ar: 93.5 ari: 85.7
66
+ 2020-09-28 11:53:50,993 calculate similarity time:
67
+ 2020-09-28 11:53:51,336 Image to text: 79.4, 96.0, 98.7, 1.0, 1.8
68
+ 2020-09-28 11:53:51,623 Text to image: 65.9, 91.4, 96.3, 1.0, 3.6
69
+ 2020-09-28 11:53:51,623 rsum: 527.6 ar: 91.4 ari: 84.5
70
+ 2020-09-28 11:53:51,694 calculate similarity time:
71
+ 2020-09-28 11:53:52,036 Image to text: 79.8, 97.2, 99.3, 1.0, 1.6
72
+ 2020-09-28 11:53:52,323 Text to image: 66.7, 91.9, 96.9, 1.0, 3.8
73
+ 2020-09-28 11:53:52,323 rsum: 531.8 ar: 92.1 ari: 85.2
74
+ 2020-09-28 11:53:52,396 calculate similarity time:
75
+ 2020-09-28 11:53:52,739 Image to text: 78.8, 96.4, 98.8, 1.0, 1.7
76
+ 2020-09-28 11:53:53,027 Text to image: 64.5, 91.8, 96.6, 1.0, 3.2
77
+ 2020-09-28 11:53:53,027 rsum: 526.9 ar: 91.3 ari: 84.3
78
+ 2020-09-28 11:53:53,100 calculate similarity time:
79
+ 2020-09-28 11:53:53,445 Image to text: 81.2, 96.5, 99.2, 1.0, 1.5
80
+ 2020-09-28 11:53:53,732 Text to image: 67.3, 92.6, 96.9, 1.0, 3.4
81
+ 2020-09-28 11:53:53,733 rsum: 533.8 ar: 92.3 ari: 85.6
82
+ 2020-09-28 11:53:53,733 -----------------------------------
83
+ 2020-09-28 11:53:53,733 Mean metrics:
84
+ 2020-09-28 11:53:53,733 rsum: 531.6
85
+ 2020-09-28 11:53:53,733 Average i2t Recall: 92.1
86
+ 2020-09-28 11:53:53,733 Image to text: 80.4 96.8 99.1 1.0 1.6
87
+ 2020-09-28 11:53:53,733 Average t2i Recall: 85.1
88
+ 2020-09-28 11:53:53,733 Text to image: 66.4 92.1 96.7 1.0 3.5
89
+ 2020-09-28 11:53:55,578 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
90
+ 2020-09-28 11:54:01,413 Resnet backbone now has fixed blocks 2
91
+ 2020-09-28 11:54:02,873 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
92
+ 2020-09-28 11:54:02,874 Model config {
93
+ "architectures": [
94
+ "BertForMaskedLM"
95
+ ],
96
+ "attention_probs_dropout_prob": 0.1,
97
+ "finetuning_task": null,
98
+ "hidden_act": "gelu",
99
+ "hidden_dropout_prob": 0.1,
100
+ "hidden_size": 768,
101
+ "initializer_range": 0.02,
102
+ "intermediate_size": 3072,
103
+ "layer_norm_eps": 1e-12,
104
+ "max_position_embeddings": 512,
105
+ "model_type": "bert",
106
+ "num_attention_heads": 12,
107
+ "num_hidden_layers": 12,
108
+ "num_labels": 2,
109
+ "output_attentions": false,
110
+ "output_hidden_states": false,
111
+ "pad_token_id": 0,
112
+ "pruned_heads": {},
113
+ "torchscript": false,
114
+ "type_vocab_size": 2,
115
+ "use_bfloat16": false,
116
+ "vocab_size": 30522
117
+ }
118
+
119
+ 2020-09-28 11:54:04,385 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
120
+ 2020-09-28 11:54:06,572 Use adam as the optimizer, with init lr 0.0005
121
+ 2020-09-28 11:54:06,573 Image encoder is data paralleled now.
122
+ 2020-09-28 11:54:06,713 Load full model with backbone
123
+ 2020-09-28 11:54:06,716 Loading dataset
124
+ 2020-09-28 11:54:10,054 Input mode small: scaled by factor 2.0
125
+ 2020-09-28 11:54:16,134 Computing results...
126
+ 2020-09-28 11:54:34,686 Test: [0/196] Le 63.2128 (63.2128) Time 18.549 (0.000)
127
+ 2020-09-28 11:54:41,359 Test: [10/196] Le 61.8044 (62.3709) Time 1.064 (0.000)
128
+ 2020-09-28 11:54:49,852 Test: [20/196] Le 65.8550 (62.5176) Time 0.587 (0.000)
129
+ 2020-09-28 11:54:55,446 Test: [30/196] Le 63.9750 (62.6710) Time 0.520 (0.000)
130
+ 2020-09-28 11:55:02,567 Test: [40/196] Le 62.3553 (62.8770) Time 0.527 (0.000)
131
+ 2020-09-28 11:55:11,015 Test: [50/196] Le 64.0036 (62.9363) Time 0.612 (0.000)
132
+ 2020-09-28 11:55:20,332 Test: [60/196] Le 60.9549 (62.7791) Time 0.526 (0.000)
133
+ 2020-09-28 11:55:25,946 Test: [70/196] Le 60.1947 (62.6902) Time 0.582 (0.000)
134
+ 2020-09-28 11:55:34,129 Test: [80/196] Le 64.8011 (62.8592) Time 0.522 (0.000)
135
+ 2020-09-28 11:55:40,196 Test: [90/196] Le 63.6653 (62.8890) Time 0.569 (0.000)
136
+ 2020-09-28 11:55:49,241 Test: [100/196] Le 63.3472 (62.8601) Time 0.585 (0.000)
137
+ 2020-09-28 11:55:54,633 Test: [110/196] Le 59.5976 (62.7785) Time 0.523 (0.000)
138
+ 2020-09-28 11:56:01,693 Test: [120/196] Le 64.8909 (62.8633) Time 0.524 (0.000)
139
+ 2020-09-28 11:56:07,865 Test: [130/196] Le 64.0768 (62.8651) Time 0.519 (0.000)
140
+ 2020-09-28 11:56:16,333 Test: [140/196] Le 66.7743 (62.8628) Time 0.524 (0.000)
141
+ 2020-09-28 11:56:22,923 Test: [150/196] Le 63.4051 (62.8979) Time 0.601 (0.000)
142
+ 2020-09-28 11:56:33,561 Test: [160/196] Le 62.4264 (62.9269) Time 0.530 (0.000)
143
+ 2020-09-28 11:56:40,031 Test: [170/196] Le 64.1213 (62.9178) Time 0.544 (0.000)
144
+ 2020-09-28 11:56:49,370 Test: [180/196] Le 61.0356 (62.9458) Time 0.520 (0.000)
145
+ 2020-09-28 11:56:58,484 Test: [190/196] Le 61.7361 (62.9015) Time 0.522 (0.000)
146
+ 2020-09-28 11:57:03,232 Images: 5000, Captions: 25000
147
+ 2020-09-28 11:57:56,277 rsum: 440.0
148
+ 2020-09-28 11:57:56,277 Average i2t Recall: 79.3
149
+ 2020-09-28 11:57:56,277 Image to text: 59.1 85.9 92.8 1.0 3.9
150
+ 2020-09-28 11:57:56,277 Average t2i Recall: 67.4
151
+ 2020-09-28 11:57:56,277 Text to image: 44.1 74.1 84.0 2.0 13.6
coco_butd_grid_bert/test_log_cxc.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-03-24 02:15:44,036 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2021-03-24 02:15:44,685 Loading pretrained backbone weights from /tmp/data/coco/original_updown/original_updown_backbone.pth for backbone source vsepp_detector
3
+ 2021-03-24 02:15:44,845 Resnet backbone now has fixed blocks 2
4
+ 2021-03-24 02:15:46,432 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
5
+ 2021-03-24 02:15:46,432 Model config {
6
+ "architectures": [
7
+ "BertForMaskedLM"
8
+ ],
9
+ "attention_probs_dropout_prob": 0.1,
10
+ "finetuning_task": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "num_labels": 2,
22
+ "output_attentions": false,
23
+ "output_hidden_states": false,
24
+ "pad_token_id": 0,
25
+ "pruned_heads": {},
26
+ "torchscript": false,
27
+ "type_vocab_size": 2,
28
+ "use_bfloat16": false,
29
+ "vocab_size": 30522
30
+ }
31
+
32
+ 2021-03-24 02:15:47,971 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
33
+ 2021-03-24 02:15:50,479 Use adam as the optimizer, with init lr 0.0005
34
+ 2021-03-24 02:15:50,479 Image encoder is data paralleled now.
35
+ 2021-03-24 02:15:50,588 Loading dataset
36
+ 2021-03-24 02:15:50,627 Input images are scaled by factor 2.0
37
+ 2021-03-24 02:15:50,627 Computing results...
38
+ 2021-03-24 02:16:09,548 Test: [0/196] Le 63.2128 (63.2128) Time 18.919 (0.000)
39
+ 2021-03-24 02:16:13,442 Test: [10/196] Le 64.0036 (62.8289) Time 0.497 (0.000)
40
+ 2021-03-24 02:16:17,822 Test: [20/196] Le 63.3472 (62.6635) Time 0.377 (0.000)
41
+ 2021-03-24 02:16:23,507 Test: [30/196] Le 63.4051 (62.8526) Time 0.409 (0.000)
42
+ 2021-03-24 02:16:28,790 Test: [40/196] Le 62.8664 (62.7377) Time 0.375 (0.000)
43
+ 2021-03-24 02:16:34,061 Test: [50/196] Le 62.3589 (62.7401) Time 0.373 (0.000)
44
+ 2021-03-24 02:16:39,268 Test: [60/196] Le 60.6198 (62.8120) Time 0.374 (0.000)
45
+ 2021-03-24 02:16:44,652 Test: [70/196] Le 65.8031 (62.7668) Time 0.362 (0.000)
46
+ 2021-03-24 02:16:50,069 Test: [80/196] Le 63.0102 (62.8256) Time 0.403 (0.000)
47
+ 2021-03-24 02:16:55,160 Test: [90/196] Le 62.9977 (62.8399) Time 0.373 (0.000)
48
+ 2021-03-24 02:17:00,565 Test: [100/196] Le 64.9580 (62.8687) Time 0.372 (0.000)
49
+ 2021-03-24 02:17:05,758 Test: [110/196] Le 59.6950 (62.8464) Time 0.369 (0.000)
50
+ 2021-03-24 02:17:10,929 Test: [120/196] Le 66.2380 (62.8761) Time 0.398 (0.000)
51
+ 2021-03-24 02:17:16,213 Test: [130/196] Le 61.9589 (62.8930) Time 0.368 (0.000)
52
+ 2021-03-24 02:17:21,050 Test: [140/196] Le 65.5130 (62.9444) Time 0.371 (0.000)
53
+ 2021-03-24 02:17:26,390 Test: [150/196] Le 61.6122 (62.9479) Time 0.367 (0.000)
54
+ 2021-03-24 02:17:31,490 Test: [160/196] Le 64.1749 (62.9468) Time 0.383 (0.000)
55
+ 2021-03-24 02:17:36,813 Test: [170/196] Le 63.6706 (62.9037) Time 0.399 (0.000)
56
+ 2021-03-24 02:17:42,197 Test: [180/196] Le 61.4447 (62.8598) Time 0.364 (0.000)
57
+ 2021-03-24 02:17:47,674 Test: [190/196] Le 61.1852 (62.9282) Time 0.435 (0.000)
58
+ 2021-03-24 02:17:50,464 Images: 5000, Captions: 25000
59
+ 2021-03-24 02:18:26,903 T2I R@1: 46.244, R@5: 76.264, R@10: 85.68
60
+ 2021-03-24 02:18:26,903 I2T R@1: 60.62, R@5: 87.44, R@10: 94.0
61
+ 2021-03-24 02:19:35,544 I2I R@1: 44.41661448549363, R@5: 78.31350448758089, R@10: 87.20517637236485
62
+ 2021-03-24 02:19:35,545 T2T R@1: 45.936, R@5: 68.664, R@10: 77.784
coco_butd_grid_bert/test_log_ensemble.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-28 12:00:52,252 Image to text: 85.4, 98.2, 99.8, 1.0, 1.3
2
+ 2020-09-28 12:00:52,535 Text to image: 69.1, 93.2, 97.1, 1.0, 3.6
3
+ 2020-09-28 12:00:52,535 rsum: 542.7 ar: 94.5 ari: 86.4
4
+ 2020-09-28 12:00:52,870 Image to text: 81.4, 97.3, 99.3, 1.0, 1.6
5
+ 2020-09-28 12:00:53,154 Text to image: 68.1, 92.0, 96.8, 1.0, 3.5
6
+ 2020-09-28 12:00:53,154 rsum: 534.8 ar: 92.7 ari: 85.6
7
+ 2020-09-28 12:00:53,490 Image to text: 82.3, 97.7, 99.3, 1.0, 1.5
8
+ 2020-09-28 12:00:53,773 Text to image: 68.1, 93.1, 97.2, 1.0, 3.3
9
+ 2020-09-28 12:00:53,773 rsum: 537.7 ar: 93.1 ari: 86.1
10
+ 2020-09-28 12:00:54,109 Image to text: 80.5, 96.6, 99.3, 1.0, 1.5
11
+ 2020-09-28 12:00:54,394 Text to image: 66.4, 93.0, 97.3, 1.0, 2.8
12
+ 2020-09-28 12:00:54,394 rsum: 533.1 ar: 92.1 ari: 85.6
13
+ 2020-09-28 12:00:54,731 Image to text: 81.5, 97.6, 99.6, 1.0, 1.4
14
+ 2020-09-28 12:00:55,014 Text to image: 68.8, 93.3, 97.5, 1.0, 3.1
15
+ 2020-09-28 12:00:55,014 rsum: 538.2 ar: 92.9 ari: 86.5
16
+ 2020-09-28 12:00:55,014 -----------------------------------
17
+ 2020-09-28 12:00:55,014 Mean metrics:
18
+ 2020-09-28 12:00:55,014 rsum: 537.3
19
+ 2020-09-28 12:00:55,014 Average i2t Recall: 93.1
20
+ 2020-09-28 12:00:55,015 Image to text: 82.2 97.5 99.5 1.0 1.5
21
+ 2020-09-28 12:00:55,015 Average t2i Recall: 86.1
22
+ 2020-09-28 12:00:55,015 Text to image: 68.1 92.9 97.2 1.0 3.3
23
+ 2020-09-28 12:01:31,140 rsum: 451.8
24
+ 2020-09-28 12:01:31,141 Average i2t Recall: 81.4
25
+ 2020-09-28 12:01:31,141 Image to text: 62.5 87.8 94.0 1.0 3.3
26
+ 2020-09-28 12:01:31,141 Average t2i Recall: 69.2
27
+ 2020-09-28 12:01:31,141 Text to image: 46.0 75.8 85.7 2.0 12.1
coco_butd_grid_bigru/.DS_Store ADDED
Binary file (6.15 kB). View file
 
coco_butd_grid_bigru/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:344412c9afcf65ca7ad74c61f1811fa7ca82d21b1854d8258897c52b3b45339e
3
+ size 233566777
coco_butd_grid_bigru/test_log.txt ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-10 12:44:43,500 Loading pretrained backbone weights from hdfs:///home/byte_arnold_hl_vc/user/chenjiacheng/data/coco/original_updown/original_updown_backbone.pth for backbone source vsepp_detector
2
+ 2020-09-10 12:44:48,017 Resnet backbone now has fixed blocks 2
3
+ 2020-09-10 12:44:48,223 Use adam as the optimizer, with init lr 0.0005
4
+ 2020-09-10 12:44:48,223 Image encoder is data paralleled now.
5
+ 2020-09-10 12:44:48,350 Load full model with backbone
6
+ 2020-09-10 12:44:48,352 Loading dataset
7
+ 2020-09-10 12:44:52,867 Input mode small: scaled by factor 2.0
8
+ 2020-09-10 12:44:57,306 Computing results...
9
+ 2020-09-10 12:45:32,345 Test: [0/196] Le 61.5342 (61.5341) Time 35.037 (0.000)
10
+ 2020-09-10 12:45:35,608 Test: [10/196] Le 62.2255 (62.1526) Time 0.321 (0.000)
11
+ 2020-09-10 12:45:42,461 Test: [20/196] Le 65.6731 (62.1577) Time 0.371 (0.000)
12
+ 2020-09-10 12:45:48,545 Test: [30/196] Le 65.8112 (62.3998) Time 0.599 (0.000)
13
+ 2020-09-10 12:45:56,936 Test: [40/196] Le 63.2921 (62.5376) Time 1.323 (0.000)
14
+ 2020-09-10 12:46:03,611 Test: [50/196] Le 63.3793 (62.4683) Time 1.352 (0.000)
15
+ 2020-09-10 12:46:11,003 Test: [60/196] Le 59.8511 (62.3342) Time 1.421 (0.000)
16
+ 2020-09-10 12:46:17,578 Test: [70/196] Le 60.9663 (62.2750) Time 1.402 (0.000)
17
+ 2020-09-10 12:46:25,291 Test: [80/196] Le 65.1833 (62.4220) Time 0.678 (0.000)
18
+ 2020-09-10 12:46:31,947 Test: [90/196] Le 63.3992 (62.4493) Time 0.986 (0.000)
19
+ 2020-09-10 12:46:39,390 Test: [100/196] Le 62.4223 (62.4397) Time 0.317 (0.000)
20
+ 2020-09-10 12:46:45,981 Test: [110/196] Le 59.4193 (62.3932) Time 0.315 (0.000)
21
+ 2020-09-10 12:46:54,739 Test: [120/196] Le 63.0641 (62.4526) Time 0.324 (0.000)
22
+ 2020-09-10 12:47:00,989 Test: [130/196] Le 65.7017 (62.4476) Time 0.320 (0.000)
23
+ 2020-09-10 12:47:08,321 Test: [140/196] Le 67.1813 (62.4422) Time 0.367 (0.000)
24
+ 2020-09-10 12:47:15,198 Test: [150/196] Le 63.0721 (62.4588) Time 0.319 (0.000)
25
+ 2020-09-10 12:47:22,794 Test: [160/196] Le 61.7248 (62.4631) Time 0.320 (0.000)
26
+ 2020-09-10 12:47:29,238 Test: [170/196] Le 62.3504 (62.4660) Time 0.317 (0.000)
27
+ 2020-09-10 12:47:36,362 Test: [180/196] Le 60.4507 (62.4707) Time 0.321 (0.000)
28
+ 2020-09-10 12:47:42,892 Test: [190/196] Le 61.8195 (62.4209) Time 0.323 (0.000)
29
+ 2020-09-10 12:47:46,943 Images: 5000, Captions: 25000
30
+ 2020-09-10 12:48:20,121 calculate similarity time:
31
+ 2020-09-10 12:48:20,491 Image to text: 78.8, 96.4, 98.2, 1.0, 1.7
32
+ 2020-09-10 12:48:20,777 Text to image: 63.4, 91.3, 96.0, 1.0, 3.9
33
+ 2020-09-10 12:48:20,777 rsum: 524.1 ar: 91.1 ari: 83.6
34
+ 2020-09-10 12:48:20,855 calculate similarity time:
35
+ 2020-09-10 12:48:21,213 Image to text: 79.2, 94.8, 98.3, 1.0, 1.9
36
+ 2020-09-10 12:48:21,507 Text to image: 61.9, 89.8, 95.4, 1.0, 4.4
37
+ 2020-09-10 12:48:21,507 rsum: 519.4 ar: 90.8 ari: 82.4
38
+ 2020-09-10 12:48:21,577 calculate similarity time:
39
+ 2020-09-10 12:48:21,917 Image to text: 79.4, 96.3, 99.2, 1.0, 1.6
40
+ 2020-09-10 12:48:22,210 Text to image: 63.9, 90.4, 96.0, 1.0, 3.7
41
+ 2020-09-10 12:48:22,210 rsum: 525.3 ar: 91.6 ari: 83.5
42
+ 2020-09-10 12:48:22,304 calculate similarity time:
43
+ 2020-09-10 12:48:22,646 Image to text: 75.6, 95.4, 98.0, 1.0, 2.0
44
+ 2020-09-10 12:48:22,939 Text to image: 61.2, 90.1, 95.8, 1.0, 3.5
45
+ 2020-09-10 12:48:22,940 rsum: 516.1 ar: 89.7 ari: 82.4
46
+ 2020-09-10 12:48:23,016 calculate similarity time:
47
+ 2020-09-10 12:48:23,359 Image to text: 76.9, 96.1, 98.7, 1.0, 1.8
48
+ 2020-09-10 12:48:23,651 Text to image: 62.7, 91.4, 96.7, 1.0, 3.4
49
+ 2020-09-10 12:48:23,651 rsum: 522.5 ar: 90.6 ari: 83.6
50
+ 2020-09-10 12:48:23,651 -----------------------------------
51
+ 2020-09-10 12:48:23,651 Mean metrics:
52
+ 2020-09-10 12:48:23,651 rsum: 521.5
53
+ 2020-09-10 12:48:23,651 Average i2t Recall: 90.8
54
+ 2020-09-10 12:48:23,651 Image to text: 78.0 95.8 98.5 1.0 1.8
55
+ 2020-09-10 12:48:23,651 Average t2i Recall: 83.1
56
+ 2020-09-10 12:48:23,651 Text to image: 62.6 90.6 96.0 1.0 3.8
57
+
58
+
59
+ 2020-09-10 12:48:27,204 Loading pretrained backbone weights from hdfs:///home/byte_arnold_hl_vc/user/chenjiacheng/data/coco/original_updown/original_updown_backbone.pth for backbone source vsepp_detector
60
+ 2020-09-10 12:48:32,040 Resnet backbone now has fixed blocks 2
61
+ 2020-09-10 12:48:32,213 Use adam as the optimizer, with init lr 0.0005
62
+ 2020-09-10 12:48:32,213 Image encoder is data paralleled now.
63
+ 2020-09-10 12:48:32,349 Load full model with backbone
64
+ 2020-09-10 12:48:32,351 Loading dataset
65
+ 2020-09-10 12:48:35,620 Input mode small: scaled by factor 2.0
66
+ 2020-09-10 12:48:41,792 Computing results...
67
+ 2020-09-10 12:48:58,912 Test: [0/196] Le 61.5342 (61.5341) Time 17.117 (0.000)
68
+ 2020-09-10 12:49:05,627 Test: [10/196] Le 62.2255 (62.1526) Time 1.811 (0.000)
69
+ 2020-09-10 12:49:12,470 Test: [20/196] Le 65.6731 (62.1577) Time 0.571 (0.000)
70
+ 2020-09-10 12:49:18,959 Test: [30/196] Le 65.8112 (62.3998) Time 0.907 (0.000)
71
+ 2020-09-10 12:49:27,170 Test: [40/196] Le 63.2921 (62.5376) Time 0.355 (0.000)
72
+ 2020-09-10 12:49:33,808 Test: [50/196] Le 63.3793 (62.4683) Time 0.335 (0.000)
73
+ 2020-09-10 12:49:43,444 Test: [60/196] Le 59.8511 (62.3342) Time 0.390 (0.000)
74
+ 2020-09-10 12:49:48,822 Test: [70/196] Le 60.9663 (62.2750) Time 0.338 (0.000)
75
+ 2020-09-10 12:49:56,979 Test: [80/196] Le 65.1833 (62.4220) Time 0.320 (0.000)
76
+ 2020-09-10 12:50:03,598 Test: [90/196] Le 63.3992 (62.4493) Time 0.316 (0.000)
77
+ 2020-09-10 12:50:11,072 Test: [100/196] Le 62.4223 (62.4397) Time 0.326 (0.000)
78
+ 2020-09-10 12:50:17,790 Test: [110/196] Le 59.4193 (62.3932) Time 0.328 (0.000)
79
+ 2020-09-10 12:50:25,012 Test: [120/196] Le 63.0641 (62.4526) Time 0.330 (0.000)
80
+ 2020-09-10 12:50:31,297 Test: [130/196] Le 65.7017 (62.4476) Time 0.325 (0.000)
81
+ 2020-09-10 12:50:38,239 Test: [140/196] Le 67.1813 (62.4422) Time 0.326 (0.000)
82
+ 2020-09-10 12:50:45,220 Test: [150/196] Le 63.0721 (62.4588) Time 0.319 (0.000)
83
+ 2020-09-10 12:50:52,620 Test: [160/196] Le 61.7248 (62.4631) Time 0.321 (0.000)
84
+ 2020-09-10 12:50:59,469 Test: [170/196] Le 62.3504 (62.4660) Time 0.322 (0.000)
85
+ 2020-09-10 12:51:07,511 Test: [180/196] Le 60.4507 (62.4707) Time 0.324 (0.000)
86
+ 2020-09-10 12:51:14,207 Test: [190/196] Le 61.8195 (62.4209) Time 0.322 (0.000)
87
+ 2020-09-10 12:51:17,881 Images: 5000, Captions: 25000
88
+ 2020-09-10 12:51:45,001 calculate similarity time:
89
+ 2020-09-10 12:52:03,960 rsum: 423.8
90
+ 2020-09-10 12:52:03,960 Average i2t Recall: 77.0
91
+ 2020-09-10 12:52:03,960 Image to text: 56.2 83.7 90.9 1.0 4.9
92
+ 2020-09-10 12:52:03,960 Average t2i Recall: 64.3
93
+ 2020-09-10 12:52:03,960 Text to image: 40.8 70.6 81.5 2.0 14.7
coco_butd_grid_bigru/test_log_ensemble.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-10-29 12:48:04,561 Image to text: 82.2, 97.0, 99.0, 1.0, 1.5
2
+ 2020-10-29 12:48:04,846 Text to image: 66.0, 92.2, 96.5, 1.0, 3.5
3
+ 2020-10-29 12:48:04,846 rsum: 532.8 ar: 92.7 ari: 84.9
4
+ 2020-10-29 12:48:05,242 Image to text: 79.7, 95.8, 99.2, 1.0, 1.7
5
+ 2020-10-29 12:48:05,529 Text to image: 64.0, 90.8, 95.7, 1.0, 4.0
6
+ 2020-10-29 12:48:05,529 rsum: 525.3 ar: 91.6 ari: 83.5
7
+ 2020-10-29 12:48:05,869 Image to text: 79.9, 97.6, 99.6, 1.0, 1.5
8
+ 2020-10-29 12:48:06,158 Text to image: 65.3, 91.8, 96.7, 1.0, 3.3
9
+ 2020-10-29 12:48:06,158 rsum: 530.9 ar: 92.4 ari: 84.6
10
+ 2020-10-29 12:48:06,497 Image to text: 78.6, 96.1, 98.4, 1.0, 1.7
11
+ 2020-10-29 12:48:06,785 Text to image: 63.6, 91.5, 96.7, 1.0, 3.0
12
+ 2020-10-29 12:48:06,785 rsum: 524.9 ar: 91.0 ari: 83.9
13
+ 2020-10-29 12:48:07,125 Image to text: 79.5, 96.6, 98.9, 1.0, 1.6
14
+ 2020-10-29 12:48:07,412 Text to image: 65.0, 92.2, 97.0, 1.0, 3.1
15
+ 2020-10-29 12:48:07,412 rsum: 529.2 ar: 91.7 ari: 84.7
16
+ 2020-10-29 12:48:07,412 -----------------------------------
17
+ 2020-10-29 12:48:07,412 Mean metrics:
18
+ 2020-10-29 12:48:07,412 rsum: 528.6
19
+ 2020-10-29 12:48:07,412 Average i2t Recall: 91.9
20
+ 2020-10-29 12:48:07,412 Image to text: 80.0 96.6 99.0 1.0 1.6
21
+ 2020-10-29 12:48:07,412 Average t2i Recall: 84.3
22
+ 2020-10-29 12:48:07,412 Text to image: 64.8 91.7 96.5 1.0 3.4
23
+ 2020-10-29 12:48:45,395 rsum: 437.5
24
+ 2020-10-29 12:48:45,395 Average i2t Recall: 79.6
25
+ 2020-10-29 12:48:45,395 Image to text: 60.0 86.2 92.7 1.0 3.9
26
+ 2020-10-29 12:48:45,395 Average t2i Recall: 66.2
27
+ 2020-10-29 12:48:45,395 Text to image: 42.7 72.6 83.3 2.0 12.8
coco_butd_region_bert/.DS_Store ADDED
Binary file (6.15 kB). View file
 
coco_butd_region_bert/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32e8660a7dd264b014012a4d3b3564dd76b66f021070e9e2df120e44ceed1f8
3
+ size 455953079
coco_butd_region_bert/test_log.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-19 12:19:01,737 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2020-09-19 12:19:03,261 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
3
+ 2020-09-19 12:19:03,261 Model config {
4
+ "architectures": [
5
+ "BertForMaskedLM"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "finetuning_task": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "num_labels": 2,
20
+ "output_attentions": false,
21
+ "output_hidden_states": false,
22
+ "pad_token_id": 0,
23
+ "pruned_heads": {},
24
+ "torchscript": false,
25
+ "type_vocab_size": 2,
26
+ "use_bfloat16": false,
27
+ "vocab_size": 30522
28
+ }
29
+
30
+ 2020-09-19 12:19:04,756 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
31
+ 2020-09-19 12:19:07,026 Use adam as the optimizer, with init lr 0.0005
32
+ 2020-09-19 12:19:07,026 Image encoder is data paralleled now.
33
+ 2020-09-19 12:19:07,039 Loading dataset
34
+ 2020-09-19 12:21:15,606 Computing results...
35
+ 2020-09-19 12:21:32,949 Test: [0/196] Le 62.1216 (62.1216) Time 17.342 (0.000)
36
+ 2020-09-19 12:21:34,949 Test: [10/196] Le 62.3208 (62.0296) Time 0.212 (0.000)
37
+ 2020-09-19 12:21:36,924 Test: [20/196] Le 61.6259 (61.7868) Time 0.196 (0.000)
38
+ 2020-09-19 12:21:38,923 Test: [30/196] Le 62.5433 (61.8626) Time 0.197 (0.000)
39
+ 2020-09-19 12:21:40,909 Test: [40/196] Le 62.5389 (61.6597) Time 0.192 (0.000)
40
+ 2020-09-19 12:21:42,868 Test: [50/196] Le 62.0505 (61.6832) Time 0.204 (0.000)
41
+ 2020-09-19 12:21:44,851 Test: [60/196] Le 60.7455 (61.7560) Time 0.199 (0.000)
42
+ 2020-09-19 12:21:46,795 Test: [70/196] Le 63.2594 (61.6683) Time 0.194 (0.000)
43
+ 2020-09-19 12:21:48,835 Test: [80/196] Le 62.5849 (61.7312) Time 0.220 (0.000)
44
+ 2020-09-19 12:21:50,822 Test: [90/196] Le 61.6402 (61.7597) Time 0.196 (0.000)
45
+ 2020-09-19 12:21:52,830 Test: [100/196] Le 63.8944 (61.8340) Time 0.198 (0.000)
46
+ 2020-09-19 12:21:54,811 Test: [110/196] Le 61.2093 (61.8118) Time 0.190 (0.000)
47
+ 2020-09-19 12:21:56,822 Test: [120/196] Le 66.6486 (61.8693) Time 0.203 (0.000)
48
+ 2020-09-19 12:21:58,862 Test: [130/196] Le 60.1235 (61.8624) Time 0.202 (0.000)
49
+ 2020-09-19 12:22:00,809 Test: [140/196] Le 64.4691 (61.9337) Time 0.188 (0.000)
50
+ 2020-09-19 12:22:02,771 Test: [150/196] Le 59.6040 (61.9000) Time 0.193 (0.000)
51
+ 2020-09-19 12:22:04,715 Test: [160/196] Le 62.9591 (61.9137) Time 0.191 (0.000)
52
+ 2020-09-19 12:22:06,701 Test: [170/196] Le 61.7752 (61.8860) Time 0.200 (0.000)
53
+ 2020-09-19 12:22:08,728 Test: [180/196] Le 60.3819 (61.8454) Time 0.193 (0.000)
54
+ 2020-09-19 12:22:10,707 Test: [190/196] Le 59.6309 (61.8942) Time 0.193 (0.000)
55
+ 2020-09-19 12:22:11,655 Images: 5000, Captions: 25000
56
+ 2020-09-19 12:22:45,146 calculate similarity time:
57
+ 2020-09-19 12:22:45,524 Image to text: 81.9, 97.3, 99.2, 1.0, 1.5
58
+ 2020-09-19 12:22:45,844 Text to image: 66.7, 92.0, 96.3, 1.0, 4.2
59
+ 2020-09-19 12:22:45,845 rsum: 533.4 ar: 92.8 ari: 85.0
60
+ 2020-09-19 12:22:45,947 calculate similarity time:
61
+ 2020-09-19 12:22:46,351 Image to text: 79.0, 96.2, 98.5, 1.0, 1.8
62
+ 2020-09-19 12:22:46,672 Text to image: 64.7, 90.6, 95.8, 1.0, 4.2
63
+ 2020-09-19 12:22:46,672 rsum: 524.8 ar: 91.2 ari: 83.7
64
+ 2020-09-19 12:22:46,753 calculate similarity time:
65
+ 2020-09-19 12:22:47,156 Image to text: 79.6, 96.2, 98.8, 1.0, 1.6
66
+ 2020-09-19 12:22:47,476 Text to image: 64.4, 91.2, 96.1, 1.0, 3.7
67
+ 2020-09-19 12:22:47,476 rsum: 526.3 ar: 91.5 ari: 83.9
68
+ 2020-09-19 12:22:47,568 calculate similarity time:
69
+ 2020-09-19 12:22:47,974 Image to text: 79.3, 96.1, 98.8, 1.0, 1.7
70
+ 2020-09-19 12:22:48,295 Text to image: 62.4, 91.1, 96.4, 1.0, 3.3
71
+ 2020-09-19 12:22:48,296 rsum: 524.2 ar: 91.4 ari: 83.3
72
+ 2020-09-19 12:22:48,375 calculate similarity time:
73
+ 2020-09-19 12:22:48,778 Image to text: 78.5, 96.2, 99.1, 1.0, 1.6
74
+ 2020-09-19 12:22:49,099 Text to image: 65.7, 92.2, 96.8, 1.0, 3.6
75
+ 2020-09-19 12:22:49,099 rsum: 528.6 ar: 91.3 ari: 84.9
76
+ 2020-09-19 12:22:49,099 -----------------------------------
77
+ 2020-09-19 12:22:49,099 Mean metrics:
78
+ 2020-09-19 12:22:49,099 rsum: 527.5
79
+ 2020-09-19 12:22:49,099 Average i2t Recall: 91.6
80
+ 2020-09-19 12:22:49,099 Image to text: 79.7 96.4 98.9 1.0 1.6
81
+ 2020-09-19 12:22:49,099 Average t2i Recall: 84.2
82
+ 2020-09-19 12:22:49,099 Text to image: 64.8 91.4 96.3 1.0 3.8
83
+
84
+
85
+ 2020-09-19 12:22:51,067 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
86
+ 2020-09-19 12:22:52,532 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
87
+ 2020-09-19 12:22:52,533 Model config {
88
+ "architectures": [
89
+ "BertForMaskedLM"
90
+ ],
91
+ "attention_probs_dropout_prob": 0.1,
92
+ "finetuning_task": null,
93
+ "hidden_act": "gelu",
94
+ "hidden_dropout_prob": 0.1,
95
+ "hidden_size": 768,
96
+ "initializer_range": 0.02,
97
+ "intermediate_size": 3072,
98
+ "layer_norm_eps": 1e-12,
99
+ "max_position_embeddings": 512,
100
+ "model_type": "bert",
101
+ "num_attention_heads": 12,
102
+ "num_hidden_layers": 12,
103
+ "num_labels": 2,
104
+ "output_attentions": false,
105
+ "output_hidden_states": false,
106
+ "pad_token_id": 0,
107
+ "pruned_heads": {},
108
+ "torchscript": false,
109
+ "type_vocab_size": 2,
110
+ "use_bfloat16": false,
111
+ "vocab_size": 30522
112
+ }
113
+
114
+ 2020-09-19 12:22:53,926 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
115
+ 2020-09-19 12:22:56,137 Use adam as the optimizer, with init lr 0.0005
116
+ 2020-09-19 12:22:56,137 Image encoder is data paralleled now.
117
+ 2020-09-19 12:22:56,150 Loading dataset
118
+ 2020-09-19 12:25:11,707 Computing results...
119
+ 2020-09-19 12:25:11,921 Test: [0/196] Le 62.1216 (62.1216) Time 0.212 (0.000)
120
+ 2020-09-19 12:25:13,893 Test: [10/196] Le 62.3208 (62.0296) Time 0.210 (0.000)
121
+ 2020-09-19 12:25:15,880 Test: [20/196] Le 61.6259 (61.7868) Time 0.197 (0.000)
122
+ 2020-09-19 12:25:17,889 Test: [30/196] Le 62.5433 (61.8626) Time 0.230 (0.000)
123
+ 2020-09-19 12:25:19,883 Test: [40/196] Le 62.5389 (61.6597) Time 0.192 (0.000)
124
+ 2020-09-19 12:25:21,849 Test: [50/196] Le 62.0505 (61.6832) Time 0.198 (0.000)
125
+ 2020-09-19 12:25:23,844 Test: [60/196] Le 60.7455 (61.7560) Time 0.232 (0.000)
126
+ 2020-09-19 12:25:25,804 Test: [70/196] Le 63.2594 (61.6683) Time 0.197 (0.000)
127
+ 2020-09-19 12:25:27,861 Test: [80/196] Le 62.5849 (61.7312) Time 0.220 (0.000)
128
+ 2020-09-19 12:25:29,850 Test: [90/196] Le 61.6402 (61.7597) Time 0.201 (0.000)
129
+ 2020-09-19 12:25:31,876 Test: [100/196] Le 63.8944 (61.8340) Time 0.201 (0.000)
130
+ 2020-09-19 12:25:33,838 Test: [110/196] Le 61.2093 (61.8118) Time 0.191 (0.000)
131
+ 2020-09-19 12:25:35,804 Test: [120/196] Le 66.6486 (61.8693) Time 0.202 (0.000)
132
+ 2020-09-19 12:25:37,812 Test: [130/196] Le 60.1235 (61.8624) Time 0.200 (0.000)
133
+ 2020-09-19 12:25:39,779 Test: [140/196] Le 64.4691 (61.9337) Time 0.192 (0.000)
134
+ 2020-09-19 12:25:41,746 Test: [150/196] Le 59.6040 (61.9000) Time 0.198 (0.000)
135
+ 2020-09-19 12:25:43,717 Test: [160/196] Le 62.9591 (61.9137) Time 0.197 (0.000)
136
+ 2020-09-19 12:25:45,698 Test: [170/196] Le 61.7752 (61.8860) Time 0.203 (0.000)
137
+ 2020-09-19 12:25:47,779 Test: [180/196] Le 60.3819 (61.8454) Time 0.198 (0.000)
138
+ 2020-09-19 12:25:49,797 Test: [190/196] Le 59.6309 (61.8942) Time 0.194 (0.000)
139
+ 2020-09-19 12:25:50,722 Images: 5000, Captions: 25000
140
+ 2020-09-19 12:26:45,893 rsum: 434.3
141
+ 2020-09-19 12:26:45,894 Average i2t Recall: 78.7
142
+ 2020-09-19 12:26:45,894 Image to text: 58.3 85.3 92.3 1.0 4.1
143
+ 2020-09-19 12:26:45,894 Average t2i Recall: 66.1
144
+ 2020-09-19 12:26:45,894 Text to image: 42.4 72.7 83.2 2.0 14.8
coco_butd_region_bert/test_log_ensemble.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 2020-09-20 13:00:44,911 rsum: 472.2
2
+ 2020-09-20 13:00:44,911 Average i2t Recall: 84.6
3
+ 2020-09-20 13:00:44,911 Image to text: 67.6 90.5 95.6 1.0 2.8
4
+ 2020-09-20 13:00:44,911 Average t2i Recall: 72.8
5
+ 2020-09-20 13:00:44,911 Text to image: 50.9 79.6 87.9 1.0 10.9
coco_butd_region_bigru/.DS_Store ADDED
Binary file (6.15 kB). View file
 
coco_butd_region_bigru/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5c36b2d806daa99302e01460fb341e9d50e39844f8b4b938d5cbbe666de746
3
+ size 61036407
coco_butd_region_bigru/test_log.txt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-11-06 00:21:29,612 Use adam as the optimizer, with init lr 0.0005
2
+ 2020-11-06 00:21:29,613 Loading dataset
3
+ 2020-11-06 00:23:17,657 Computing results...
4
+ 2020-11-06 00:23:18,885 Test: [0/196] Le 60.7781 (60.7781) Time 1.227 (0.000)
5
+ 2020-11-06 00:23:19,546 Test: [10/196] Le 61.8171 (61.7981) Time 0.055 (0.000)
6
+ 2020-11-06 00:23:20,014 Test: [20/196] Le 61.3542 (61.4344) Time 0.036 (0.000)
7
+ 2020-11-06 00:23:20,480 Test: [30/196] Le 62.5104 (61.6404) Time 0.052 (0.000)
8
+ 2020-11-06 00:23:20,935 Test: [40/196] Le 62.1959 (61.4912) Time 0.054 (0.000)
9
+ 2020-11-06 00:23:21,401 Test: [50/196] Le 60.8244 (61.4676) Time 0.054 (0.000)
10
+ 2020-11-06 00:23:21,861 Test: [60/196] Le 61.1740 (61.5172) Time 0.040 (0.000)
11
+ 2020-11-06 00:23:22,311 Test: [70/196] Le 62.1174 (61.4565) Time 0.036 (0.000)
12
+ 2020-11-06 00:23:22,845 Test: [80/196] Le 61.9037 (61.4808) Time 0.061 (0.000)
13
+ 2020-11-06 00:23:23,301 Test: [90/196] Le 61.3814 (61.4537) Time 0.044 (0.000)
14
+ 2020-11-06 00:23:23,749 Test: [100/196] Le 62.1759 (61.4920) Time 0.042 (0.000)
15
+ 2020-11-06 00:23:24,220 Test: [110/196] Le 60.1641 (61.4669) Time 0.047 (0.000)
16
+ 2020-11-06 00:23:24,693 Test: [120/196] Le 65.6565 (61.4961) Time 0.043 (0.000)
17
+ 2020-11-06 00:23:25,149 Test: [130/196] Le 61.1231 (61.5035) Time 0.055 (0.000)
18
+ 2020-11-06 00:23:25,603 Test: [140/196] Le 64.9178 (61.5482) Time 0.041 (0.000)
19
+ 2020-11-06 00:23:26,071 Test: [150/196] Le 60.8170 (61.5275) Time 0.039 (0.000)
20
+ 2020-11-06 00:23:26,539 Test: [160/196] Le 61.9198 (61.5027) Time 0.047 (0.000)
21
+ 2020-11-06 00:23:27,010 Test: [170/196] Le 62.3629 (61.4701) Time 0.053 (0.000)
22
+ 2020-11-06 00:23:27,498 Test: [180/196] Le 59.6263 (61.4354) Time 0.044 (0.000)
23
+ 2020-11-06 00:23:27,923 Test: [190/196] Le 60.3929 (61.4824) Time 0.037 (0.000)
24
+ 2020-11-06 00:23:28,369 Images: 5000, Captions: 25000
25
+ 2020-11-06 00:23:58,870 Image to text: 81.4, 95.9, 98.6, 1.0, 1.7
26
+ 2020-11-06 00:23:59,186 Text to image: 62.7, 90.3, 95.3, 1.0, 4.2
27
+ 2020-11-06 00:23:59,186 rsum: 524.2 ar: 92.0 ari: 82.8
28
+ 2020-11-06 00:23:59,292 calculate similarity time:
29
+ 2020-11-06 00:23:59,634 Image to text: 78.5, 95.4, 98.6, 1.0, 1.8
30
+ 2020-11-06 00:23:59,925 Text to image: 62.1, 89.4, 95.0, 1.0, 4.3
31
+ 2020-11-06 00:23:59,925 rsum: 519.0 ar: 90.8 ari: 82.2
32
+ 2020-11-06 00:24:00,015 calculate similarity time:
33
+ 2020-11-06 00:24:00,384 Image to text: 78.0, 96.7, 99.0, 1.0, 1.7
34
+ 2020-11-06 00:24:00,674 Text to image: 61.9, 90.6, 95.7, 1.0, 4.0
35
+ 2020-11-06 00:24:00,674 rsum: 521.9 ar: 91.2 ari: 82.7
36
+ 2020-11-06 00:24:00,757 calculate similarity time:
37
+ 2020-11-06 00:24:01,119 Image to text: 76.3, 96.3, 98.6, 1.0, 1.7
38
+ 2020-11-06 00:24:01,410 Text to image: 60.2, 90.2, 96.0, 1.0, 3.5
39
+ 2020-11-06 00:24:01,411 rsum: 517.6 ar: 90.4 ari: 82.1
40
+ 2020-11-06 00:24:01,489 calculate similarity time:
41
+ 2020-11-06 00:24:01,860 Image to text: 78.5, 95.6, 98.6, 1.0, 1.7
42
+ 2020-11-06 00:24:02,151 Text to image: 61.5, 90.7, 96.0, 1.0, 3.9
43
+ 2020-11-06 00:24:02,151 rsum: 521.0 ar: 90.9 ari: 82.8
44
+ 2020-11-06 00:24:02,152 -----------------------------------
45
+ 2020-11-06 00:24:02,152 Mean metrics:
46
+ 2020-11-06 00:24:02,152 rsum: 520.7
47
+ 2020-11-06 00:24:02,152 Average i2t Recall: 91.1
48
+ 2020-11-06 00:24:02,152 Image to text: 78.5 96.0 98.7 1.0 1.7
49
+ 2020-11-06 00:24:02,152 Average t2i Recall: 82.5
50
+ 2020-11-06 00:24:02,152 Text to image: 61.7 90.3 95.6 1.0 4.0
51
+
52
+
53
+
54
+ 2020-11-06 00:24:05,288 Use adam as the optimizer, with init lr 0.0005
55
+ 2020-11-06 00:24:05,289 Loading dataset
56
+ 2020-11-06 00:26:08,926 Computing results...
57
+ 2020-11-06 00:26:10,093 Test: [0/196] Le 60.7781 (60.7781) Time 1.166 (0.000)
58
+ 2020-11-06 00:26:10,566 Test: [10/196] Le 61.8171 (61.7981) Time 0.044 (0.000)
59
+ 2020-11-06 00:26:11,029 Test: [20/196] Le 61.3542 (61.4344) Time 0.046 (0.000)
60
+ 2020-11-06 00:26:11,498 Test: [30/196] Le 62.5104 (61.6404) Time 0.043 (0.000)
61
+ 2020-11-06 00:26:11,986 Test: [40/196] Le 62.1959 (61.4912) Time 0.048 (0.000)
62
+ 2020-11-06 00:26:12,456 Test: [50/196] Le 60.8244 (61.4676) Time 0.041 (0.000)
63
+ 2020-11-06 00:26:12,926 Test: [60/196] Le 61.1740 (61.5172) Time 0.055 (0.000)
64
+ 2020-11-06 00:26:13,382 Test: [70/196] Le 62.1174 (61.4565) Time 0.052 (0.000)
65
+ 2020-11-06 00:26:13,892 Test: [80/196] Le 61.9037 (61.4808) Time 0.053 (0.000)
66
+ 2020-11-06 00:26:14,379 Test: [90/196] Le 61.3814 (61.4537) Time 0.046 (0.000)
67
+ 2020-11-06 00:26:14,878 Test: [100/196] Le 62.1759 (61.4920) Time 0.062 (0.000)
68
+ 2020-11-06 00:26:15,332 Test: [110/196] Le 60.1641 (61.4669) Time 0.044 (0.000)
69
+ 2020-11-06 00:26:15,804 Test: [120/196] Le 65.6565 (61.4961) Time 0.053 (0.000)
70
+ 2020-11-06 00:26:16,282 Test: [130/196] Le 61.1231 (61.5035) Time 0.056 (0.000)
71
+ 2020-11-06 00:26:16,748 Test: [140/196] Le 64.9178 (61.5482) Time 0.051 (0.000)
72
+ 2020-11-06 00:26:17,250 Test: [150/196] Le 60.8170 (61.5275) Time 0.060 (0.000)
73
+ 2020-11-06 00:26:17,729 Test: [160/196] Le 61.9198 (61.5027) Time 0.056 (0.000)
74
+ 2020-11-06 00:26:18,199 Test: [170/196] Le 62.3629 (61.4701) Time 0.045 (0.000)
75
+ 2020-11-06 00:26:18,665 Test: [180/196] Le 59.6263 (61.4354) Time 0.047 (0.000)
76
+ 2020-11-06 00:26:19,073 Test: [190/196] Le 60.3929 (61.4824) Time 0.032 (0.000)
77
+ 2020-11-06 00:26:19,566 Images: 5000, Captions: 25000
78
+ 2020-11-06 00:27:13,640 rsum: 422.1
79
+ 2020-11-06 00:27:13,640 Average i2t Recall: 77.2
80
+ 2020-11-06 00:27:13,640 Image to text: 56.6 83.6 91.4 1.0 4.5
81
+ 2020-11-06 00:27:13,640 Average t2i Recall: 63.5
82
+ 2020-11-06 00:27:13,640 Text to image: 39.3 69.9 81.1 2.0 15.7
coco_butd_region_bigru/test_log_ensemble.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-11-06 15:18:38,637 Image to text: 82.5, 97.5, 99.3, 1.0, 1.5
2
+ 2020-11-06 15:18:38,920 Text to image: 65.6, 92.0, 96.4, 1.0, 3.6
3
+ 2020-11-06 15:18:38,921 rsum: 533.3 ar: 93.1 ari: 84.7
4
+ 2020-11-06 15:18:39,257 Image to text: 79.5, 95.9, 98.8, 1.0, 1.7
5
+ 2020-11-06 15:18:39,542 Text to image: 64.3, 90.8, 95.7, 1.0, 3.9
6
+ 2020-11-06 15:18:39,542 rsum: 525.1 ar: 91.4 ari: 83.6
7
+ 2020-11-06 15:18:39,879 Image to text: 81.2, 97.7, 99.5, 1.0, 1.5
8
+ 2020-11-06 15:18:40,161 Text to image: 65.4, 91.6, 96.6, 1.0, 3.5
9
+ 2020-11-06 15:18:40,161 rsum: 532.0 ar: 92.8 ari: 84.5
10
+ 2020-11-06 15:18:40,500 Image to text: 78.5, 96.5, 98.6, 1.0, 1.7
11
+ 2020-11-06 15:18:40,785 Text to image: 63.3, 91.4, 96.5, 1.0, 3.1
12
+ 2020-11-06 15:18:40,785 rsum: 524.8 ar: 91.2 ari: 83.7
13
+ 2020-11-06 15:18:41,122 Image to text: 78.4, 97.2, 99.0, 1.0, 1.6
14
+ 2020-11-06 15:18:41,406 Text to image: 65.1, 92.2, 97.0, 1.0, 3.2
15
+ 2020-11-06 15:18:41,406 rsum: 529.0 ar: 91.5 ari: 84.8
16
+ 2020-11-06 15:18:41,406 -----------------------------------
17
+ 2020-11-06 15:18:41,406 Mean metrics:
18
+ 2020-11-06 15:18:41,406 rsum: 528.8
19
+ 2020-11-06 15:18:41,406 Average i2t Recall: 92.0
20
+ 2020-11-06 15:18:41,406 Image to text: 80.0 97.0 99.0 1.0 1.6
21
+ 2020-11-06 15:18:41,406 Average t2i Recall: 84.3
22
+ 2020-11-06 15:18:41,406 Text to image: 64.8 91.6 96.5 1.0 3.4
23
+ 2020-11-06 15:19:20,010 rsum: 437.5
24
+ 2020-11-06 15:19:20,011 Average i2t Recall: 79.6
25
+ 2020-11-06 15:19:20,011 Image to text: 59.8 86.1 92.8 1.0 3.9
26
+ 2020-11-06 15:19:20,011 Average t2i Recall: 66.3
27
+ 2020-11-06 15:19:20,011 Text to image: 42.7 72.8 83.3 2.0 13.0
coco_wsl_grid_bert/.DS_Store ADDED
Binary file (6.15 kB). View file
 
coco_wsl_grid_bert/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd28ba22b741a90e0871b9d0660197fecab7789dcc5078b6a9c94a936ef80715
3
+ size 805843314
coco_wsl_grid_bert/test_log.txt ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-10-02 01:27:56,847 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2020-10-02 01:27:58,327 Did not load checkpoints
3
+ 2020-10-02 01:27:58,328 Resnet backbone now has fixed blocks 2
4
+ 2020-10-02 01:27:59,708 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
5
+ 2020-10-02 01:27:59,708 Model config {
6
+ "architectures": [
7
+ "BertForMaskedLM"
8
+ ],
9
+ "attention_probs_dropout_prob": 0.1,
10
+ "finetuning_task": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "num_labels": 2,
22
+ "output_attentions": false,
23
+ "output_hidden_states": false,
24
+ "pad_token_id": 0,
25
+ "pruned_heads": {},
26
+ "torchscript": false,
27
+ "type_vocab_size": 2,
28
+ "use_bfloat16": false,
29
+ "vocab_size": 30522
30
+ }
31
+
32
+ 2020-10-02 01:28:01,097 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
33
+ 2020-10-02 01:28:03,256 Use adam as the optimizer, with init lr 0.0005
34
+ 2020-10-02 01:28:03,257 Image encoder is data paralleled now.
35
+ 2020-10-02 01:28:03,400 Load full model with backbone
36
+ 2020-10-02 01:28:03,402 Loading dataset
37
+ 2020-10-02 01:28:07,517 Input mode small: scaled by factor 2.0
38
+ 2020-10-02 01:28:14,586 Computing results...
39
+ 2020-10-02 01:29:11,923 Test: [0/196] Le 63.6659 (63.6659) Time 57.334 (0.000)
40
+ 2020-10-02 01:29:19,880 Test: [10/196] Le 60.8184 (62.7525) Time 0.850 (0.000)
41
+ 2020-10-02 01:29:28,004 Test: [20/196] Le 64.4445 (62.7450) Time 0.781 (0.000)
42
+ 2020-10-02 01:29:35,944 Test: [30/196] Le 64.8506 (62.9363) Time 0.825 (0.000)
43
+ 2020-10-02 01:29:43,981 Test: [40/196] Le 63.3589 (63.1086) Time 0.791 (0.000)
44
+ 2020-10-02 01:29:51,976 Test: [50/196] Le 64.0212 (63.1067) Time 0.816 (0.000)
45
+ 2020-10-02 01:29:59,972 Test: [60/196] Le 61.2870 (62.9668) Time 0.794 (0.000)
46
+ 2020-10-02 01:30:07,943 Test: [70/196] Le 59.9160 (62.8829) Time 0.826 (0.000)
47
+ 2020-10-02 01:30:15,991 Test: [80/196] Le 63.5098 (63.0116) Time 0.785 (0.000)
48
+ 2020-10-02 01:30:23,929 Test: [90/196] Le 65.1759 (63.0592) Time 0.811 (0.000)
49
+ 2020-10-02 01:30:32,017 Test: [100/196] Le 62.5716 (63.0479) Time 0.792 (0.000)
50
+ 2020-10-02 01:30:40,103 Test: [110/196] Le 60.8049 (63.0152) Time 0.814 (0.000)
51
+ 2020-10-02 01:30:48,234 Test: [120/196] Le 64.6984 (63.0478) Time 0.788 (0.000)
52
+ 2020-10-02 01:30:56,220 Test: [130/196] Le 66.0548 (63.0548) Time 0.826 (0.000)
53
+ 2020-10-02 01:31:04,246 Test: [140/196] Le 68.9694 (63.0795) Time 0.818 (0.000)
54
+ 2020-10-02 01:31:12,168 Test: [150/196] Le 63.2262 (63.0947) Time 0.801 (0.000)
55
+ 2020-10-02 01:31:20,223 Test: [160/196] Le 61.3962 (63.0856) Time 0.787 (0.000)
56
+ 2020-10-02 01:31:28,184 Test: [170/196] Le 62.4964 (63.0277) Time 0.840 (0.000)
57
+ 2020-10-02 01:31:36,369 Test: [180/196] Le 62.8228 (63.0423) Time 0.840 (0.000)
58
+ 2020-10-02 01:31:44,250 Test: [190/196] Le 62.2450 (62.9804) Time 0.785 (0.000)
59
+ 2020-10-02 01:31:53,422 Images: 5000, Captions: 25000
60
+ 2020-10-02 01:32:23,263 Align loss: 0.8650665053327701
61
+ 2020-10-02 01:32:23,263 Image uniform loss: -3.7929412346140463
62
+ 2020-10-02 01:32:23,263 Text uniform loss: -3.8632661255327365
63
+ 2020-10-02 01:32:23,347 calculate similarity time:
64
+ 2020-10-02 01:32:23,685 Image to text: 86.9, 98.9, 99.6, 1.0, 1.3
65
+ 2020-10-02 01:32:23,972 Text to image: 74.0, 94.7, 97.5, 1.0, 3.0
66
+ 2020-10-02 01:32:23,972 rsum: 551.6 ar: 95.1 ari: 88.7
67
+ 2020-10-02 01:32:24,041 calculate similarity time:
68
+ 2020-10-02 01:32:24,378 Image to text: 83.3, 97.7, 99.0, 1.0, 1.5
69
+ 2020-10-02 01:32:24,666 Text to image: 71.5, 93.4, 97.4, 1.0, 3.3
70
+ 2020-10-02 01:32:24,667 rsum: 542.3 ar: 93.3 ari: 87.4
71
+ 2020-10-02 01:32:24,732 calculate similarity time:
72
+ 2020-10-02 01:32:25,070 Image to text: 85.0, 98.1, 99.7, 1.0, 1.4
73
+ 2020-10-02 01:32:25,358 Text to image: 72.2, 93.7, 97.2, 1.0, 3.6
74
+ 2020-10-02 01:32:25,358 rsum: 545.9 ar: 94.3 ari: 87.7
75
+ 2020-10-02 01:32:25,424 calculate similarity time:
76
+ 2020-10-02 01:32:25,761 Image to text: 83.3, 97.4, 99.2, 1.0, 1.4
77
+ 2020-10-02 01:32:26,049 Text to image: 69.7, 93.5, 97.5, 1.0, 2.7
78
+ 2020-10-02 01:32:26,049 rsum: 540.7 ar: 93.3 ari: 86.9
79
+ 2020-10-02 01:32:26,117 calculate similarity time:
80
+ 2020-10-02 01:32:26,454 Image to text: 84.0, 98.3, 99.6, 1.0, 1.4
81
+ 2020-10-02 01:32:26,741 Text to image: 72.8, 94.2, 97.7, 1.0, 3.1
82
+ 2020-10-02 01:32:26,741 rsum: 546.6 ar: 94.0 ari: 88.2
83
+ 2020-10-02 01:32:26,741 -----------------------------------
84
+ 2020-10-02 01:32:26,741 Mean metrics:
85
+ 2020-10-02 01:32:26,741 rsum: 545.4
86
+ 2020-10-02 01:32:26,741 Average i2t Recall: 94.0
87
+ 2020-10-02 01:32:26,741 Image to text: 84.5 98.1 99.4 1.0 1.4
88
+ 2020-10-02 01:32:26,741 Average t2i Recall: 87.8
89
+ 2020-10-02 01:32:26,741 Text to image: 72.0 93.9 97.5 1.0 3.1
90
+ 2020-10-02 01:32:28,728 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
91
+ 2020-10-02 01:32:30,167 Did not load checkpoints
92
+ 2020-10-02 01:32:30,169 Resnet backbone now has fixed blocks 2
93
+ 2020-10-02 01:32:31,618 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
94
+ 2020-10-02 01:32:31,619 Model config {
95
+ "architectures": [
96
+ "BertForMaskedLM"
97
+ ],
98
+ "attention_probs_dropout_prob": 0.1,
99
+ "finetuning_task": null,
100
+ "hidden_act": "gelu",
101
+ "hidden_dropout_prob": 0.1,
102
+ "hidden_size": 768,
103
+ "initializer_range": 0.02,
104
+ "intermediate_size": 3072,
105
+ "layer_norm_eps": 1e-12,
106
+ "max_position_embeddings": 512,
107
+ "model_type": "bert",
108
+ "num_attention_heads": 12,
109
+ "num_hidden_layers": 12,
110
+ "num_labels": 2,
111
+ "output_attentions": false,
112
+ "output_hidden_states": false,
113
+ "pad_token_id": 0,
114
+ "pruned_heads": {},
115
+ "torchscript": false,
116
+ "type_vocab_size": 2,
117
+ "use_bfloat16": false,
118
+ "vocab_size": 30522
119
+ }
120
+
121
+ 2020-10-02 01:32:32,949 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
122
+ 2020-10-02 01:32:35,071 Use adam as the optimizer, with init lr 0.0005
123
+ 2020-10-02 01:32:35,072 Image encoder is data paralleled now.
124
+ 2020-10-02 01:32:35,210 Load full model with backbone
125
+ 2020-10-02 01:32:35,213 Loading dataset
126
+ 2020-10-02 01:32:38,512 Input mode small: scaled by factor 2.0
127
+ 2020-10-02 01:32:47,138 Computing results...
128
+ 2020-10-02 01:33:05,339 Test: [0/196] Le 63.6659 (63.6659) Time 18.197 (0.000)
129
+ 2020-10-02 01:33:14,825 Test: [10/196] Le 60.8184 (62.7525) Time 0.847 (0.000)
130
+ 2020-10-02 01:33:22,994 Test: [20/196] Le 64.4445 (62.7450) Time 0.791 (0.000)
131
+ 2020-10-02 01:33:30,959 Test: [30/196] Le 64.8506 (62.9363) Time 0.820 (0.000)
132
+ 2020-10-02 01:33:39,122 Test: [40/196] Le 63.3589 (63.1086) Time 0.793 (0.000)
133
+ 2020-10-02 01:33:47,162 Test: [50/196] Le 64.0212 (63.1067) Time 0.833 (0.000)
134
+ 2020-10-02 01:33:55,256 Test: [60/196] Le 61.2870 (62.9668) Time 0.788 (0.000)
135
+ 2020-10-02 01:34:03,214 Test: [70/196] Le 59.9160 (62.8829) Time 0.813 (0.000)
136
+ 2020-10-02 01:34:11,474 Test: [80/196] Le 63.5098 (63.0116) Time 0.813 (0.000)
137
+ 2020-10-02 01:34:19,532 Test: [90/196] Le 65.1759 (63.0592) Time 0.820 (0.000)
138
+ 2020-10-02 01:34:27,738 Test: [100/196] Le 62.5716 (63.0479) Time 0.813 (0.000)
139
+ 2020-10-02 01:34:35,754 Test: [110/196] Le 60.8049 (63.0152) Time 0.831 (0.000)
140
+ 2020-10-02 01:34:43,977 Test: [120/196] Le 64.6984 (63.0478) Time 0.794 (0.000)
141
+ 2020-10-02 01:34:52,081 Test: [130/196] Le 66.0548 (63.0548) Time 0.834 (0.000)
142
+ 2020-10-02 01:35:00,538 Test: [140/196] Le 68.9694 (63.0795) Time 0.878 (0.000)
143
+ 2020-10-02 01:35:08,849 Test: [150/196] Le 63.2262 (63.0947) Time 0.886 (0.000)
144
+ 2020-10-02 01:35:17,131 Test: [160/196] Le 61.3962 (63.0856) Time 0.788 (0.000)
145
+ 2020-10-02 01:35:25,125 Test: [170/196] Le 62.4964 (63.0277) Time 0.822 (0.000)
146
+ 2020-10-02 01:35:33,289 Test: [180/196] Le 62.8228 (63.0423) Time 0.811 (0.000)
147
+ 2020-10-02 01:35:41,210 Test: [190/196] Le 62.2450 (62.9804) Time 0.790 (0.000)
148
+ 2020-10-02 01:35:47,189 Images: 5000, Captions: 25000
149
+ 2020-10-02 01:36:11,797 Align loss: 0.8650665053325651
150
+ 2020-10-02 01:36:11,797 Image uniform loss: -3.7929412346064644
151
+ 2020-10-02 01:36:11,797 Text uniform loss: -3.8632661255327365
152
+ 2020-10-02 01:36:21,609 Save the similarity into runs/coco_vsepp_wsl_bert_var_gpool_2/results_testall_5k.npy
153
+ 2020-10-02 01:36:21,609 calculate similarity time:
154
+ 2020-10-02 01:36:40,196 rsum: 468.9
155
+ 2020-10-02 01:36:40,196 Average i2t Recall: 83.5
156
+ 2020-10-02 01:36:40,196 Image to text: 66.4 89.3 94.6 1.0 3.0
157
+ 2020-10-02 01:36:40,197 Average t2i Recall: 72.9
158
+ 2020-10-02 01:36:40,197 Text to image: 51.6 79.3 87.6 1.0 11.5
coco_wsl_grid_bert/test_log_cxc.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2021-03-24 02:09:23,091 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2021-03-24 02:09:24,637 Did not load external(non-ImageNet) checkpoints
3
+ 2021-03-24 02:09:24,638 Resnet backbone now has fixed blocks 2
4
+ 2021-03-24 02:09:26,206 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
5
+ 2021-03-24 02:09:26,206 Model config {
6
+ "architectures": [
7
+ "BertForMaskedLM"
8
+ ],
9
+ "attention_probs_dropout_prob": 0.1,
10
+ "finetuning_task": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "num_labels": 2,
22
+ "output_attentions": false,
23
+ "output_hidden_states": false,
24
+ "pad_token_id": 0,
25
+ "pruned_heads": {},
26
+ "torchscript": false,
27
+ "type_vocab_size": 2,
28
+ "use_bfloat16": false,
29
+ "vocab_size": 30522
30
+ }
31
+
32
+ 2021-03-24 02:09:27,670 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
33
+ 2021-03-24 02:09:30,120 Use adam as the optimizer, with init lr 0.0005
34
+ 2021-03-24 02:09:30,121 Image encoder is data paralleled now.
35
+ 2021-03-24 02:09:30,233 Loading dataset
36
+ 2021-03-24 02:09:30,271 Input images are scaled by factor 2.0
37
+ 2021-03-24 02:09:30,272 Computing results...
38
+ 2021-03-24 02:09:51,749 Test: [0/196] Le 63.6659 (63.6659) Time 21.475 (0.000)
39
+ 2021-03-24 02:09:59,075 Test: [10/196] Le 64.0212 (62.9529) Time 0.769 (0.000)
40
+ 2021-03-24 02:10:06,372 Test: [20/196] Le 62.5716 (62.7840) Time 0.707 (0.000)
41
+ 2021-03-24 02:10:13,746 Test: [30/196] Le 63.2262 (63.1042) Time 0.732 (0.000)
42
+ 2021-03-24 02:10:21,147 Test: [40/196] Le 64.0326 (62.8167) Time 0.704 (0.000)
43
+ 2021-03-24 02:10:28,558 Test: [50/196] Le 63.5130 (62.8027) Time 0.717 (0.000)
44
+ 2021-03-24 02:10:35,860 Test: [60/196] Le 62.8874 (62.9139) Time 0.715 (0.000)
45
+ 2021-03-24 02:10:43,206 Test: [70/196] Le 63.8606 (62.8671) Time 0.745 (0.000)
46
+ 2021-03-24 02:10:50,685 Test: [80/196] Le 64.2199 (62.9350) Time 0.735 (0.000)
47
+ 2021-03-24 02:10:58,113 Test: [90/196] Le 63.8676 (63.0111) Time 0.772 (0.000)
48
+ 2021-03-24 02:11:05,484 Test: [100/196] Le 64.4704 (63.0609) Time 0.785 (0.000)
49
+ 2021-03-24 02:11:12,759 Test: [110/196] Le 60.2271 (63.0471) Time 0.764 (0.000)
50
+ 2021-03-24 02:11:20,155 Test: [120/196] Le 66.3189 (63.0213) Time 0.725 (0.000)
51
+ 2021-03-24 02:11:27,501 Test: [130/196] Le 61.8337 (63.0454) Time 0.740 (0.000)
52
+ 2021-03-24 02:11:34,893 Test: [140/196] Le 64.4948 (63.0690) Time 0.711 (0.000)
53
+ 2021-03-24 02:11:42,357 Test: [150/196] Le 59.8112 (63.0335) Time 0.715 (0.000)
54
+ 2021-03-24 02:11:49,662 Test: [160/196] Le 64.5806 (63.0258) Time 0.711 (0.000)
55
+ 2021-03-24 02:11:57,051 Test: [170/196] Le 63.7446 (62.9643) Time 0.729 (0.000)
56
+ 2021-03-24 02:12:04,428 Test: [180/196] Le 62.9918 (62.9341) Time 0.792 (0.000)
57
+ 2021-03-24 02:12:11,782 Test: [190/196] Le 61.2165 (63.0057) Time 0.711 (0.000)
58
+ 2021-03-24 02:12:17,785 Images: 5000, Captions: 25000
59
+ 2021-03-24 02:12:54,113 T2I R@1: 53.64, R@5: 81.08, R@10: 88.948
60
+ 2021-03-24 02:12:54,114 I2T R@1: 67.9, R@5: 90.64, R@10: 95.46
61
+ 2021-03-24 02:14:02,666 I2I R@1: 51.283656856606136, R@5: 83.17678981423502, R@10: 90.5030265080359
62
+ 2021-03-24 02:14:02,666 T2T R@1: 46.656, R@5: 69.156, R@10: 78.192
coco_wsl_grid_bert/test_log_ensemble.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-10-02 01:38:58,481 Image to text: 88.3, 98.9, 99.7, 1.0, 1.3
2
+ 2020-10-02 01:38:58,764 Text to image: 74.7, 95.1, 97.6, 1.0, 2.8
3
+ 2020-10-02 01:38:58,764 rsum: 554.3 ar: 95.6 ari: 89.1
4
+ 2020-10-02 01:38:59,100 Image to text: 84.7, 97.0, 99.4, 1.0, 1.5
5
+ 2020-10-02 01:38:59,383 Text to image: 72.9, 93.9, 97.5, 1.0, 3.2
6
+ 2020-10-02 01:38:59,383 rsum: 545.4 ar: 93.7 ari: 88.1
7
+ 2020-10-02 01:38:59,719 Image to text: 85.4, 97.8, 99.7, 1.0, 1.4
8
+ 2020-10-02 01:39:00,002 Text to image: 73.1, 94.2, 97.7, 1.0, 3.4
9
+ 2020-10-02 01:39:00,002 rsum: 547.9 ar: 94.3 ari: 88.3
10
+ 2020-10-02 01:39:00,338 Image to text: 84.1, 97.6, 99.1, 1.0, 1.5
11
+ 2020-10-02 01:39:00,621 Text to image: 70.4, 93.9, 97.6, 1.0, 2.7
12
+ 2020-10-02 01:39:00,621 rsum: 542.7 ar: 93.6 ari: 87.3
13
+ 2020-10-02 01:39:00,957 Image to text: 85.4, 98.5, 99.2, 1.0, 1.4
14
+ 2020-10-02 01:39:01,240 Text to image: 74.3, 94.5, 97.9, 1.0, 3.0
15
+ 2020-10-02 01:39:01,240 rsum: 549.9 ar: 94.4 ari: 88.9
16
+ 2020-10-02 01:39:01,241 -----------------------------------
17
+ 2020-10-02 01:39:01,241 Mean metrics:
18
+ 2020-10-02 01:39:01,241 rsum: 548.0
19
+ 2020-10-02 01:39:01,241 Average i2t Recall: 94.3
20
+ 2020-10-02 01:39:01,241 Image to text: 85.6 98.0 99.4 1.0 1.4
21
+ 2020-10-02 01:39:01,241 Average t2i Recall: 88.4
22
+ 2020-10-02 01:39:01,241 Text to image: 73.1 94.3 97.7 1.0 3.0
23
+ 2020-10-02 01:39:36,343 rsum: 474.8
24
+ 2020-10-02 01:39:36,343 Average i2t Recall: 84.5
25
+ 2020-10-02 01:39:36,343 Image to text: 68.1 90.2 95.2 1.0 2.9
26
+ 2020-10-02 01:39:36,343 Average t2i Recall: 73.7
27
+ 2020-10-02 01:39:36,343 Text to image: 52.7 80.2 88.3 1.0 10.9
f30k_butd_grid_bert/.DS_Store ADDED
Binary file (6.15 kB). View file
 
f30k_butd_grid_bert/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed3ff754921c959a751adf641457ecbafb4b38286517eab16aa098eb13640a1d
3
+ size 628484264
f30k_butd_grid_bert/test_log.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-28 13:11:05,720 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2020-09-28 13:11:06,354 Loading pretrained backbone weights from hdfs:///home/byte_arnold_hl_vc/user/chenjiacheng/data/coco/original_updown/original_updown_backbone.pth for backbone source vsepp_detector
3
+ 2020-09-28 13:11:17,472 Resnet backbone now has fixed blocks 2
4
+ 2020-09-28 13:11:18,889 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
5
+ 2020-09-28 13:11:18,889 Model config {
6
+ "architectures": [
7
+ "BertForMaskedLM"
8
+ ],
9
+ "attention_probs_dropout_prob": 0.1,
10
+ "finetuning_task": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "num_labels": 2,
22
+ "output_attentions": false,
23
+ "output_hidden_states": false,
24
+ "pad_token_id": 0,
25
+ "pruned_heads": {},
26
+ "torchscript": false,
27
+ "type_vocab_size": 2,
28
+ "use_bfloat16": false,
29
+ "vocab_size": 30522
30
+ }
31
+
32
+ 2020-09-28 13:11:20,205 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
33
+ 2020-09-28 13:11:22,425 Use adam as the optimizer, with init lr 0.0005
34
+ 2020-09-28 13:11:22,426 Image encoder is data paralleled now.
35
+ 2020-09-28 13:11:22,566 Load full model with backbone
36
+ 2020-09-28 13:11:22,568 Loading dataset
37
+ 2020-09-28 13:11:27,156 Input mode small: scaled by factor 2.0
38
+ 2020-09-28 13:11:43,512 Computing results...
39
+ 2020-09-28 13:12:40,848 Test: [0/40] Le 64.1531 (64.1531) Time 57.332 (0.000)
40
+ 2020-09-28 13:12:46,101 Test: [10/40] Le 63.1351 (61.8695) Time 0.564 (0.000)
41
+ 2020-09-28 13:12:51,829 Test: [20/40] Le 60.7486 (61.3324) Time 0.591 (0.000)
42
+ 2020-09-28 13:12:57,364 Test: [30/40] Le 60.9106 (61.2955) Time 0.576 (0.000)
43
+ 2020-09-28 13:13:03,463 Images: 1000, Captions: 5000
44
+ 2020-09-28 13:13:05,111 rsum: 522.3
45
+ 2020-09-28 13:13:05,111 Average i2t Recall: 92.4
46
+ 2020-09-28 13:13:05,111 Image to text: 81.5 97.1 98.5 1.0 2.0
47
+ 2020-09-28 13:13:05,111 Average t2i Recall: 81.7
48
+ 2020-09-28 13:13:05,111 Text to image: 63.7 88.3 93.2 1.0 5.6
f30k_butd_grid_bigru/.DS_Store ADDED
Binary file (6.15 kB). View file
 
f30k_butd_grid_bigru/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e2ba7e91a15bf21f97fb10e3f29ea31e544cf1fc7fc08516dc68d1e92106e54
3
+ size 230120419
f30k_butd_grid_bigru/test_log.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-15 14:08:25,091 Loading pretrained backbone weights from hdfs:///home/byte_arnold_hl_vc/user/chenjiacheng/data/coco/original_updown/original_updown_backbone.pth for backbone source vsepp_detector
2
+ 2020-09-15 14:08:32,036 Resnet backbone now has fixed blocks 2
3
+ 2020-09-15 14:08:32,288 Use adam as the optimizer, with init lr 0.0005
4
+ 2020-09-15 14:08:32,288 Image encoder is data paralleled now.
5
+ 2020-09-15 14:08:32,453 Load full model with backbone
6
+ 2020-09-15 14:08:32,455 Loading dataset
7
+ 2020-09-15 14:08:37,097 Input mode small: scaled by factor 2.0
8
+ 2020-09-15 14:08:42,274 Computing results...
9
+ 2020-09-15 14:09:22,303 Test: [0/40] Le 62.6873 (62.6873) Time 40.026 (0.000)
10
+ 2020-09-15 14:09:26,212 Test: [10/40] Le 62.0393 (61.4000) Time 0.477 (0.000)
11
+ 2020-09-15 14:09:34,072 Test: [20/40] Le 59.5247 (61.0691) Time 0.470 (0.000)
12
+ 2020-09-15 14:09:40,179 Test: [30/40] Le 61.6374 (61.0768) Time 0.413 (0.000)
13
+ 2020-09-15 14:09:46,671 Images: 1000, Captions: 5000
14
+ 2020-09-15 14:11:00,567 rsum: 500.2
15
+ 2020-09-15 14:11:00,567 Average i2t Recall: 89.7
16
+ 2020-09-15 14:11:00,567 Image to text: 77.9 93.7 97.4 1.0 2.6
17
+ 2020-09-15 14:11:00,567 Average t2i Recall: 77.1
18
+ 2020-09-15 14:11:00,567 Text to image: 57.5 83.4 90.2 1.0 7.2
f30k_butd_region_bert/.DS_Store ADDED
Binary file (6.15 kB). View file
 
f30k_butd_region_bert/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e803e4ff3b5284c6320f2a0cf4659de7f38efe83c65ebec73f09504682597c8
3
+ size 455953045
f30k_butd_region_bert/test_log.txt ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-28 15:56:24,779 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2020-09-28 15:56:26,324 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
3
+ 2020-09-28 15:56:26,325 Model config {
4
+ "architectures": [
5
+ "BertForMaskedLM"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "finetuning_task": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "num_labels": 2,
20
+ "output_attentions": false,
21
+ "output_hidden_states": false,
22
+ "pad_token_id": 0,
23
+ "pruned_heads": {},
24
+ "torchscript": false,
25
+ "type_vocab_size": 2,
26
+ "use_bfloat16": false,
27
+ "vocab_size": 30522
28
+ }
29
+
30
+ 2020-09-28 15:56:27,810 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
31
+ 2020-09-28 15:56:29,985 Use adam as the optimizer, with init lr 0.0005
32
+ 2020-09-28 15:56:29,986 Image encoder is data paralleled now.
33
+ 2020-09-28 15:56:29,998 Loading dataset
34
+ 2020-09-28 15:57:03,073 Computing results...
35
+ 2020-09-28 15:57:42,470 Test: [0/40] Le 61.6969 (61.6969) Time 39.395 (0.000)
36
+ 2020-09-28 15:57:44,960 Test: [10/40] Le 60.8350 (61.2574) Time 0.255 (0.000)
37
+ 2020-09-28 15:57:47,453 Test: [20/40] Le 59.7448 (61.2992) Time 0.249 (0.000)
38
+ 2020-09-28 15:57:49,962 Test: [30/40] Le 62.9457 (61.5590) Time 0.267 (0.000)
39
+ 2020-09-28 15:57:52,391 Images: 1000, Captions: 5000
40
+ 2020-09-28 15:57:53,718 rsum: 513.5
41
+ 2020-09-28 15:57:53,718 Average i2t Recall: 91.6
42
+ 2020-09-28 15:57:53,718 Image to text: 81.7 95.4 97.6 1.0 2.1
43
+ 2020-09-28 15:57:53,718 Average t2i Recall: 79.6
44
+ 2020-09-28 15:57:53,718 Text to image: 61.4 85.9 91.5 1.0 6.3
f30k_butd_region_bert/test_log_ensemble.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 2020-09-28 15:50:55,326 rsum: 532.0
2
+ 2020-09-28 15:50:55,326 Average i2t Recall: 93.8
3
+ 2020-09-28 15:50:55,326 Image to text: 85.3 97.2 98.9 1.0 1.8
4
+ 2020-09-28 15:50:55,326 Average t2i Recall: 83.5
5
+ 2020-09-28 15:50:55,326 Text to image: 66.7 89.9 94.0 1.0 4.7
f30k_butd_region_bigru/.DS_Store ADDED
Binary file (6.15 kB). View file
 
f30k_butd_region_bigru/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:510ae1af6cd16c23f72b57cee80699e3a5ae106370ce103f5147f4f1cb14760b
3
+ size 57590019
f30k_butd_region_bigru/test_log.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-20 17:07:11,081 Use adam as the optimizer, with init lr 0.0005
2
+ 2020-09-20 17:07:11,083 Loading dataset
3
+ 2020-09-20 17:07:39,605 Computing results...
4
+ 2020-09-20 17:07:40,570 Test: [0/40] Le 61.9494 (61.9494) Time 0.965 (0.000)
5
+ 2020-09-20 17:07:40,897 Test: [10/40] Le 60.5408 (60.4044) Time 0.053 (0.000)
6
+ 2020-09-20 17:07:41,211 Test: [20/40] Le 59.8890 (60.1685) Time 0.032 (0.000)
7
+ 2020-09-20 17:07:41,531 Test: [30/40] Le 61.4653 (60.3205) Time 0.037 (0.000)
8
+ 2020-09-20 17:07:41,891 Images: 1000, Captions: 5000
9
+ 2020-09-20 17:07:42,477 calculate similarity time:
10
+ 2020-09-20 17:07:43,145 rsum: 498.0
11
+ 2020-09-20 17:07:43,145 Average i2t Recall: 89.5
12
+ 2020-09-20 17:07:43,145 Image to text: 76.5 94.2 97.7 1.0 2.6
13
+ 2020-09-20 17:07:43,145 Average t2i Recall: 76.5
14
+ 2020-09-20 17:07:43,145 Text to image: 56.4 83.4 89.9 1.0 8.0
f30k_wsl_grid_bert/.DS_Store ADDED
Binary file (6.15 kB). View file
 
f30k_wsl_grid_bert/model_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7ecdb875501378e1f0623a476579ec6746a3649a6351288aebd4296244c56b
3
+ size 805843326
f30k_wsl_grid_bert/test_log.txt ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2020-09-30 01:28:04,393 loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/tiger/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
2
+ 2020-09-30 01:28:05,900 Did not load checkpoints
3
+ 2020-09-30 01:28:05,902 Resnet backbone now has fixed blocks 2
4
+ 2020-09-30 01:28:07,321 loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/tiger/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
5
+ 2020-09-30 01:28:07,322 Model config {
6
+ "architectures": [
7
+ "BertForMaskedLM"
8
+ ],
9
+ "attention_probs_dropout_prob": 0.1,
10
+ "finetuning_task": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "num_labels": 2,
22
+ "output_attentions": false,
23
+ "output_hidden_states": false,
24
+ "pad_token_id": 0,
25
+ "pruned_heads": {},
26
+ "torchscript": false,
27
+ "type_vocab_size": 2,
28
+ "use_bfloat16": false,
29
+ "vocab_size": 30522
30
+ }
31
+
32
+ 2020-09-30 01:28:08,758 loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/tiger/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157
33
+ 2020-09-30 01:28:10,844 Use adam as the optimizer, with init lr 0.0005
34
+ 2020-09-30 01:28:10,845 Image encoder is data paralleled now.
35
+ 2020-09-30 01:28:10,981 Load full model with backbone
36
+ 2020-09-30 01:28:10,983 Loading dataset
37
+ 2020-09-30 01:28:15,016 Input mode small: scaled by factor 2.0
38
+ 2020-09-30 01:28:24,713 Computing results...
39
+ 2020-09-30 01:29:21,486 Test: [0/40] Le 61.7868 (61.7867) Time 56.770 (0.000)
40
+ 2020-09-30 01:29:29,720 Test: [10/40] Le 62.1460 (61.6953) Time 0.866 (0.000)
41
+ 2020-09-30 01:29:38,035 Test: [20/40] Le 60.0629 (61.3002) Time 0.793 (0.000)
42
+ 2020-09-30 01:29:46,124 Test: [30/40] Le 61.7439 (61.4198) Time 0.788 (0.000)
43
+ 2020-09-30 01:29:56,538 Images: 1000, Captions: 5000
44
+ 2020-09-30 01:29:57,433 Save the similarity into runs/f30k_vsepp_wsl_bert_var_gpool/results_testall_5k.npy
45
+ 2020-09-30 01:29:57,433 calculate similarity time:
46
+ 2020-09-30 01:29:58,057 rsum: 550.9
47
+ 2020-09-30 01:29:58,057 Average i2t Recall: 95.4
48
+ 2020-09-30 01:29:58,057 Image to text: 88.4 98.3 99.5 1.0 1.3
49
+ 2020-09-30 01:29:58,057 Average t2i Recall: 88.2
50
+ 2020-09-30 01:29:58,057 Text to image: 74.2 93.7 96.8 1.0 3.0
f30k_wsl_grid_bert/test_log_ensemble.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 2020-09-30 01:31:09,925 rsum: 555.1
2
+ 2020-09-30 01:31:09,925 Average i2t Recall: 95.8
3
+ 2020-09-30 01:31:09,925 Image to text: 88.7 98.9 99.8 1.0 1.2
4
+ 2020-09-30 01:31:09,925 Average t2i Recall: 89.2
5
+ 2020-09-30 01:31:09,925 Text to image: 76.1 94.5 97.1 1.0 2.7