Linhz committed on
Commit
4f1a2c9
·
verified ·
1 Parent(s): 885050c

Update Model/MultimodelNER/VLSP2021/train_umt_2021.py

Browse files
Model/MultimodelNER/VLSP2021/train_umt_2021.py CHANGED
@@ -1,351 +1,351 @@
1
- import os
2
- import sys
3
-
4
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
5
- import argparse
6
-
7
- import logging
8
- import random
9
- import numpy as np
10
- import torch
11
- import torch.nn.functional as F
12
- from transformers import AutoTokenizer, BertConfig
13
- from Model.MultimodelNER.UMT import UMT
14
- from Model.MultimodelNER import resnet as resnet
15
- from Model.MultimodelNER.resnet_utils import myResnet
16
- from Model.MultimodelNER.VLSP2021.dataset_roberta import convert_mm_examples_to_features, MNERProcessor_2021
17
- from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
18
- TensorDataset)
19
- from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
20
- from Model.MultimodelNER.ner_evaluate import evaluate_each_class,evaluate
21
- from seqeval.metrics import classification_report
22
- from tqdm import tqdm, trange
23
- import json
24
- from Model.MultimodelNER.predict import convert_mm_examples_to_features_predict, get_test_examples_predict
25
- from Model.MultimodelNER.Ner_processing import *
26
- CONFIG_NAME = 'bert_config.json'
27
- WEIGHTS_NAME = 'pytorch_model.bin'
28
-
29
- logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
30
- datefmt='%m/%d/%Y %H:%M:%S',
31
- level=logging.INFO)
32
- logger = logging.getLogger(__name__)
33
- parser = argparse.ArgumentParser()
34
- ## Required parameters
35
- parser.add_argument("--negative_rate",
36
- default=16,
37
- type=int,
38
- help="the negative samples rate")
39
-
40
- parser.add_argument('--lamb',
41
- default=0.62,
42
- type=float)
43
-
44
- parser.add_argument('--temp',
45
- type=float,
46
- default=0.179,
47
- help="parameter for CL training")
48
-
49
- parser.add_argument('--temp_lamb',
50
- type=float,
51
- default=0.7,
52
- help="parameter for CL training")
53
-
54
- parser.add_argument("--data_dir",
55
- default='./data/twitter2017',
56
- type=str,
57
-
58
- help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
59
- parser.add_argument("--bert_model", default='vinai/phobert-base-v2', type=str)
60
- parser.add_argument("--task_name",
61
- default='sonba',
62
- type=str,
63
-
64
- help="The name of the task to train.")
65
- parser.add_argument("--output_dir",
66
- default='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/best_model/',
67
- type=str,
68
- help="The output directory where the model predictions and checkpoints will be written.")
69
-
70
- ## Other parameters
71
- parser.add_argument("--cache_dir",
72
- default="",
73
- type=str,
74
- help="Where do you want to store the pre-trained models downloaded from s3")
75
-
76
- parser.add_argument("--max_seq_length",
77
- default=128,
78
- type=int,
79
- help="The maximum total input sequence length after WordPiece tokenization. \n"
80
- "Sequences longer than this will be truncated, and sequences shorter \n"
81
- "than this will be padded.")
82
-
83
- parser.add_argument("--do_train",
84
- action='store_true',
85
- help="Whether to run training.")
86
-
87
- parser.add_argument("--do_eval",
88
- action='store_true',
89
- help="Whether to run eval on the dev set.")
90
-
91
- parser.add_argument("--do_lower_case",
92
- action='store_true',
93
- help="Set this flag if you are using an uncased model.")
94
-
95
- parser.add_argument("--train_batch_size",
96
- default=64,
97
- type=int,
98
- help="Total batch size for training.")
99
-
100
- parser.add_argument("--eval_batch_size",
101
- default=16,
102
- type=int,
103
- help="Total batch size for eval.")
104
-
105
- parser.add_argument("--learning_rate",
106
- default=5e-5,
107
- type=float,
108
- help="The initial learning rate for Adam.")
109
-
110
- parser.add_argument("--num_train_epochs",
111
- default=12.0,
112
- type=float,
113
- help="Total number of training epochs to perform.")
114
-
115
- parser.add_argument("--warmup_proportion",
116
- default=0.1,
117
- type=float,
118
- help="Proportion of training to perform linear learning rate warmup for. "
119
- "E.g., 0.1 = 10%% of training.")
120
-
121
- parser.add_argument("--no_cuda",
122
- action='store_true',
123
- help="Whether not to use CUDA when available")
124
-
125
- parser.add_argument("--local_rank",
126
- type=int,
127
- default=-1,
128
- help="local_rank for distributed training on gpus")
129
-
130
- parser.add_argument('--seed',
131
- type=int,
132
- default=37,
133
- help="random seed for initialization")
134
-
135
- parser.add_argument('--gradient_accumulation_steps',
136
- type=int,
137
- default=1,
138
- help="Number of updates steps to accumulate before performing a backward/update pass.")
139
-
140
- parser.add_argument('--fp16',
141
- action='store_true',
142
- help="Whether to use 16-bit float precision instead of 32-bit")
143
-
144
- parser.add_argument('--loss_scale',
145
- type=float, default=0,
146
- help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
147
- "0 (default value): dynamic loss scaling.\n"
148
- "Positive power of 2: static loss scaling value.\n")
149
-
150
- parser.add_argument('--mm_model', default='MTCCMBert', help='model name') # 'MTCCMBert', 'NMMTCCMBert'
151
- parser.add_argument('--layer_num1', type=int, default=1, help='number of txt2img layer')
152
- parser.add_argument('--layer_num2', type=int, default=1, help='number of img2txt layer')
153
- parser.add_argument('--layer_num3', type=int, default=1, help='number of txt2txt layer')
154
- parser.add_argument('--fine_tune_cnn', action='store_true', help='fine tune pre-trained CNN if True')
155
- parser.add_argument('--resnet_root', default='E:/demo_datn/pythonProject1/Model/Resnet/', help='path the pre-trained cnn models')
156
- parser.add_argument('--crop_size', type=int, default=224, help='crop size of image')
157
- parser.add_argument('--path_image', default='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Image', help='path to images')
158
- # parser.add_argument('--mm_model', default='TomBert', help='model name') #
159
- parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
160
- parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
161
- args = parser.parse_args()
162
-
163
-
164
-
165
- processors = {
166
- "twitter2015": MNERProcessor_2021,
167
- "twitter2017": MNERProcessor_2021,
168
- "sonba": MNERProcessor_2021
169
- }
170
-
171
-
172
-
173
- random.seed(args.seed)
174
- np.random.seed(args.seed)
175
- torch.manual_seed(args.seed)
176
-
177
-
178
- task_name = args.task_name.lower()
179
-
180
-
181
-
182
- processor = processors[task_name]()
183
- label_list = processor.get_labels()
184
- auxlabel_list = processor.get_auxlabels()
185
- num_labels = len(label_list) + 1 # label 0 corresponds to padding, label in label_list starts from 1
186
- auxnum_labels = len(auxlabel_list) + 1 # label 0 corresponds to padding, label in label_list starts from 1
187
-
188
- start_label_id = processor.get_start_label_id()
189
- stop_label_id = processor.get_stop_label_id()
190
-
191
- # ''' initialization of our conversion matrix, in our implementation, it is a 7*12 matrix initialized as follows:
192
- trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
193
- trans_matrix[0, 0] = 1 # pad to pad
194
- trans_matrix[1, 1] = 1 # O to O
195
- trans_matrix[2, 2] = 0.25 # B to B-MISC
196
- trans_matrix[2, 4] = 0.25 # B to B-PER
197
- trans_matrix[2, 6] = 0.25 # B to B-ORG
198
- trans_matrix[2, 8] = 0.25 # B to B-LOC
199
- trans_matrix[3, 3] = 0.25 # I to I-MISC
200
- trans_matrix[3, 5] = 0.25 # I to I-PER
201
- trans_matrix[3, 7] = 0.25 # I to I-ORG
202
- trans_matrix[3, 9] = 0.25 # I to I-LOC
203
- trans_matrix[4, 10] = 1 # X to X
204
- trans_matrix[5, 11] = 1 # [CLS] to [CLS]
205
- trans_matrix[6, 12] = 1 # [SEP] to [SEP]
206
- '''
207
- trans_matrix = np.zeros((num_labels, auxnum_labels), dtype=float)
208
- trans_matrix[0,0]=1 # pad to pad
209
- trans_matrix[1,1]=1
210
- trans_matrix[2,2]=1
211
- trans_matrix[4,2]=1
212
- trans_matrix[6,2]=1
213
- trans_matrix[8,2]=1
214
- trans_matrix[3,3]=1
215
- trans_matrix[5,3]=1
216
- trans_matrix[7,3]=1
217
- trans_matrix[9,3]=1
218
- trans_matrix[10,4]=1
219
- trans_matrix[11,5]=1
220
- trans_matrix[12,6]=1
221
- '''
222
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
223
-
224
- tokenizer = AutoTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
225
-
226
-
227
-
228
- net = getattr(resnet, 'resnet152')()
229
- net.load_state_dict(torch.load(os.path.join(args.resnet_root, 'resnet152.pth')))
230
- encoder = myResnet(net, args.fine_tune_cnn, device)
231
-
232
-
233
- output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
234
- # output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
235
- output_encoder_file = os.path.join(args.output_dir, "pytorch_encoder.bin")
236
-
237
- temp = args.temp
238
- temp_lamb = args.temp_lamb
239
- lamb = args.lamb
240
- negative_rate = args.negative_rate
241
- # # loadmodel
242
- # model = UMT.from_pretrained(args.bert_model,
243
- # cache_dir=args.cache_dir, layer_num1=args.layer_num1,
244
- # layer_num2=args.layer_num2,
245
- # layer_num3=args.layer_num3,
246
- # num_labels_=num_labels, auxnum_labels=auxnum_labels)
247
- # model.load_state_dict(torch.load(output_model_file,map_location=torch.device('cpu')))
248
- # model.to(device)
249
- # encoder_state_dict = torch.load(output_encoder_file,map_location=torch.device('cpu'))
250
- # encoder.load_state_dict(encoder_state_dict)
251
- # encoder.to(device)
252
- # print(model)
253
-
254
- def load_model(output_model_file, output_encoder_file,encoder,num_labels,auxnum_labels):
255
- model = UMT.from_pretrained(args.bert_model,
256
- cache_dir=args.cache_dir, layer_num1=args.layer_num1,
257
- layer_num2=args.layer_num2,
258
- layer_num3=args.layer_num3,
259
- num_labels_=num_labels, auxnum_labels=auxnum_labels)
260
- model.load_state_dict(torch.load(output_model_file, map_location=torch.device('cpu')))
261
- model.to(device)
262
- encoder_state_dict = torch.load(output_encoder_file, map_location=torch.device('cpu'))
263
- encoder.load_state_dict(encoder_state_dict)
264
- encoder.to(device)
265
- return model, encoder
266
-
267
- model_umt,encoder_umt=load_model(output_model_file, output_encoder_file,encoder,num_labels,auxnum_labels)
268
- #
269
- # # sentence = 'Thương biết_mấy những Thuận, những Liên, những Luận, Xuân, Nghĩa mỗi người một hoàn_cảnh nhưng đều rất giống nhau: rất ham học, rất cố_gắng để đạt mức hiểu biết cao nhất.'
270
- # # image_path = '/kaggle/working/data/014715.jpg'
271
- # # # crop_size = 224'
272
- path_image='E:\demo_datn\pythonProject1\Model\MultimodelNER\VLSP2021\Image'
273
- trans_matrix = np.zeros((auxnum_labels,num_labels), dtype=float)
274
- trans_matrix[0,0]=1 # pad to pad
275
- trans_matrix[1,1]=1 # O to O
276
- trans_matrix[2,2]=0.25 # B to B-MISC
277
- trans_matrix[2,4]=0.25 # B to B-PER
278
- trans_matrix[2,6]=0.25 # B to B-ORG
279
- trans_matrix[2,8]=0.25 # B to B-LOC
280
- trans_matrix[3,3]=0.25 # I to I-MISC
281
- trans_matrix[3,5]=0.25 # I to I-PER
282
- trans_matrix[3,7]=0.25 # I to I-ORG
283
- trans_matrix[3,9]=0.25 # I to I-LOC
284
- trans_matrix[4,10]=1 # X to X
285
- trans_matrix[5,11]=1 # [CLS] to [CLS]
286
- trans_matrix[6,12]=1 # [SE
287
- def predict(model_umt, encoder_umt, eval_examples, tokenizer, device,path_image,trans_matrix):
288
-
289
- features = convert_mm_examples_to_features_predict(eval_examples, 256, tokenizer, 224,path_image)
290
-
291
- input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
292
- input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
293
- added_input_mask = torch.tensor([f.added_input_mask for f in features], dtype=torch.long)
294
- segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
295
- img_feats = torch.stack([f.img_feat for f in features])
296
- print(img_feats)
297
- eval_data = TensorDataset(input_ids, input_mask, added_input_mask, segment_ids, img_feats)
298
- eval_sampler = SequentialSampler(eval_data)
299
- eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=16)
300
-
301
- model_umt.eval()
302
- encoder_umt.eval()
303
-
304
- y_pred = []
305
- label_map = {i: label for i, label in enumerate(label_list, 1)}
306
- label_map[0] = "<pad>"
307
-
308
- for input_ids, input_mask, added_input_mask, segment_ids, img_feats in tqdm(eval_dataloader, desc="Evaluating"):
309
- input_ids = input_ids.to(device)
310
- input_mask = input_mask.to(device)
311
- added_input_mask = added_input_mask.to(device)
312
- segment_ids = segment_ids.to(device)
313
- img_feats = img_feats.to(device)
314
-
315
- with torch.no_grad():
316
- imgs_f, img_mean, img_att = encoder_umt(img_feats)
317
- predicted_label_seq_ids = model_umt(input_ids, segment_ids, input_mask, added_input_mask, img_att,
318
- trans_matrix)
319
-
320
- logits = predicted_label_seq_ids
321
- input_mask = input_mask.to('cpu').numpy()
322
-
323
- for i, mask in enumerate(input_mask):
324
- temp_1 = []
325
- for j, m in enumerate(mask):
326
- if j == 0:
327
- continue
328
- if m:
329
- if label_map[logits[i][j]] not in ["<pad>", "<s>", "</s>", "X"]:
330
- temp_1.append(label_map[logits[i][j]])
331
- else:
332
- break
333
- y_pred.append(temp_1)
334
-
335
- a = eval_examples[0].text_a.split(" ")
336
-
337
- return y_pred, a
338
-
339
- # eval_examples = get_test_examples_predict('E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Filetxt/')
340
- # y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device,path_image,trans_matrix)
341
- # print(y_pred)
342
- # print(a)
343
- # formatted_output = format_predictions(a, y_pred[0])
344
- #
345
- # final= process_predictions(formatted_output)
346
- # final2= combine_entities(final)
347
- # print(final2)
348
- # final3= remove_B_prefix(final2)
349
- # final4=combine_i_tags(final3)
350
- # print(final3)
351
-
 
1
+ import os
2
+ import sys
3
+
4
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
5
+ import argparse
6
+
7
+ import logging
8
+ import random
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from transformers import AutoTokenizer, BertConfig
13
+ from Model.MultimodelNER.UMT import UMT
14
+ from Model.MultimodelNER import resnet as resnet
15
+ from Model.MultimodelNER.resnet_utils import myResnet
16
+ from Model.MultimodelNER.VLSP2021.dataset_roberta import convert_mm_examples_to_features, MNERProcessor_2021
17
+ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
18
+ TensorDataset)
19
+ from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
20
+ from Model.MultimodelNER.ner_evaluate import evaluate_each_class,evaluate
21
+ from seqeval.metrics import classification_report
22
+ from tqdm import tqdm, trange
23
+ import json
24
+ from Model.MultimodelNER.predict import convert_mm_examples_to_features_predict, get_test_examples_predict
25
+ from Model.MultimodelNER.Ner_processing import *
26
# File names of saved model artifacts. WEIGHTS_NAME is joined with
# --output_dir further down to locate the UMT checkpoint; CONFIG_NAME is only
# referenced from a commented-out line in this file.
CONFIG_NAME = 'bert_config.json'
WEIGHTS_NAME = 'pytorch_model.bin'

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
                    level=logging.INFO)
logger = logging.getLogger(__name__)

# Command-line configuration. Every option below has a default, so none of
# them is actually mandatory despite the "Required parameters" heading.
# NOTE(review): the arguments are parsed at module level (see the
# parse_args() call at the end of this section), i.e. as a side effect of
# importing this file -- confirm that this is intended if the module is
# imported by another program that has its own command line.
parser = argparse.ArgumentParser()
## Required parameters
# --negative_rate, --lamb, --temp and --temp_lamb are copied into module-level
# variables further down; nothing else in the visible file reads them.
parser.add_argument("--negative_rate",
                    default=16,
                    type=int,
                    help="the negative samples rate")

parser.add_argument('--lamb',
                    default=0.62,
                    type=float)

parser.add_argument('--temp',
                    type=float,
                    default=0.179,
                    help="parameter for CL training")

parser.add_argument('--temp_lamb',
                    type=float,
                    default=0.7,
                    help="parameter for CL training")

# --data_dir is not read anywhere else in the visible file.
parser.add_argument("--data_dir",
                    default='./data/twitter2017',
                    type=str,

                    help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
# Identifier passed to AutoTokenizer.from_pretrained and UMT.from_pretrained.
parser.add_argument("--bert_model", default='vinai/phobert-base-v2', type=str)
# Key into the `processors` dict defined below (lower-cased before lookup).
parser.add_argument("--task_name",
                    default='sonba',
                    type=str,

                    help="The name of the task to train.")
# Directory from which pytorch_model.bin and pytorch_encoder.bin are loaded.
# The default is a relative path, so it resolves against the current working
# directory of the process.
parser.add_argument("--output_dir",
                    default='Model/MultimodelNER/VLSP2021/best_model/',
                    type=str,
                    help="The output directory where the model predictions and checkpoints will be written.")

## Other parameters
parser.add_argument("--cache_dir",
                    default="",
                    type=str,
                    help="Where do you want to store the pre-trained models downloaded from s3")

# NOTE(review): the options from --max_seq_length through --loss_scale
# (except --do_lower_case and --seed) are parsed but not read anywhere else
# in the visible file; predict() uses its own literals instead.
parser.add_argument("--max_seq_length",
                    default=128,
                    type=int,
                    help="The maximum total input sequence length after WordPiece tokenization. \n"
                         "Sequences longer than this will be truncated, and sequences shorter \n"
                         "than this will be padded.")

parser.add_argument("--do_train",
                    action='store_true',
                    help="Whether to run training.")

parser.add_argument("--do_eval",
                    action='store_true',
                    help="Whether to run eval on the dev set.")

# Forwarded to AutoTokenizer.from_pretrained.
parser.add_argument("--do_lower_case",
                    action='store_true',
                    help="Set this flag if you are using an uncased model.")

parser.add_argument("--train_batch_size",
                    default=64,
                    type=int,
                    help="Total batch size for training.")

parser.add_argument("--eval_batch_size",
                    default=16,
                    type=int,
                    help="Total batch size for eval.")

parser.add_argument("--learning_rate",
                    default=5e-5,
                    type=float,
                    help="The initial learning rate for Adam.")

parser.add_argument("--num_train_epochs",
                    default=12.0,
                    type=float,
                    help="Total number of training epochs to perform.")

parser.add_argument("--warmup_proportion",
                    default=0.1,
                    type=float,
                    help="Proportion of training to perform linear learning rate warmup for. "
                         "E.g., 0.1 = 10%% of training.")

# NOTE(review): the device is chosen below from torch.cuda.is_available()
# alone, so this flag currently has no effect in the visible file.
parser.add_argument("--no_cuda",
                    action='store_true',
                    help="Whether not to use CUDA when available")

parser.add_argument("--local_rank",
                    type=int,
                    default=-1,
                    help="local_rank for distributed training on gpus")

# Used to seed random, numpy.random and torch below.
parser.add_argument('--seed',
                    type=int,
                    default=37,
                    help="random seed for initialization")

parser.add_argument('--gradient_accumulation_steps',
                    type=int,
                    default=1,
                    help="Number of updates steps to accumulate before performing a backward/update pass.")

parser.add_argument('--fp16',
                    action='store_true',
                    help="Whether to use 16-bit float precision instead of 32-bit")

parser.add_argument('--loss_scale',
                    type=float, default=0,
                    help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                         "0 (default value): dynamic loss scaling.\n"
                         "Positive power of 2: static loss scaling value.\n")

parser.add_argument('--mm_model', default='MTCCMBert', help='model name')  # 'MTCCMBert', 'NMMTCCMBert'
# layer_num1..3 are forwarded to UMT.from_pretrained in load_model().
parser.add_argument('--layer_num1', type=int, default=1, help='number of txt2img layer')
parser.add_argument('--layer_num2', type=int, default=1, help='number of img2txt layer')
parser.add_argument('--layer_num3', type=int, default=1, help='number of txt2txt layer')
# Passed to myResnet when the image encoder is built.
parser.add_argument('--fine_tune_cnn', action='store_true', help='fine tune pre-trained CNN if True')
# Directory expected to contain resnet152.pth (relative to the working directory by default).
parser.add_argument('--resnet_root', default='Model/Resnet/', help='path the pre-trained cnn models')
parser.add_argument('--crop_size', type=int, default=224, help='crop size of image')
# NOTE(review): not read in the visible file; a separate hard-coded
# `path_image` variable is defined further down instead.
parser.add_argument('--path_image', default='Model/MultimodelNER/VLSP2021/Image', help='path to images')
# parser.add_argument('--mm_model', default='TomBert', help='model name') #
parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
args = parser.parse_args()
162
+
163
+
164
+
165
# Every supported task name currently maps to the same processor class.
processors = {
    "twitter2015": MNERProcessor_2021,
    "twitter2017": MNERProcessor_2021,
    "sonba": MNERProcessor_2021
}

# Seed the Python, NumPy and PyTorch random number generators from --seed.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

task_name = args.task_name.lower()

# An unknown task name raises KeyError here, at import time.
processor = processors[task_name]()
label_list = processor.get_labels()
auxlabel_list = processor.get_auxlabels()
num_labels = len(label_list) + 1  # label 0 corresponds to padding, label in label_list starts from 1
auxnum_labels = len(auxlabel_list) + 1  # label 0 corresponds to padding, label in auxlabel_list starts from 1

start_label_id = processor.get_start_label_id()
stop_label_id = processor.get_stop_label_id()

# Conversion matrix with one row per auxiliary label and one column per main
# label. An earlier note described it as "7*12", but the assignments below
# index up to [6, 12], so it needs at least 7 rows and 13 columns
# (padding included); a smaller label set raises IndexError here.
trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
trans_matrix[0, 0] = 1  # pad to pad
trans_matrix[1, 1] = 1  # O to O
trans_matrix[2, 2] = 0.25  # B to B-MISC
trans_matrix[2, 4] = 0.25  # B to B-PER
trans_matrix[2, 6] = 0.25  # B to B-ORG
trans_matrix[2, 8] = 0.25  # B to B-LOC
trans_matrix[3, 3] = 0.25  # I to I-MISC
trans_matrix[3, 5] = 0.25  # I to I-PER
trans_matrix[3, 7] = 0.25  # I to I-ORG
trans_matrix[3, 9] = 0.25  # I to I-LOC
trans_matrix[4, 10] = 1  # X to X
trans_matrix[5, 11] = 1  # [CLS] to [CLS]
trans_matrix[6, 12] = 1  # [SEP] to [SEP]
# The bare string below is a disabled alternative (transposed, unweighted)
# initialisation; as a string expression it has no effect at runtime.
'''
trans_matrix = np.zeros((num_labels, auxnum_labels), dtype=float)
trans_matrix[0,0]=1 # pad to pad
trans_matrix[1,1]=1
trans_matrix[2,2]=1
trans_matrix[4,2]=1
trans_matrix[6,2]=1
trans_matrix[8,2]=1
trans_matrix[3,3]=1
trans_matrix[5,3]=1
trans_matrix[7,3]=1
trans_matrix[9,3]=1
trans_matrix[10,4]=1
trans_matrix[11,5]=1
trans_matrix[12,6]=1
'''
# NOTE(review): --no_cuda is not consulted here; CUDA is used whenever available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

# Build the ResNet-152 backbone and load its pre-trained weights.
# map_location keeps the load working on CPU-only hosts even if the checkpoint
# was saved from a GPU, matching the two loads in load_model(); the weights
# are copied into `net`, so the resulting module is unchanged.
net = getattr(resnet, 'resnet152')()
net.load_state_dict(torch.load(os.path.join(args.resnet_root, 'resnet152.pth'),
                               map_location=torch.device('cpu')))
encoder = myResnet(net, args.fine_tune_cnn, device)

# Checkpoint files consumed by load_model().
output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
# output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
output_encoder_file = os.path.join(args.output_dir, "pytorch_encoder.bin")

# Module-level copies of the contrastive-learning options; nothing else in the
# visible file reads them.
temp = args.temp
temp_lamb = args.temp_lamb
lamb = args.lamb
negative_rate = args.negative_rate
241
+ # # loadmodel
242
+ # model = UMT.from_pretrained(args.bert_model,
243
+ # cache_dir=args.cache_dir, layer_num1=args.layer_num1,
244
+ # layer_num2=args.layer_num2,
245
+ # layer_num3=args.layer_num3,
246
+ # num_labels_=num_labels, auxnum_labels=auxnum_labels)
247
+ # model.load_state_dict(torch.load(output_model_file,map_location=torch.device('cpu')))
248
+ # model.to(device)
249
+ # encoder_state_dict = torch.load(output_encoder_file,map_location=torch.device('cpu'))
250
+ # encoder.load_state_dict(encoder_state_dict)
251
+ # encoder.to(device)
252
+ # print(model)
253
+
254
def load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels):
    """Restore the UMT model and the image encoder from saved checkpoints.

    A fresh UMT instance is created from ``args.bert_model`` (using the layer
    counts and cache directory from the module-level ``args``) and its weights
    are replaced with the state dict stored in ``output_model_file``. The
    state dict stored in ``output_encoder_file`` is loaded into the supplied
    ``encoder``. Both checkpoints are deserialised onto the CPU first and the
    modules are then moved to the module-level ``device``.

    Args:
        output_model_file: Path to the saved UMT state dict.
        output_encoder_file: Path to the saved image-encoder state dict.
        encoder: Image-encoder module; it is updated in place and returned.
        num_labels: Forwarded to ``UMT.from_pretrained`` as ``num_labels_``.
        auxnum_labels: Forwarded to ``UMT.from_pretrained`` as ``auxnum_labels``.

    Returns:
        Tuple ``(model, encoder)`` with both modules on ``device``.
    """
    cpu = torch.device('cpu')

    umt_model = UMT.from_pretrained(
        args.bert_model,
        cache_dir=args.cache_dir,
        layer_num1=args.layer_num1,
        layer_num2=args.layer_num2,
        layer_num3=args.layer_num3,
        num_labels_=num_labels,
        auxnum_labels=auxnum_labels,
    )
    umt_weights = torch.load(output_model_file, map_location=cpu)
    umt_model.load_state_dict(umt_weights)
    umt_model.to(device)

    encoder.load_state_dict(torch.load(output_encoder_file, map_location=cpu))
    encoder.to(device)

    return umt_model, encoder
266
+
267
# Load the fine-tuned UMT model and image encoder once, at import time.
model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels)
#
# # sentence = 'Thương biết_mấy những Thuận, những Liên, những Luận, Xuân, Nghĩa mỗi người một hoàn_cảnh nhưng đều rất giống nhau: rất ham học, rất cố_gắng để đạt mức hiểu biết cao nhất.'
# # image_path = '/kaggle/working/data/014715.jpg'
# # # crop_size = 224'
# Image directory handed to predict(). Taken from --path_image instead of the
# former hard-coded 'E:\...' literal, which only existed on one Windows
# machine and contained unescaped backslashes (invalid escape sequences).
path_image = args.path_image
# Conversion matrix passed to the model by predict(): one row per auxiliary
# label, one column per main label. This repeats the initialisation done
# earlier in the file with the same values.
trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
trans_matrix[0, 0] = 1  # pad to pad
trans_matrix[1, 1] = 1  # O to O
trans_matrix[2, 2] = 0.25  # B to B-MISC
trans_matrix[2, 4] = 0.25  # B to B-PER
trans_matrix[2, 6] = 0.25  # B to B-ORG
trans_matrix[2, 8] = 0.25  # B to B-LOC
trans_matrix[3, 3] = 0.25  # I to I-MISC
trans_matrix[3, 5] = 0.25  # I to I-PER
trans_matrix[3, 7] = 0.25  # I to I-ORG
trans_matrix[3, 9] = 0.25  # I to I-LOC
trans_matrix[4, 10] = 1  # X to X
trans_matrix[5, 11] = 1  # [CLS] to [CLS]
trans_matrix[6, 12] = 1  # [SEP] to [SEP]
287
# Labels that are never reported in the decoded output.
_SKIPPED_LABELS = frozenset({"<pad>", "<s>", "</s>", "X"})


def _decode_label_sequences(label_ids, input_mask, label_map):
    """Turn one batch of predicted label ids into lists of label strings.

    For every row, position 0 is skipped, decoding stops at the first position
    whose mask value is falsy, and labels in ``_SKIPPED_LABELS`` are dropped.
    """
    decoded = []
    for row, row_mask in enumerate(input_mask):
        labels = []
        for position, is_real in enumerate(row_mask):
            if position == 0:
                continue
            if not is_real:
                break
            # int() keeps the dict lookup working whether the model hands back
            # plain Python ints, NumPy integers or 0-d tensors.
            label = label_map[int(label_ids[row][position])]
            if label not in _SKIPPED_LABELS:
                labels.append(label)
        decoded.append(labels)
    return decoded


def predict(model_umt, encoder_umt, eval_examples, tokenizer, device, path_image, trans_matrix,
            batch_size=16):
    """Run the UMT model over ``eval_examples`` and decode the predicted labels.

    Args:
        model_umt: Text/image model; called with the batch tensors, the image
            attention features and ``trans_matrix``. Its output is indexed as
            ``[example][position]`` and looked up in the label map, so it is
            presumably a batch of predicted label ids -- TODO confirm.
        encoder_umt: Image encoder; called on the image tensor and expected to
            return three values, of which only the last is used.
        eval_examples: Sequence of examples to featurise; each must expose
            ``text_a``.
        tokenizer: Tokenizer forwarded to the feature converter.
        device: Torch device the batch tensors are moved to.
        path_image: Image location forwarded to the feature converter.
        trans_matrix: Conversion matrix forwarded unchanged to ``model_umt``.
        batch_size: Number of examples per forward pass (default 16, the
            previously hard-coded value).

    Returns:
        Tuple ``(y_pred, tokens)``. ``y_pred`` holds one list of label strings
        per example; ``tokens`` is ``text_a`` of the FIRST example only, split
        on single spaces. Both are empty lists when ``eval_examples`` is empty.
    """
    # Nothing to featurise: torch.stack([]) and eval_examples[0] would both fail.
    if not eval_examples:
        return [], []

    # NOTE(review): 256 and 224 look like the maximum sequence length and the
    # image crop size -- confirm against convert_mm_examples_to_features_predict.
    features = convert_mm_examples_to_features_predict(eval_examples, 256, tokenizer, 224, path_image)

    input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    added_input_mask = torch.tensor([f.added_input_mask for f in features], dtype=torch.long)
    segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    img_feats = torch.stack([f.img_feat for f in features])

    eval_data = TensorDataset(input_ids, input_mask, added_input_mask, segment_ids, img_feats)
    # Sequential sampling keeps predictions in the same order as eval_examples.
    eval_dataloader = DataLoader(eval_data, sampler=SequentialSampler(eval_data), batch_size=batch_size)

    model_umt.eval()
    encoder_umt.eval()

    # Label ids start at 1; id 0 is reserved for padding.
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    label_map[0] = "<pad>"

    y_pred = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        input_ids, input_mask, added_input_mask, segment_ids, img_feats = (
            tensor.to(device) for tensor in batch
        )

        with torch.no_grad():
            _, _, img_att = encoder_umt(img_feats)
            predicted_label_seq_ids = model_umt(input_ids, segment_ids, input_mask, added_input_mask,
                                                img_att, trans_matrix)

        y_pred.extend(
            _decode_label_sequences(predicted_label_seq_ids, input_mask.to('cpu').numpy(), label_map)
        )

    tokens = eval_examples[0].text_a.split(" ")

    return y_pred, tokens
338
+
339
+ # eval_examples = get_test_examples_predict('Model/MultimodelNER/VLSP2021/Filetxt/')
340
+ # y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device,path_image,trans_matrix)
341
+ # print(y_pred)
342
+ # print(a)
343
+ # formatted_output = format_predictions(a, y_pred[0])
344
+ #
345
+ # final= process_predictions(formatted_output)
346
+ # final2= combine_entities(final)
347
+ # print(final2)
348
+ # final3= remove_B_prefix(final2)
349
+ # final4=combine_i_tags(final3)
350
+ # print(final3)
351
+