Linhz committed on
Commit
fd07025
1 Parent(s): 106800c

Upload 80 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. Model/MultimodelNER/Ner_processing.py +95 -0
  2. Model/MultimodelNER/UMT.py +290 -0
  3. Model/MultimodelNER/VLSP2016/Filetxt/list.txt +106 -0
  4. Model/MultimodelNER/VLSP2016/Filetxt/output.txt +6 -0
  5. Model/MultimodelNER/VLSP2016/Filetxt/test.txt +97 -0
  6. Model/MultimodelNER/VLSP2016/Image/014716.jpg +0 -0
  7. Model/MultimodelNER/VLSP2016/Image/My model.jpg +0 -0
  8. Model/MultimodelNER/VLSP2016/Image/bully.jpeg +0 -0
  9. Model/MultimodelNER/VLSP2016/Image/bully.jpg +0 -0
  10. Model/MultimodelNER/VLSP2016/Image/maria.jpg +0 -0
  11. Model/MultimodelNER/VLSP2016/Image/penguin.jpg +0 -0
  12. Model/MultimodelNER/VLSP2016/Image/pero.jpg +0 -0
  13. Model/MultimodelNER/VLSP2016/Image/pero2.jpg +0 -0
  14. Model/MultimodelNER/VLSP2016/Image/taybannha.jpg +0 -0
  15. Model/MultimodelNER/VLSP2016/MNER_2016.py +106 -0
  16. Model/MultimodelNER/VLSP2016/__pycache__/MNER_2016.cpython-39.pyc +0 -0
  17. Model/MultimodelNER/VLSP2016/__pycache__/dataset_roberta.cpython-39.pyc +0 -0
  18. Model/MultimodelNER/VLSP2016/__pycache__/train_umt_2016.cpython-39.pyc +0 -0
  19. Model/MultimodelNER/VLSP2016/best_model/bert_config.json +28 -0
  20. Model/MultimodelNER/VLSP2016/best_model/eval_results.txt +11 -0
  21. Model/MultimodelNER/VLSP2016/best_model/model_config.json +1 -0
  22. Model/MultimodelNER/VLSP2016/best_model/mtmner_pred.txt +0 -0
  23. Model/MultimodelNER/VLSP2016/best_model/pytorch_encoder.bin +3 -0
  24. Model/MultimodelNER/VLSP2016/best_model/pytorch_model.bin +3 -0
  25. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors +3 -0
  26. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json +0 -0
  27. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/refs/main +1 -0
  28. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json +27 -0
  29. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin +3 -0
  30. Model/MultimodelNER/VLSP2016/dataset_roberta.py +452 -0
  31. Model/MultimodelNER/VLSP2016/list.txt +5 -0
  32. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors +3 -0
  33. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json +0 -0
  34. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/refs/main +1 -0
  35. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json +27 -0
  36. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin +3 -0
  37. Model/MultimodelNER/VLSP2016/test.txt +78 -0
  38. Model/MultimodelNER/VLSP2016/train_umt_2016.py +352 -0
  39. Model/MultimodelNER/VLSP2021/Filetxt/test.txt +97 -0
  40. Model/MultimodelNER/VLSP2021/Image/taybannha.jpg +0 -0
  41. Model/MultimodelNER/VLSP2021/MNER_2021.py +151 -0
  42. Model/MultimodelNER/VLSP2021/__pycache__/MNER_2021.cpython-39.pyc +0 -0
  43. Model/MultimodelNER/VLSP2021/__pycache__/dataset_roberta.cpython-39.pyc +0 -0
  44. Model/MultimodelNER/VLSP2021/__pycache__/train_umt_2021.cpython-39.pyc +0 -0
  45. Model/MultimodelNER/VLSP2021/best_model/bert_config.json +28 -0
  46. Model/MultimodelNER/VLSP2021/best_model/eval_results.txt +50 -0
  47. Model/MultimodelNER/VLSP2021/best_model/model_config.json +1 -0
  48. Model/MultimodelNER/VLSP2021/best_model/mtmner_pred.txt +0 -0
  49. Model/MultimodelNER/VLSP2021/best_model/pytorch_encoder.bin +3 -0
  50. Model/MultimodelNER/VLSP2021/best_model/pytorch_model.bin +3 -0
Model/MultimodelNER/Ner_processing.py ADDED
@@ -0,0 +1,95 @@
+ def format_predictions(words, predictions):
+     '''
+     Convert parallel lists of words and predicted labels into (word, label) pairs.
+     '''
+     formatted = []
+     for word, label in zip(words, predictions):
+         formatted.append((word, label))
+     return formatted
+
+
+ def process_predictions(predictions):
+     '''
+     Replace the underscores that join compound words with spaces, keeping the same label.
+     '''
+     formatted = []
+     for word, label in predictions:
+         if '_' in word:
+             formatted.append((word.replace('_', ' '), label))
+         else:
+             formatted.append((word, label))
+     return formatted
+
+
+ def combine_entities(predictions):
+     '''
+     Merge consecutive B-/I- tokens of the same entity type into a single span.
+     '''
+     combined = []
+     temp_entity = []
+     temp_label = None
+
+     for word, label in predictions:
+         if label.startswith('B-'):
+             if temp_entity:
+                 combined.append((' '.join(temp_entity), temp_label))
+                 temp_entity = []
+             temp_entity.append(word)
+             temp_label = label
+         elif label.startswith('I-') and temp_label and label[2:] == temp_label[2:]:
+             temp_entity.append(word)
+         else:
+             if temp_entity:
+                 combined.append((' '.join(temp_entity), temp_label))
+                 temp_entity = []
+                 temp_label = None
+             combined.append((word, label))
+
+     if temp_entity:
+         combined.append((' '.join(temp_entity), temp_label))
+
+     return combined
+
+
+ def remove_B_prefix(entities):
+     modified_entities = []
+     for word, label in entities:
+         if label.startswith('B-'):
+             label = label[2:]  # Strip the 'B-' prefix from the label
+         modified_entities.append((word, label))
+     return modified_entities
+
+
+ def combine_i_tags(tokens_labels):
+     combined = []
+     current_combination = []
+     current_label = None
+
+     for token, label in tokens_labels:
+         if label.startswith('I-'):
+             label = label[2:]  # Remove the 'I-' prefix
+             if current_label is None:
+                 current_label = label
+                 current_combination.append(token)
+             elif current_label == label:
+                 current_combination.append(token)
+             else:
+                 combined.append((' '.join(current_combination), current_label))
+                 current_combination = [token]
+                 current_label = label
+         else:
+             if current_combination:
+                 combined.append((' '.join(current_combination), current_label))
+                 current_combination = []
+                 current_label = None
+             combined.append((token, label))
+
+     if current_combination:
+         combined.append((' '.join(current_combination), current_label))
+
+     return combined
+
+
+ if __name__ == '__main__':
+     # Smoke test, guarded so importing this module does not execute it.
+     tokens_labels = [('Dân', 'O'), ('trí', 'O'), ('Chức', 'O'), ('vô', 'O'), ('địch', 'O'), ('Euro 2008', 'EVENT-SPORT'), ('đầy', 'O'), ('thuyết', 'O'), ('phục', 'O'), ('của', 'O'), ('Tây Ban Nha', 'LOCATION'), ('trên', 'O'), ('đất', 'O'), ('Áo', 'LOCATION'), ('và', 'O'), ('Thụy Sĩ', 'PERSON'), ('đã', 'O'), ('mở', 'O'), ('ra', 'O'), ('kỷ', 'O'), ('nguyên', 'O'), ('vinh', 'O'), ('quanh', 'O'), ('của', 'O'), ('La', 'ORGANIZATION'), ('Furia', 'I-ORGANIZATION-SPORTS'), ('Roja', 'I-ORGANIZATION-SPORTS'), (',', 'O'), ('với', 'O'), ('lối', 'O'), ('chơi', 'O'), ('tiqui', 'O'), ('taka', 'O'), ('đầy', 'O'), ('biến', 'O'), ('ảo', 'O'), ('.', 'O'), ('Trong', 'O'), ('quá', 'O'), ('khứ', 'O'), (',', 'O'), ('Tây Ban Nha', 'LOCATION'), ('nổi', 'O'), ('tiếng', 'O'), ('với', 'O'), ('biệt', 'O'), ('danh', 'O'), ('Vua', 'O'), ('vòng', 'O'), ('loại', 'O'), ('.', 'O'), ('Họ', 'O'), ('thường', 'O'), ('thi', 'O'), ('đấu', 'O'), ('rất', 'O'), ('tốt', 'O'), ('ở', 'O'), ('vòng', 'O'), ('loại', 'O'), ('nhưng', 'O'), ('lại', 'O'), ('chưa', 'O'), ('bao', 'O'), ('giờ', 'O'), ('chứng', 'O'), ('minh', 'O'), ('được', 'O'), ('sức', 'O'), ('mạnh', 'O'), ('ở', 'O'), ('vòng', 'O'), ('chung', 'O'), ('kết', 'O'), ('giải', 'O'), ('đấu', 'O'), ('lớn', 'O'), ('.', 'O'), ('Lần', 'O'), ('duy', 'O'), ('nhất', 'O'), ('họ', 'O'), ('lên', 'O'), ('ngôi', 'O'), ('là', 'O'), ('ở', 'O'), ('kỳ', 'O'), ('Euro', 'EVENT-SPORT'), ('1964', 'O'), ('.', 'O')]
+
+     combined_tokens_labels = combine_i_tags(tokens_labels)
+     print(combined_tokens_labels)
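
These helpers are chained in MNER_2016.py in a fixed order (format → process → combine_entities → remove_B_prefix → combine_i_tags). A minimal sketch of that pipeline on toy inputs; the words and labels below are illustrative, not taken from the dataset:

from Model.MultimodelNER.Ner_processing import (format_predictions, process_predictions,
                                                combine_entities, remove_B_prefix, combine_i_tags)

words = ['Tây_Ban_Nha', 'vô', 'địch']          # illustrative tokens
y_pred = ['B-LOC', 'O', 'O']                   # one predicted label per token
pairs = format_predictions(words, y_pred)      # [('Tây_Ban_Nha', 'B-LOC'), ...]
pairs = process_predictions(pairs)             # '_' -> ' ' inside compound words
pairs = combine_entities(pairs)                # merge B-/I- runs into one span
pairs = remove_B_prefix(pairs)                 # 'B-LOC' -> 'LOC'
pairs = combine_i_tags(pairs)                  # merge stray I- runs sharing a type
print(pairs)                                   # [('Tây Ban Nha', 'LOC'), ('vô', 'O'), ('địch', 'O')]
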
Model/MultimodelNER/UMT.py ADDED
@@ -0,0 +1,290 @@
+ # coding=utf-8
+ # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+ # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """PyTorch BERT model."""
+
+ from __future__ import absolute_import, division, print_function, unicode_literals
+
+ import copy
+ import json
+ import logging
+ import math
+ import os
+ import shutil
+ import tarfile
+ import tempfile
+ import sys
+ from io import open
+ from torchcrf import CRF
+
+ import torch
+ from torch import nn
+ from torch.nn import CrossEntropyLoss
+
+ import torch.nn.functional as F
+ from torch.autograd import Variable
+
+ logger = logging.getLogger(__name__)
+
+
+ def gelu(x):
+     """Implementation of the gelu activation function.
+     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
+     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
+     Also see https://arxiv.org/abs/1606.08415
+     """
+     return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
+
+
+ def swish(x):
+     return x * torch.sigmoid(x)
+
+
+ ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish}
+
+ from transformers import RobertaModel
+ from transformers.models.roberta.modeling_roberta import RobertaLayer, RobertaPreTrainedModel, RobertaOutput, \
+     RobertaSelfOutput, RobertaIntermediate
+
+
+ class RobertaSelfEncoder(nn.Module):
+     def __init__(self, config):
+         super(RobertaSelfEncoder, self).__init__()
+         layer = RobertaLayer(config)
+         self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(1)])
+
+     def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True):
+         all_encoder_layers = []
+         for layer_module in self.layer:
+             hidden_states = layer_module(hidden_states, attention_mask)
+             if output_all_encoded_layers:
+                 all_encoder_layers.append(hidden_states)
+         if not output_all_encoded_layers:
+             all_encoder_layers.append(hidden_states)
+         return all_encoder_layers
+
+
+ class RobertaCrossEncoder(nn.Module):
+     def __init__(self, config, layer_num):
+         super(RobertaCrossEncoder, self).__init__()
+         layer = RobertaCrossAttentionLayer(config)
+         self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(layer_num)])
+
+     def forward(self, s1_hidden_states, s2_hidden_states, s2_attention_mask, output_all_encoded_layers=True):
+         all_encoder_layers = []
+         for layer_module in self.layer:
+             s1_hidden_states = layer_module(s1_hidden_states, s2_hidden_states, s2_attention_mask)
+             if output_all_encoded_layers:
+                 all_encoder_layers.append(s1_hidden_states)
+         if not output_all_encoded_layers:
+             all_encoder_layers.append(s1_hidden_states)
+         return all_encoder_layers
+
+
+ class RobertaCoAttention(nn.Module):
+     def __init__(self, config):
+         super(RobertaCoAttention, self).__init__()
+         if config.hidden_size % config.num_attention_heads != 0:
+             raise ValueError(
+                 "The hidden size (%d) is not a multiple of the number of attention "
+                 "heads (%d)" % (config.hidden_size, config.num_attention_heads))
+         self.num_attention_heads = config.num_attention_heads
+         self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+         self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+         self.query = nn.Linear(config.hidden_size, self.all_head_size)
+         self.key = nn.Linear(config.hidden_size, self.all_head_size)
+         self.value = nn.Linear(config.hidden_size, self.all_head_size)
+
+         self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+
+     def transpose_for_scores(self, x):
+         new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
+         x = x.view(*new_x_shape)
+         return x.permute(0, 2, 1, 3)
+
+     def forward(self, s1_hidden_states, s2_hidden_states, s2_attention_mask):
+         mixed_query_layer = self.query(s1_hidden_states)
+         mixed_key_layer = self.key(s2_hidden_states)
+         mixed_value_layer = self.value(s2_hidden_states)
+
+         query_layer = self.transpose_for_scores(mixed_query_layer)
+         key_layer = self.transpose_for_scores(mixed_key_layer)
+         value_layer = self.transpose_for_scores(mixed_value_layer)
+
+         # Take the dot product between "query" and "key" to get the raw attention scores.
+         attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+
+         attention_scores = attention_scores / math.sqrt(self.attention_head_size)
+         # Apply the attention mask (precomputed for all layers in the BertModel forward() function)
+         attention_scores = attention_scores + s2_attention_mask
+
+         # Normalize the attention scores to probabilities.
+         attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+         # This is actually dropping out entire tokens to attend to, which might
+         # seem a bit unusual, but is taken from the original Transformer paper.
+         attention_probs = self.dropout(attention_probs)
+
+         context_layer = torch.matmul(attention_probs, value_layer)
+
+         context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+
+         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
+         context_layer = context_layer.view(*new_context_layer_shape)
+         return context_layer
+
+
+ class RobertaCrossAttention(nn.Module):
+     def __init__(self, config):
+         super(RobertaCrossAttention, self).__init__()
+         self.self = RobertaCoAttention(config)
+         self.output = RobertaSelfOutput(config)
+
+     def forward(self, s1_input_tensor, s2_input_tensor, s2_attention_mask):
+         s1_cross_output = self.self(s1_input_tensor, s2_input_tensor, s2_attention_mask)
+         attention_output = self.output(s1_cross_output, s1_input_tensor)
+         return attention_output
+
+
+ class RobertaCrossAttentionLayer(nn.Module):
+     def __init__(self, config):
+         super(RobertaCrossAttentionLayer, self).__init__()
+         self.attention = RobertaCrossAttention(config)
+         self.intermediate = RobertaIntermediate(config)
+         self.output = RobertaOutput(config)
+
+     def forward(self, s1_hidden_states, s2_hidden_states, s2_attention_mask):
+         attention_output = self.attention(s1_hidden_states, s2_hidden_states, s2_attention_mask)
+         intermediate_output = self.intermediate(attention_output)
+         layer_output = self.output(intermediate_output, attention_output)
+         return layer_output
+
+
+ class UMT(RobertaPreTrainedModel):
+     """Coupled Cross-Modal Attention BERT model for token-level classification with CRF on top.
+     """
+
+     def __init__(self, config, layer_num1=1, layer_num2=1, layer_num3=1, num_labels_=2, auxnum_labels=2):
+         super(UMT, self).__init__(config)
+         self.num_labels = num_labels_
+         self.roberta = RobertaModel(config)
+         # self.trans_matrix = torch.zeros(num_labels, auxnum_labels)
+         self.self_attention = RobertaSelfEncoder(config)
+         self.self_attention_v2 = RobertaSelfEncoder(config)
+         self.dropout = nn.Dropout(config.hidden_dropout_prob)
+         self.vismap2text = nn.Linear(2048, config.hidden_size)
+         self.vismap2text_v2 = nn.Linear(2048, config.hidden_size)
+         self.txt2img_attention = RobertaCrossEncoder(config, layer_num1)
+         self.img2txt_attention = RobertaCrossEncoder(config, layer_num2)
+         self.txt2txt_attention = RobertaCrossEncoder(config, layer_num3)
+         self.gate = nn.Linear(config.hidden_size * 2, config.hidden_size)
+         ### self.self_attention = BertLastSelfAttention(config)
+         self.classifier = nn.Linear(config.hidden_size * 2, num_labels_)
+         self.aux_classifier = nn.Linear(config.hidden_size, auxnum_labels)
+
+         self.crf = CRF(num_labels_, batch_first=True)
+         self.aux_crf = CRF(auxnum_labels, batch_first=True)
+
+         self.init_weights()
+
+     # this forward is just for predict, not for train
+     # don't confuse this with _forward_alg above.
+     def forward(self, input_ids, segment_ids, input_mask, added_attention_mask, visual_embeds_att, trans_matrix,
+                 labels=None, auxlabels=None):
+         # Get token representations from the RoBERTa encoder
+         features = self.roberta(input_ids, token_type_ids=segment_ids,
+                                 attention_mask=input_mask)  # batch_size * seq_len * hidden_size
+         sequence_output = features["last_hidden_state"]
+         sequence_output = self.dropout(sequence_output)
+
+         extended_txt_mask = input_mask.unsqueeze(1).unsqueeze(2)
+         extended_txt_mask = extended_txt_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+         extended_txt_mask = (1.0 - extended_txt_mask) * -10000.0
+         aux_addon_sequence_encoder = self.self_attention(sequence_output, extended_txt_mask)
+
+         aux_addon_sequence_output = aux_addon_sequence_encoder[-1]
+         aux_addon_sequence_output = aux_addon_sequence_output[0]
+         aux_bert_feats = self.aux_classifier(aux_addon_sequence_output)
+         #######aux_bert_feats = self.aux_classifier(sequence_output)
+         trans_matrix_tensor = torch.tensor(trans_matrix, dtype=torch.float32, device=aux_bert_feats.device)
+         trans_bert_feats = torch.matmul(aux_bert_feats, trans_matrix_tensor)
+
+         # trans_bert_feats = torch.matmul(aux_bert_feats, trans_matrix.float())
+
+         main_addon_sequence_encoder = self.self_attention_v2(sequence_output, extended_txt_mask)
+         main_addon_sequence_output = main_addon_sequence_encoder[-1]
+         main_addon_sequence_output = main_addon_sequence_output[0]
+         vis_embed_map = visual_embeds_att.view(-1, 2048, 49).permute(0, 2, 1)  # self.batch_size, 49, 2048
+         converted_vis_embed_map = self.vismap2text(vis_embed_map)  # self.batch_size, 49, hidden_dim
+
+         # apply txt2img attention mechanism to obtain image-based text representations
+         img_mask = added_attention_mask[:, :49]
+         extended_img_mask = img_mask.unsqueeze(1).unsqueeze(2)
+         extended_img_mask = extended_img_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+         extended_img_mask = (1.0 - extended_img_mask) * -10000.0
+
+         cross_encoder = self.txt2img_attention(main_addon_sequence_output, converted_vis_embed_map, extended_img_mask)
+         cross_output_layer = cross_encoder[-1]  # self.batch_size * text_len * hidden_dim
+
+         # apply img2txt attention mechanism to obtain multimodal-based text representations
+         converted_vis_embed_map_v2 = self.vismap2text_v2(vis_embed_map)  # self.batch_size, 49, hidden_dim
+
+         cross_txt_encoder = self.img2txt_attention(converted_vis_embed_map_v2, main_addon_sequence_output,
+                                                    extended_txt_mask)
+         cross_txt_output_layer = cross_txt_encoder[-1]  # self.batch_size * 49 * hidden_dim
+         cross_final_txt_encoder = self.txt2txt_attention(main_addon_sequence_output, cross_txt_output_layer,
+                                                          extended_img_mask)
+         ##cross_final_txt_encoder = self.txt2txt_attention(aux_addon_sequence_output, cross_txt_output_layer, extended_img_mask)
+         cross_final_txt_layer = cross_final_txt_encoder[-1]  # self.batch_size * text_len * hidden_dim
+         # cross_final_txt_layer = torch.add(cross_final_txt_layer, sequence_output)
+
+         # visual gate
+         merge_representation = torch.cat((cross_final_txt_layer, cross_output_layer), dim=-1)
+         gate_value = torch.sigmoid(self.gate(merge_representation))  # batch_size, text_len, hidden_dim
+         gated_converted_att_vis_embed = torch.mul(gate_value, cross_output_layer)
+         # reverse_gate_value = torch.neg(gate_value).add(1)
+         # gated_converted_att_vis_embed = torch.add(torch.mul(reverse_gate_value, cross_final_txt_layer),
+         #                                           torch.mul(gate_value, cross_output_layer))
+
+         # direct concatenation
+         # gated_converted_att_vis_embed = self.dropout(gated_converted_att_vis_embed)
+         final_output = torch.cat((cross_final_txt_layer, gated_converted_att_vis_embed), dim=-1)
+         ###### final_output = self.dropout(final_output)
+         # middle_output = torch.cat((cross_final_txt_layer, gated_converted_att_vis_embed), dim=-1)
+         # final_output = torch.cat((sequence_output, middle_output), dim=-1)
+
+         ###### addon_sequence_output = self.self_attention(final_output, extended_txt_mask)
+         bert_feats = self.classifier(final_output)
+
+         alpha = 0.5
+         final_bert_feats = torch.add(torch.mul(bert_feats, alpha), torch.mul(trans_bert_feats, 1 - alpha))
+
+         # suggested by Hongjie
+         # bert_feats = F.log_softmax(bert_feats, dim=-1)
+
+         if labels is not None:
+             beta = 0.5  # 73.87(73.50) 85.37(85.00) 0.5 5e-5 #73.45 85.05 1.0 1 1 1 4e-5 # 73.63 0.1 1 1 1 5e-5 # old 0.1 2 1 1 85.23 0.2 1 1 85.04
+             ##beta = 0.6
+             aux_loss = - self.aux_crf(aux_bert_feats, auxlabels, mask=input_mask.byte(), reduction='mean')
+             main_loss = - self.crf(final_bert_feats, labels, mask=input_mask.byte(), reduction='mean')
+             loss = main_loss + beta * aux_loss
+             return loss
+         else:
+             pred_tags = self.crf.decode(final_bert_feats, mask=input_mask.byte())
+             return pred_tags
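
A minimal sketch of constructing UMT for inference, sized the way MNER_2016.py sizes the label spaces (12 main labels + 1 padding, 6 auxiliary labels + 1 padding); the config path is illustrative, and BertConfig matches the import used elsewhere in this commit:

import torch
from transformers import BertConfig
from Model.MultimodelNER.UMT import UMT

config = BertConfig.from_json_file('best_model/bert_config.json')  # illustrative path
model = UMT(config, layer_num1=1, layer_num2=1, layer_num3=1,
            num_labels_=13, auxnum_labels=7)
model.eval()
# With labels=None, forward() returns CRF-decoded tag id sequences;
# with labels and auxlabels it returns main_loss + 0.5 * aux_loss.
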
Model/MultimodelNER/VLSP2016/Filetxt/list.txt ADDED
@@ -0,0 +1,106 @@
+ IMGID:pero
+ James
+
+ Shan
+ đều
+ nghĩ
+ rằng
+ gửi
+ Pero
+ đi
+ làm
+ nông
+ thử
+ cho
+ vui
+
+ bắt
+ đầu
+ cho
+ Pero
+ du
+
+ từ
+ đầu
+ tháng
+ 3
+ năm
+ nay
+ .
+ Ngày
+ 8
+ 4
+ ,
+ người
+ bạn
+ báo
+ tin
+ cho
+ Alan
+ biết
+ Pero
+ mất
+ tích
+
+ họ
+ tìm
+ kiếm
+ Pero
+
+ rất
+ nhiều
+ nơi
+ .
+ Hai
+ vợ
+ chồng
+ anh
+ James
+ đều
+ nghĩ
+ Pero
+ đã
+ mất
+ .
+ Tuy
+ nhiên
+ một
+ tối
+ nọ
+ khi
+ xuống
+ thăm
+ bầy
+ gia
+ súc
+ sau
+ khi
+ ăn
+ tối
+ ,
+ Alan
+ đã
+ rất
+ ngạc
+ nhiên
+ khi
+ mở
+ cửa
+
+ thấy
+ Pero
+ ngồi
+ chờ
+ ngay
+ trước
+ cửa
+ .
+ Thấy
+ Alan
+ ,
+ Pero
+ mừng
+ rỡ
+
+ cùng
+ .
Model/MultimodelNER/VLSP2016/Filetxt/output.txt ADDED
@@ -0,0 +1,6 @@
+ This
+ is
+ some
+ example
+ text
+ .
Model/MultimodelNER/VLSP2016/Filetxt/test.txt ADDED
@@ -0,0 +1,97 @@
+ IMGID:taybannha
+ Dân
+ trí
+ Chức
+
+ địch
+ Euro
+ 2008
+ đầy
+ thuyết
+ phục
+ của
+ Tây
+ Ban
+ Nha
+ trên
+ đất
+ Áo
+
+ Thụy
+
+ đã
+ mở
+ ra
+ kỷ
+ nguyên
+ vinh
+ quanh
+ của
+ La
+ Furia
+ Roja
+ ,
+ với
+ lối
+ chơi
+ tiqui
+ taka
+ đầy
+ biến
+ ảo
+ .
+ Trong
+ quá
+ khứ
+ ,
+ Tây
+ Ban
+ Nha
+ nổi
+ tiếng
+ với
+ biệt
+ danh
+ Vua
+ vòng
+ loại
+ .
+ Họ
+ thường
+ thi
+ đấu
+ rất
+ tốt
+
+ vòng
+ loại
+ nhưng
+ lại
+ chưa
+ bao
+ giờ
+ chứng
+ minh
+ được
+ sức
+ mạnh
+
+ vòng
+ chung
+ kết
+ giải
+ đấu
+ lớn
+ .
+ Lần
+ duy
+ nhất
+ họ
+ lên
+ ngôi
+
+
+ kỳ
+ Euro
+ 1964
+ .
Model/MultimodelNER/VLSP2016/Image/014716.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/My model.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/bully.jpeg ADDED
Model/MultimodelNER/VLSP2016/Image/bully.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/maria.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/penguin.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/pero.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/pero2.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/taybannha.jpg ADDED
Model/MultimodelNER/VLSP2016/MNER_2016.py ADDED
@@ -0,0 +1,106 @@
+ import streamlit as st
+ from spacy import displacy
+ from Model.NER.VLSP2021.Predict_Ner import ViTagger
+ import re
+ from thunghiemxuly import save_uploaded_image, convert_text_to_txt, add_string_to_txt
+
+ import os
+ from transformers import AutoTokenizer, BertConfig
+ from Model.MultimodelNER.VLSP2016.train_umt_2016 import load_model, predict
+ from Model.MultimodelNER.Ner_processing import format_predictions, process_predictions, combine_entities, remove_B_prefix, combine_i_tags
+
+ from Model.MultimodelNER.predict import get_test_examples_predict
+ from Model.MultimodelNER import resnet as resnet
+ from Model.MultimodelNER.resnet_utils import myResnet
+ import torch
+ import numpy as np
+ from Model.MultimodelNER.VLSP2016.dataset_roberta import MNERProcessor_2016
+
+
+ CONFIG_NAME = 'bert_config.json'
+ WEIGHTS_NAME = 'pytorch_model.bin'
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ net = getattr(resnet, 'resnet152')()
+ net.load_state_dict(torch.load(os.path.join('E:/demo_datn/pythonProject1/Model/Resnet/', 'resnet152.pth')))
+ encoder = myResnet(net, True, device)
+
+
+ def process_text(text):
+     # Collapse repeated whitespace and trim leading/trailing spaces
+     processed_text = re.sub(r'\s+', ' ', text.strip())
+     return processed_text
+
+
+ def show_mner_2016():
+     multimodal_text = st.text_area("Enter your text for MNER:", height=300)
+     multimodal_text = process_text(multimodal_text)  # Normalize the input text
+     image = st.file_uploader("Upload an image (only jpg):", type=["jpg"])
+     if st.button("Process Multimodal NER"):
+         save_image = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Image'
+         save_txt = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/test.txt'
+         image_name = image.name
+         save_uploaded_image(image, save_image)
+         convert_text_to_txt(multimodal_text, save_txt)
+         add_string_to_txt(image_name, save_txt)
+         st.image(image, caption="Uploaded Image", use_column_width=True)
+
+         bert_model = 'vinai/phobert-base-v2'
+         output_dir = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/best_model'
+         output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
+         output_encoder_file = os.path.join(output_dir, "pytorch_encoder.bin")
+         processor = MNERProcessor_2016()
+         label_list = processor.get_labels()
+         auxlabel_list = processor.get_auxlabels()
+         num_labels = len(label_list) + 1
+         auxnum_labels = len(auxlabel_list) + 1
+         # Transition matrix mapping the auxiliary label space (pad/O/B/I/X/<s>/</s>)
+         # onto the full label space; rows are aux labels, columns are main labels.
+         trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+         trans_matrix[0, 0] = 1  # pad to pad
+         trans_matrix[1, 1] = 1  # O to O
+         trans_matrix[2, 2] = 0.25  # B to B-MISC
+         trans_matrix[2, 4] = 0.25  # B to B-PER
+         trans_matrix[2, 6] = 0.25  # B to B-ORG
+         trans_matrix[2, 8] = 0.25  # B to B-LOC
+         trans_matrix[3, 3] = 0.25  # I to I-MISC
+         trans_matrix[3, 5] = 0.25  # I to I-PER
+         trans_matrix[3, 7] = 0.25  # I to I-ORG
+         trans_matrix[3, 9] = 0.25  # I to I-LOC
+         trans_matrix[4, 10] = 1  # X to X
+         trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+         trans_matrix[6, 12] = 1
+         tokenizer = AutoTokenizer.from_pretrained(bert_model, do_lower_case=False)
+         model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels)
+         eval_examples = get_test_examples_predict('E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/')
+
+         y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device, save_image, trans_matrix)
+         formatted_output = format_predictions(a, y_pred[0])
+         final = process_predictions(formatted_output)
+         final2 = combine_entities(final)
+         final3 = remove_B_prefix(final2)
+         final4 = combine_i_tags(final3)
+         words_and_labels = final4
+         # Build the list of words
+         words = [word for word, _ in words_and_labels]
+         # Build character-offset entity spans for each word, skipping 'O' labels
+         entities = [{'start': sum(len(word) + 1 for word, _ in words_and_labels[:i]),
+                      'end': sum(len(word) + 1 for word, _ in words_and_labels[:i + 1]), 'label': label} for
+                     i, (word, label)
+                     in enumerate(words_and_labels) if label != 'O']
+         # print(entities)
+
+         # Render the visualization without color for 'O' labels
+         html = displacy.render(
+             {"text": " ".join(words), "ents": entities, "title": None},
+             style="ent",
+             manual=True,
+             options={"colors": {"MISC": "#806699",
+                                 "ORG": "#ff6666",
+                                 "LOC": "#66cc66",
+                                 "PER": "#bf80ff",
+                                 "O": None}}
+         )
+         # print(html)
+         st.markdown(html, unsafe_allow_html=True)
+
+
+ ### Example 1: A brawl broke out at the ice-hockey game between the Penguins and the Islanders in the US (image:penguin)
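
show_mner_2016() renders into an already-running Streamlit page, so it needs a small entry point. A hypothetical app.py wiring it up could look like this (the file name and title are assumptions, not part of this commit):

import streamlit as st
from Model.MultimodelNER.VLSP2016.MNER_2016 import show_mner_2016

st.title('Multimodal NER - VLSP2016')  # assumed title
show_mner_2016()

# Launch with: streamlit run app.py
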
Model/MultimodelNER/VLSP2016/__pycache__/MNER_2016.cpython-39.pyc ADDED
Binary file (4.34 kB).
 
Model/MultimodelNER/VLSP2016/__pycache__/dataset_roberta.cpython-39.pyc ADDED
Binary file (9.5 kB).
 
Model/MultimodelNER/VLSP2016/__pycache__/train_umt_2016.cpython-39.pyc ADDED
Binary file (8.82 kB).
 
Model/MultimodelNER/VLSP2016/best_model/bert_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "vinai/phobert-base-v2",
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2016/best_model/eval_results.txt ADDED
@@ -0,0 +1,11 @@
+               precision    recall  f1-score   support
+
+          LOC      0.9570    0.9618    0.9594       996
+         MISC      0.9143    0.8889    0.9014        36
+          ORG      0.8129    0.7975    0.8051       158
+          PER      0.9835    0.9788    0.9812       851
+
+    micro avg      0.9563    0.9549    0.9556      2041
+    macro avg      0.9169    0.9068    0.9118      2041
+ weighted avg      0.9561    0.9549    0.9555      2041
+ Overall: 0.9563297350343474 0.9549240568348849 0.9556263790144643
Model/MultimodelNER/VLSP2016/best_model/model_config.json ADDED
@@ -0,0 +1 @@
+ {"bert_model": "vinai/phobert-base-v2", "do_lower": false, "max_seq_length": 256, "num_labels": 13, "label_map": {"1": "B-ORG", "2": "B-MISC", "3": "I-PER", "4": "I-ORG", "5": "B-LOC", "6": "I-MISC", "7": "I-LOC", "8": "O", "9": "B-PER", "10": "X", "11": "<s>", "12": "</s>"}}
Model/MultimodelNER/VLSP2016/best_model/mtmner_pred.txt ADDED
The diff for this file is too large to render.
 
Model/MultimodelNER/VLSP2016/best_model/pytorch_encoder.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab29aaf11c3beb874e34fc9bccaa1fb838d94701cf4a4189c37d768a7678e958
+ size 241699561
Model/MultimodelNER/VLSP2016/best_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c950c331c48a229744b1b727a49d3dc248f28377ba8efbd86612daf2721e4368
+ size 699285929
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json ADDED
File without changes
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/refs/main ADDED
@@ -0,0 +1 @@
+ 2b51e367d92093c9688112098510e6a58bab67cd
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba09eb4c244a5b3a49ad76d52d129ac085b61f5c6287de7f99508b02be589f9
+ size 540322347
Model/MultimodelNER/VLSP2016/dataset_roberta.py ADDED
@@ -0,0 +1,452 @@
+ import torch
+ import logging
+ import os
+
+ logger = logging.getLogger(__name__)
+ from torchvision import transforms
+ from PIL import Image
+
+
+ class SBInputExample(object):
+     """A single training/test example for simple sequence classification."""
+
+     def __init__(self, guid, text_a, text_b, img_id, label=None, auxlabel=None):
+         """Constructs an InputExample.
+
+         Args:
+             guid: Unique id for the example.
+             text_a: string. The untokenized text of the first sequence. For single
+                 sequence tasks, only this sequence must be specified.
+             text_b: (Optional) string. The untokenized text of the second sequence.
+                 Only must be specified for sequence pair tasks.
+             label: (Optional) string. The label of the example. This should be
+                 specified for train and dev examples, but not for test examples.
+         """
+         self.guid = guid
+         self.text_a = text_a
+         self.text_b = text_b
+         self.img_id = img_id
+         self.label = label
+         # Please note that the auxlabel is not used in SB
+         # it is just kept in order not to modify the original code
+         self.auxlabel = auxlabel
+
+
+ class SBInputFeatures(object):
+     """A single set of features of data"""
+
+     def __init__(self, input_ids, input_mask, added_input_mask, segment_ids, img_feat, label_id, auxlabel_id):
+         self.input_ids = input_ids
+         self.input_mask = input_mask
+         self.added_input_mask = added_input_mask
+         self.segment_ids = segment_ids
+         self.img_feat = img_feat
+         self.label_id = label_id
+         self.auxlabel_id = auxlabel_id
+
+
+ def sbreadfile(filename):
+     '''
+     Read the file and return the data as a list of (words, labels) pairs,
+     together with the list of image ids and the auxiliary labels.
+     '''
+     print("Preparing data for ", filename)
+     f = open(filename, encoding='utf8')
+     data = []
+     imgs = []
+     auxlabels = []
+     sentence = []
+     label = []
+     auxlabel = []
+     imgid = ''
+
+     for line in f:
+         line = line.strip()  # Trim leading and trailing whitespace
+         if line.startswith('IMGID:'):
+             imgid = line.split('IMGID:')[1] + '.jpg'
+             continue
+         if line == '':
+             if len(sentence) > 0:
+                 data.append((sentence, label))
+                 imgs.append(imgid)
+                 auxlabels.append(auxlabel)
+                 sentence = []
+                 label = []
+                 auxlabel = []
+                 imgid = ''
+             continue
+         splits = line.split('\t')
+         if len(splits) == 2:  # Make sure the line holds exactly one word and one label
+             word, cur_label = splits
+             sentence.append(word)
+             label.append(cur_label)
+             auxlabel.append(cur_label[0])  # The first character of the label serves as the auxiliary label
+
+     if len(sentence) > 0:  # Flush the last sentence in the file
+         data.append((sentence, label))
+         imgs.append(imgid)
+         auxlabels.append(auxlabel)
+
+     print("The number of samples: " + str(len(data)))
+     print("The number of images: " + str(len(imgs)))
+     return data, imgs, auxlabels
+
+
+ # def sbreadfile(filename):  # original version
+ #     '''
+ #     read file
+ #     return format :
+ #     [ ['EU', 'B-ORG'], ['rejects', 'O'], ['German', 'B-MISC'], ['call', 'O'], ['to', 'O'], ['boycott', 'O'], ['British', 'B-MISC'], ['lamb', 'O'], ['.', 'O'] ]
+ #     '''
+ #     print("prepare data for ", filename)
+ #     f = open(filename, encoding='utf8')
+ #     data = []
+ #     imgs = []
+ #     auxlabels = []
+ #     sentence = []
+ #     label = []
+ #     auxlabel = []
+ #     imgid = ''
+ #     a = 0
+ #     for line in f:
+ #         if line.startswith('IMGID:'):
+ #             imgid = line.strip().split('IMGID:')[1] + '.jpg'
+ #             continue
+ #         if line[0] == "\n":
+ #             if len(sentence) > 0:
+ #                 data.append((sentence, label))
+ #                 imgs.append(imgid)
+ #                 auxlabels.append(auxlabel)
+ #                 sentence = []
+ #                 label = []
+ #                 imgid = ''
+ #                 auxlabel = []
+ #             continue
+ #         splits = line.split('\t')
+ #         sentence.append(splits[0])
+ #         cur_label = splits[-1][:-1]
+ #         # if cur_label == 'B-OTHER':
+ #         #     cur_label = 'B-MISC'
+ #         # elif cur_label == 'I-OTHER':
+ #         #     cur_label = 'I-MISC'
+ #         label.append(cur_label)
+ #         auxlabel.append(cur_label[0])
+
+ #     if len(sentence) > 0:
+ #         data.append((sentence, label))
+ #         imgs.append(imgid)
+ #         auxlabels.append(auxlabel)
+ #         sentence = []
+ #         label = []
+ #         auxlabel = []
+
+ #     print("The number of samples: " + str(len(data)))
+ #     print("The number of images: " + str(len(imgs)))
+ #     return data, imgs, auxlabels
+
+
+ class DataProcessor(object):
+     """Base class for data converters for sequence classification data sets."""
+
+     def get_train_examples(self, data_dir):
+         """Gets a collection of `InputExample`s for the train set."""
+         raise NotImplementedError()
+
+     def get_dev_examples(self, data_dir):
+         """Gets a collection of `InputExample`s for the dev set."""
+         raise NotImplementedError()
+
+     def get_labels(self):
+         """Gets the list of labels for this data set."""
+         raise NotImplementedError()
+
+     @classmethod
+     def _read_sbtsv(cls, input_file, quotechar=None):
+         """Reads a tab separated value file."""
+         return sbreadfile(input_file)
+
+
+ class MNERProcessor_2016(DataProcessor):
+     """Processor for the CoNLL-2003 data set."""
+
+     def get_train_examples(self, data_dir):
+         """See base class."""
+         data, imgs, auxlabels = self._read_sbtsv(os.path.join(data_dir, "train.txt"))
+         return self._create_examples(data, imgs, auxlabels, "train")
+
+     def get_dev_examples(self, data_dir):
+         """See base class."""
+         data, imgs, auxlabels = self._read_sbtsv(os.path.join(data_dir, "dev.txt"))
+         return self._create_examples(data, imgs, auxlabels, "dev")
+
+     def get_test_examples(self, data_dir):
+         """See base class."""
+         data, imgs, auxlabels = self._read_sbtsv(os.path.join(data_dir, "test.txt"))
+         return self._create_examples(data, imgs, auxlabels, "test")
+
+     def get_labels(self):
+         # return [
+         #     "O", "I-PRODUCT-AWARD",
+         #     "B-MISCELLANEOUS",
+         #     "B-QUANTITY-NUM",
+         #     "B-ORGANIZATION-SPORTS",
+         #     "B-DATETIME",
+         #     "I-ADDRESS",
+         #     "I-PERSON",
+         #     "I-EVENT-SPORT",
+         #     "B-ADDRESS",
+         #     "B-EVENT-NATURAL",
+         #     "I-LOCATION-GPE",
+         #     "B-EVENT-GAMESHOW",
+         #     "B-DATETIME-TIMERANGE",
+         #     "I-QUANTITY-NUM",
+         #     "I-QUANTITY-AGE",
+         #     "B-EVENT-CUL",
+         #     "I-QUANTITY-TEM",
+         #     "I-PRODUCT-LEGAL",
+         #     "I-LOCATION-STRUC",
+         #     "I-ORGANIZATION",
+         #     "B-PHONENUMBER",
+         #     "B-IP",
+         #     "B-QUANTITY-AGE",
+         #     "I-DATETIME-TIME",
+         #     "I-DATETIME",
+         #     "B-ORGANIZATION-MED",
+         #     "B-DATETIME-SET",
+         #     "I-EVENT-CUL",
+         #     "B-QUANTITY-DIM",
+         #     "I-QUANTITY-DIM",
+         #     "B-EVENT",
+         #     "B-DATETIME-DATERANGE",
+         #     "I-EVENT-GAMESHOW",
+         #     "B-PRODUCT-AWARD",
+         #     "B-LOCATION-STRUC",
+         #     "B-LOCATION",
+         #     "B-PRODUCT",
+         #     "I-MISCELLANEOUS",
+         #     "B-SKILL",
+         #     "I-QUANTITY-ORD",
+         #     "I-ORGANIZATION-STOCK",
+         #     "I-LOCATION-GEO",
+         #     "B-PERSON",
+         #     "B-PRODUCT-COM",
+         #     "B-PRODUCT-LEGAL",
+         #     "I-LOCATION",
+         #     "B-QUANTITY-TEM",
+         #     "I-PRODUCT",
+         #     "B-QUANTITY-CUR",
+         #     "I-QUANTITY-CUR",
+         #     "B-LOCATION-GPE",
+         #     "I-PHONENUMBER",
+         #     "I-ORGANIZATION-MED",
+         #     "I-EVENT-NATURAL",
+         #     "I-EMAIL",
+         #     "B-ORGANIZATION",
+         #     "B-URL",
+         #     "I-DATETIME-TIMERANGE",
+         #     "I-QUANTITY",
+         #     "I-IP",
+         #     "B-EVENT-SPORT",
+         #     "B-PERSONTYPE",
+         #     "B-QUANTITY-PER",
+         #     "I-QUANTITY-PER",
+         #     "I-PRODUCT-COM",
+         #     "I-DATETIME-DURATION",
+         #     "B-LOCATION-GPE-GEO",
+         #     "B-QUANTITY-ORD",
+         #     "I-EVENT",
+         #     "B-DATETIME-TIME",
+         #     "B-QUANTITY",
+         #     "I-DATETIME-SET",
+         #     "I-LOCATION-GPE-GEO",
+         #     "B-ORGANIZATION-STOCK",
+         #     "I-ORGANIZATION-SPORTS",
+         #     "I-SKILL",
+         #     "I-URL",
+         #     "B-DATETIME-DURATION",
+         #     "I-DATETIME-DATE",
+         #     "I-PERSONTYPE",
+         #     "B-DATETIME-DATE",
+         #     "I-DATETIME-DATERANGE",
+         #     "B-LOCATION-GEO",
+         #     "B-EMAIL", "X", "<s>", "</s>"]
+
+         # vlsp2016
+         return [
+             "B-ORG", "B-MISC",
+             "I-PER",
+             "I-ORG",
+             "B-LOC",
+             "I-MISC",
+             "I-LOC",
+             "O",
+             "B-PER",
+             "X",
+             "<s>",
+             "</s>"]
+
+         # vlsp2018
+         # return [
+         #     "O", "I-ORGANIZATION",
+         #     "B-ORGANIZATION",
+         #     "I-LOCATION",
+         #     "B-MISCELLANEOUS",
+         #     "I-PERSON",
+         #     "B-PERSON",
+         #     "I-MISCELLANEOUS",
+         #     "B-LOCATION",
+         #     "X",
+         #     "<s>",
+         #     "</s>"]
+
+     def get_auxlabels(self):
+         return ["O", "B", "I", "X", "<s>", "</s>"]
+
+     def get_start_label_id(self):
+         label_list = self.get_labels()
+         label_map = {label: i for i, label in enumerate(label_list, 1)}
+         return label_map['<s>']
+
+     def get_stop_label_id(self):
+         label_list = self.get_labels()
+         label_map = {label: i for i, label in enumerate(label_list, 1)}
+         return label_map['</s>']
+
+     def _create_examples(self, lines, imgs, auxlabels, set_type):
+         examples = []
+         for i, (sentence, label) in enumerate(lines):
+             guid = "%s-%s" % (set_type, i)
+             text_a = ' '.join(sentence)
+             text_b = None
+             img_id = imgs[i]
+             auxlabel = auxlabels[i]
+             examples.append(
+                 SBInputExample(guid=guid, text_a=text_a, text_b=text_b, img_id=img_id, label=label, auxlabel=auxlabel))
+         return examples
+
+
+ def image_process(image_path, transform):
+     image = Image.open(image_path).convert('RGB')
+     image = transform(image)
+     return image
+
+
+ def convert_mm_examples_to_features(examples, label_list, auxlabel_list,
+                                     max_seq_length, tokenizer, crop_size, path_img):
+     label_map = {label: i for i, label in enumerate(label_list, 1)}
+     auxlabel_map = {label: i for i, label in enumerate(auxlabel_list, 1)}
+
+     features = []
+     count = 0
+
+     transform = transforms.Compose([
+         transforms.Resize([256, 256]),
+         transforms.RandomCrop(crop_size),  # args.crop_size, by default it is set to be 224
+         transforms.RandomHorizontalFlip(),
+         transforms.ToTensor(),
+         transforms.Normalize((0.485, 0.456, 0.406),
+                              (0.229, 0.224, 0.225))])
+
+     for (ex_index, example) in enumerate(examples):
+         textlist = example.text_a.split(' ')
+         labellist = example.label
+         auxlabellist = example.auxlabel
+         tokens = []
+         labels = []
+         auxlabels = []
+         for i, word in enumerate(textlist):
+             token = tokenizer.tokenize(word)
+             tokens.extend(token)
+             label_1 = labellist[i]
+             auxlabel_1 = auxlabellist[i]
+             for m in range(len(token)):
+                 if m == 0:
+                     labels.append(label_1)
+                     auxlabels.append(auxlabel_1)
+                 else:
+                     labels.append("X")
+                     auxlabels.append("X")
+         if len(tokens) >= max_seq_length - 1:
+             tokens = tokens[0:(max_seq_length - 2)]
+             labels = labels[0:(max_seq_length - 2)]
+             auxlabels = auxlabels[0:(max_seq_length - 2)]
+         ntokens = []
+         segment_ids = []
+         label_ids = []
+         auxlabel_ids = []
+         ntokens.append("<s>")
+         segment_ids.append(0)
+         label_ids.append(label_map["<s>"])
+         auxlabel_ids.append(auxlabel_map["<s>"])
+         for i, token in enumerate(tokens):
+             ntokens.append(token)
+             segment_ids.append(0)
+             label_ids.append(label_map[labels[i]])
+             auxlabel_ids.append(auxlabel_map[auxlabels[i]])
+         ntokens.append("</s>")
+         segment_ids.append(0)
+         label_ids.append(label_map["</s>"])
+         auxlabel_ids.append(auxlabel_map["</s>"])
+         input_ids = tokenizer.convert_tokens_to_ids(ntokens)
+         input_mask = [1] * len(input_ids)
+         added_input_mask = [1] * (len(input_ids) + 49)  # the 49 extra positions encode the regional image representations
+
+         while len(input_ids) < max_seq_length:
+             input_ids.append(0)
+             input_mask.append(0)
+             added_input_mask.append(0)
+             segment_ids.append(0)
+             label_ids.append(0)
+             auxlabel_ids.append(0)
+
+         assert len(input_ids) == max_seq_length
+         assert len(input_mask) == max_seq_length
+         assert len(segment_ids) == max_seq_length
+         assert len(label_ids) == max_seq_length
+         assert len(auxlabel_ids) == max_seq_length
+
+         image_name = example.img_id
+         image_path = os.path.join(path_img, image_name)
+
+         if not os.path.exists(image_path):
+             if 'NaN' not in image_path:
+                 print(image_path)
+         try:
+             image = image_process(image_path, transform)
+         except Exception:
+             # Fall back to a neutral background image when the file is missing or unreadable
+             count += 1
+             image_path_fail = os.path.join(path_img, 'background.jpg')
+             image = image_process(image_path_fail, transform)
+         else:
+             if ex_index < 2:
+                 logger.info("*** Example ***")
+                 logger.info("guid: %s" % (example.guid))
+                 logger.info("tokens: %s" % " ".join(
+                     [str(x) for x in tokens]))
+                 logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+                 logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+                 logger.info(
+                     "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+                 logger.info("label: %s" % " ".join([str(x) for x in label_ids]))
+                 logger.info("auxlabel: %s" % " ".join([str(x) for x in auxlabel_ids]))
+
+         features.append(
+             SBInputFeatures(input_ids=input_ids, input_mask=input_mask, added_input_mask=added_input_mask,
+                             segment_ids=segment_ids, img_feat=image, label_id=label_ids, auxlabel_id=auxlabel_ids))
+
+     print('the number of problematic samples: ' + str(count))
+     return features
+
+
+ # if __name__ == "__main__":
+ #     processor = MNERProcessor_2016()
+ #     label_list = processor.get_labels()
+ #     auxlabel_list = processor.get_auxlabels()
+ #     num_labels = len(label_list) + 1  # label 0 corresponds to padding, labels in label_list start from 1
+ #
+ #     start_label_id = processor.get_start_label_id()
+ #     stop_label_id = processor.get_stop_label_id()
+ #
+ #     data_dir = r'sample_data'
+ #     train_examples = processor.get_train_examples(data_dir)
+ #     print(train_examples[0].img_id)
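
A minimal sketch of the data path, following the commented __main__ block above (the `sample_data` directory and its `images` subfolder are assumptions; `train.txt` must use the IMGID/token-tab-label layout that `sbreadfile` expects):

from transformers import AutoTokenizer
from Model.MultimodelNER.VLSP2016.dataset_roberta import (
    MNERProcessor_2016, convert_mm_examples_to_features)

processor = MNERProcessor_2016()
examples = processor.get_train_examples('sample_data')  # reads sample_data/train.txt
tokenizer = AutoTokenizer.from_pretrained('vinai/phobert-base-v2', do_lower_case=False)
features = convert_mm_examples_to_features(
    examples, processor.get_labels(), processor.get_auxlabels(),
    max_seq_length=256, tokenizer=tokenizer,
    crop_size=224, path_img='sample_data/images')  # image folder is an assumption
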
Model/MultimodelNER/VLSP2016/list.txt ADDED
@@ -0,0 +1,5 @@
+ IMGID:namngo
+ Toi
+ ten
+ la
+ Minh
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json ADDED
File without changes
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/refs/main ADDED
@@ -0,0 +1 @@
+ 2b51e367d92093c9688112098510e6a58bab67cd
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba09eb4c244a5b3a49ad76d52d129ac085b61f5c6287de7f99508b02be589f9
+ size 540322347
Model/MultimodelNER/VLSP2016/test.txt ADDED
@@ -0,0 +1,78 @@
+ IMGID:014716
+ “ O
+ Tôi O
+ xin O
+ cám_ơn O
+ thượng_sĩ O
+ Nguyễn B-PER
+ Trung I-PER
+ Hiếu I-PER
+ ( O
+ người O
+ phiên_dịch O
+ tiếng B-MISC
+ Anh I-MISC
+ cho O
+ đơn_vị O
+ tình_báo O
+ quân_sự O
+ số O
+ 635 O
+ của O
+ quân_đội O
+ Mỹ B-LOC
+ biên_chế O
+ bên O
+ cạnh O
+ lữ_đoàn B-ORG
+ bộ_binh I-ORG
+ số I-ORG
+ 11 I-ORG
+ , O
+ sư_đoàn B-ORG
+ bộ_binh I-ORG
+ 23 I-ORG
+ ) O
+ , O
+ người O
+ đã O
+ cứu O
+ cuốn O
+ nhật_ký O
+ của O
+ chị O
+ tôi O
+ khỏi O
+ bị O
+ quẳng O
+ vào O
+ đống O
+ lửa O
+ bởi O
+ anh O
+ đã O
+ nhận O
+ ra O
+ trong O
+ cuốn O
+ sổ O
+ này O
+ đã O
+ chứa_đựng O
+ lửa O
+ rồi O
+ để O
+ anh O
+ trao O
+ lại O
+ nó O
+ cho O
+ Fred B-PER
+ như O
+ một O
+ lời O
+ uỷ_thác O
+ từ O
+ chị O
+ tôi O
+ . O
Model/MultimodelNER/VLSP2016/train_umt_2016.py ADDED
@@ -0,0 +1,352 @@
1
+ import os
2
+ import sys
3
+
4
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
5
+ import argparse
6
+
7
+ import logging
8
+ import random
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from transformers import AutoTokenizer, BertConfig
13
+ from Model.MultimodelNER.UMT import UMT
14
+ from Model.MultimodelNER import resnet as resnet
15
+ from Model.MultimodelNER.resnet_utils import myResnet
16
+ from Model.MultimodelNER.VLSP2016.dataset_roberta import convert_mm_examples_to_features, MNERProcessor_2016
17
+ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
18
+ TensorDataset)
19
+ from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
20
+ from Model.MultimodelNER.ner_evaluate import evaluate_each_class,evaluate
21
+ from seqeval.metrics import classification_report
22
+ from tqdm import tqdm, trange
23
+ import json
24
+ from Model.MultimodelNER.predict import convert_mm_examples_to_features_predict, get_test_examples_predict
25
+ from Model.MultimodelNER.Ner_processing import *
26
+ CONFIG_NAME = 'bert_config.json'
27
+ WEIGHTS_NAME = 'pytorch_model.bin'
28
+
29
+ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
30
+ datefmt='%m/%d/%Y %H:%M:%S',
31
+ level=logging.INFO)
32
+ logger = logging.getLogger(__name__)
+ parser = argparse.ArgumentParser()
+ ## Required parameters
+ parser.add_argument("--negative_rate",
+                     default=16,
+                     type=int,
+                     help="the negative samples rate")
+
+ parser.add_argument('--lamb',
+                     default=0.62,
+                     type=float)
+
+ parser.add_argument('--temp',
+                     type=float,
+                     default=0.179,
+                     help="parameter for CL training")
+
+ parser.add_argument('--temp_lamb',
+                     type=float,
+                     default=0.7,
+                     help="parameter for CL training")
+
+ parser.add_argument("--data_dir",
+                     default='./data/twitter2017',
+                     type=str,
+                     help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
+ parser.add_argument("--bert_model", default='vinai/phobert-base-v2', type=str)
+ parser.add_argument("--task_name",
+                     default='sonba',
+                     type=str,
+                     help="The name of the task to train.")
+ parser.add_argument("--output_dir",
+                     default='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/best_model/',
+                     type=str,
+                     help="The output directory where the model predictions and checkpoints will be written.")
+
+ ## Other parameters
+ parser.add_argument("--cache_dir",
+                     default="",
+                     type=str,
+                     help="Where do you want to store the pre-trained models downloaded from s3")
+
+ parser.add_argument("--max_seq_length",
+                     default=128,
+                     type=int,
+                     help="The maximum total input sequence length after WordPiece tokenization. \n"
+                          "Sequences longer than this will be truncated, and sequences shorter \n"
+                          "than this will be padded.")
+
+ parser.add_argument("--do_train",
+                     action='store_true',
+                     help="Whether to run training.")
+
+ parser.add_argument("--do_eval",
+                     action='store_true',
+                     help="Whether to run eval on the dev set.")
+
+ parser.add_argument("--do_lower_case",
+                     action='store_true',
+                     help="Set this flag if you are using an uncased model.")
+
+ parser.add_argument("--train_batch_size",
+                     default=64,
+                     type=int,
+                     help="Total batch size for training.")
+
+ parser.add_argument("--eval_batch_size",
+                     default=16,
+                     type=int,
+                     help="Total batch size for eval.")
+
+ parser.add_argument("--learning_rate",
+                     default=5e-5,
+                     type=float,
+                     help="The initial learning rate for Adam.")
+
+ parser.add_argument("--num_train_epochs",
+                     default=12.0,
+                     type=float,
+                     help="Total number of training epochs to perform.")
+
+ parser.add_argument("--warmup_proportion",
+                     default=0.1,
+                     type=float,
+                     help="Proportion of training to perform linear learning rate warmup for. "
+                          "E.g., 0.1 = 10%% of training.")
+
+ parser.add_argument("--no_cuda",
+                     action='store_true',
+                     help="Whether not to use CUDA when available")
+
+ parser.add_argument("--local_rank",
+                     type=int,
+                     default=-1,
+                     help="local_rank for distributed training on gpus")
+
+ parser.add_argument('--seed',
+                     type=int,
+                     default=37,
+                     help="random seed for initialization")
+
+ parser.add_argument('--gradient_accumulation_steps',
+                     type=int,
+                     default=1,
+                     help="Number of updates steps to accumulate before performing a backward/update pass.")
+
+ parser.add_argument('--fp16',
+                     action='store_true',
+                     help="Whether to use 16-bit float precision instead of 32-bit")
+
+ parser.add_argument('--loss_scale',
+                     type=float, default=0,
+                     help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
+                          "0 (default value): dynamic loss scaling.\n"
+                          "Positive power of 2: static loss scaling value.\n")
+
+ parser.add_argument('--mm_model', default='MTCCMBert', help='model name')  # 'MTCCMBert', 'NMMTCCMBert'
+ parser.add_argument('--layer_num1', type=int, default=1, help='number of txt2img layer')
+ parser.add_argument('--layer_num2', type=int, default=1, help='number of img2txt layer')
+ parser.add_argument('--layer_num3', type=int, default=1, help='number of txt2txt layer')
+ parser.add_argument('--fine_tune_cnn', action='store_true', help='fine tune pre-trained CNN if True')
+ parser.add_argument('--resnet_root', default='E:/demo_datn/pythonProject1/Model/Resnet/', help='path to the pre-trained cnn models')
+ parser.add_argument('--crop_size', type=int, default=224, help='crop size of image')
+ parser.add_argument('--path_image', default='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Image', help='path to images')
+ # parser.add_argument('--mm_model', default='TomBert', help='model name') #
+ parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
+ parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
+ args = parser.parse_args()
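+ # Example invocation (illustrative; the default paths above are machine-specific):
+ #   python train_umt_2016.py --do_train --do_eval \
+ #       --data_dir ./data/twitter2017 --bert_model vinai/phobert-base-v2 \
+ #       --train_batch_size 64 --num_train_epochs 12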
+
+ processors = {
+     "twitter2015": MNERProcessor_2016,
+     "twitter2017": MNERProcessor_2016,
+     "sonba": MNERProcessor_2016
+ }
+
+ random.seed(args.seed)
+ np.random.seed(args.seed)
+ torch.manual_seed(args.seed)
+
+ task_name = args.task_name.lower()
+
+ processor = processors[task_name]()
+ label_list = processor.get_labels()
+ auxlabel_list = processor.get_auxlabels()
+ num_labels = len(label_list) + 1  # label 0 corresponds to padding; labels in label_list start from 1
+ auxnum_labels = len(auxlabel_list) + 1  # label 0 corresponds to padding; labels in auxlabel_list start from 1
+
+ start_label_id = processor.get_start_label_id()
+ stop_label_id = processor.get_stop_label_id()
+
+ # Initialization of our conversion matrix; in this implementation it is a 7*13
+ # matrix (auxiliary labels x full labels) initialized as follows:
+ trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+ trans_matrix[0, 0] = 1  # pad to pad
+ trans_matrix[1, 1] = 1  # O to O
+ trans_matrix[2, 2] = 0.25  # B to B-MISC
+ trans_matrix[2, 4] = 0.25  # B to B-PER
+ trans_matrix[2, 6] = 0.25  # B to B-ORG
+ trans_matrix[2, 8] = 0.25  # B to B-LOC
+ trans_matrix[3, 3] = 0.25  # I to I-MISC
+ trans_matrix[3, 5] = 0.25  # I to I-PER
+ trans_matrix[3, 7] = 0.25  # I to I-ORG
+ trans_matrix[3, 9] = 0.25  # I to I-LOC
+ trans_matrix[4, 10] = 1  # X to X
+ trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+ trans_matrix[6, 12] = 1  # [SEP] to [SEP]
+ '''
+ trans_matrix = np.zeros((num_labels, auxnum_labels), dtype=float)
+ trans_matrix[0,0]=1 # pad to pad
+ trans_matrix[1,1]=1
+ trans_matrix[2,2]=1
+ trans_matrix[4,2]=1
+ trans_matrix[6,2]=1
+ trans_matrix[8,2]=1
+ trans_matrix[3,3]=1
+ trans_matrix[5,3]=1
+ trans_matrix[7,3]=1
+ trans_matrix[9,3]=1
+ trans_matrix[10,4]=1
+ trans_matrix[11,5]=1
+ trans_matrix[12,6]=1
+ '''
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
223
+
224
+ tokenizer = AutoTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
225
+
226
+
227
+
228
+ net = getattr(resnet, 'resnet152')()
229
+ net.load_state_dict(torch.load(os.path.join(args.resnet_root, 'resnet152.pth')))
230
+ encoder = myResnet(net, args.fine_tune_cnn, device)
231
+
232
+
233
+ output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
234
+ # output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
235
+ output_encoder_file = os.path.join(args.output_dir, "pytorch_encoder.bin")
236
+
237
+ temp = args.temp
238
+ temp_lamb = args.temp_lamb
239
+ lamb = args.lamb
240
+ negative_rate = args.negative_rate
241
+ # # loadmodel
242
+ # model = UMT.from_pretrained(args.bert_model,
243
+ # cache_dir=args.cache_dir, layer_num1=args.layer_num1,
244
+ # layer_num2=args.layer_num2,
245
+ # layer_num3=args.layer_num3,
246
+ # num_labels_=num_labels, auxnum_labels=auxnum_labels)
247
+ # model.load_state_dict(torch.load(output_model_file,map_location=torch.device('cpu')))
248
+ # model.to(device)
249
+ # encoder_state_dict = torch.load(output_encoder_file,map_location=torch.device('cpu'))
250
+ # encoder.load_state_dict(encoder_state_dict)
251
+ # encoder.to(device)
252
+ # print(model)
253
+
+ def load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels):
+     model = UMT.from_pretrained(args.bert_model,
+                                 cache_dir=args.cache_dir, layer_num1=args.layer_num1,
+                                 layer_num2=args.layer_num2,
+                                 layer_num3=args.layer_num3,
+                                 num_labels_=num_labels, auxnum_labels=auxnum_labels)
+     model.load_state_dict(torch.load(output_model_file, map_location=torch.device('cpu')))
+     model.to(device)
+     encoder_state_dict = torch.load(output_encoder_file, map_location=torch.device('cpu'))
+     encoder.load_state_dict(encoder_state_dict)
+     encoder.to(device)
+     return model, encoder
+
+ model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels)
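+ # load_model restores the fine-tuned UMT weights and the ResNet encoder from the
+ # checkpoint files above; map_location='cpu' keeps loading safe on CPU-only
+ # machines before both modules are moved to `device`.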
+ #
+ # # sentence = 'Thương biết_mấy những Thuận, những Liên, những Luận, Xuân, Nghĩa mỗi người một hoàn_cảnh nhưng đều rất giống nhau: rất ham học, rất cố_gắng để đạt mức hiểu biết cao nhất.'
+ # # image_path = '/kaggle/working/data/014715.jpg'
+ # # # crop_size = 224
+ path_image = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Image'
+ trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+ trans_matrix[0, 0] = 1  # pad to pad
+ trans_matrix[1, 1] = 1  # O to O
+ trans_matrix[2, 2] = 0.25  # B to B-MISC
+ trans_matrix[2, 4] = 0.25  # B to B-PER
+ trans_matrix[2, 6] = 0.25  # B to B-ORG
+ trans_matrix[2, 8] = 0.25  # B to B-LOC
+ trans_matrix[3, 3] = 0.25  # I to I-MISC
+ trans_matrix[3, 5] = 0.25  # I to I-PER
+ trans_matrix[3, 7] = 0.25  # I to I-ORG
+ trans_matrix[3, 9] = 0.25  # I to I-LOC
+ trans_matrix[4, 10] = 1  # X to X
+ trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+ trans_matrix[6, 12] = 1  # [SEP] to [SEP]
+
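+ # Note: this rebuilds the same 7x13 aux-to-full conversion matrix used above;
+ # predict() below receives this copy as its `trans_matrix` argument.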
+ def predict(model_umt, encoder_umt, eval_examples, tokenizer, device, path_image, trans_matrix):
+     features = convert_mm_examples_to_features_predict(eval_examples, 256, tokenizer, 224, path_image)
+
+     input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
+     input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
+     added_input_mask = torch.tensor([f.added_input_mask for f in features], dtype=torch.long)
+     segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
+     img_feats = torch.stack([f.img_feat for f in features])
+     print(img_feats)
+     eval_data = TensorDataset(input_ids, input_mask, added_input_mask, segment_ids, img_feats)
+     eval_sampler = SequentialSampler(eval_data)
+     eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=16)
+
+     model_umt.eval()
+     encoder_umt.eval()
+
+     y_pred = []
+     label_map = {i: label for i, label in enumerate(label_list, 1)}
+     label_map[0] = "<pad>"
+
+     for input_ids, input_mask, added_input_mask, segment_ids, img_feats in tqdm(eval_dataloader, desc="Evaluating"):
+         input_ids = input_ids.to(device)
+         input_mask = input_mask.to(device)
+         added_input_mask = added_input_mask.to(device)
+         segment_ids = segment_ids.to(device)
+         img_feats = img_feats.to(device)
+
+         with torch.no_grad():
+             imgs_f, img_mean, img_att = encoder_umt(img_feats)
+             predicted_label_seq_ids = model_umt(input_ids, segment_ids, input_mask, added_input_mask, img_att,
+                                                 trans_matrix)
+
+         logits = predicted_label_seq_ids
+         input_mask = input_mask.to('cpu').numpy()
+
+         for i, mask in enumerate(input_mask):
+             temp_1 = []
+             for j, m in enumerate(mask):
+                 if j == 0:
+                     continue
+                 if m:
+                     if label_map[logits[i][j]] not in ["<pad>", "<s>", "</s>", "X"]:
+                         temp_1.append(label_map[logits[i][j]])
+                 else:
+                     break
+             y_pred.append(temp_1)
+
+     a = eval_examples[0].text_a.split(" ")
+
+     return y_pred, a
+
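+ # predict() returns one label sequence per example plus the whitespace-split
+ # tokens of the first example; the helpers below pair those tokens with labels.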
+ eval_examples = get_test_examples_predict('E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/')
+ y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device, path_image, trans_matrix)
+ print(y_pred)
+ formatted_output = format_predictions(a, y_pred[0])
+ print(formatted_output)
+ final = process_predictions(formatted_output)
+ final2 = combine_entities(final)
+ final3 = remove_B_prefix(final2)
+ final4 = combine_i_tags(final3)
+
+ print(final4)
+
Model/MultimodelNER/VLSP2021/Filetxt/test.txt ADDED
@@ -0,0 +1,97 @@
+ IMGID:taybannha
+ Dân
+ trí
+ Chức
+
+ địch
+ Euro
+ 2008
+ đầy
+ thuyết
+ phục
+ của
+ Tây
+ Ban
+ Nha
+ trên
+ đất
+ Áo
+
+ Thụy
+
+ đã
+ mở
+ ra
+ kỷ
+ nguyên
+ vinh
+ quanh
+ của
+ La
+ Furia
+ Roja
+ ,
+ với
+ lối
+ chơi
+ tiqui
+ taka
+ đầy
+ biến
+ ảo
+ .
+ Trong
+ quá
+ khứ
+ ,
+ Tây
+ Ban
+ Nha
+ nổi
+ tiếng
+ với
+ biệt
+ danh
+ Vua
+ vòng
+ loại
+ .
+ Họ
+ thường
+ thi
+ đấu
+ rất
+ tốt
+
+ vòng
+ loại
+ nhưng
+ lại
+ chưa
+ bao
+ giờ
+ chứng
+ minh
+ được
+ sức
+ mạnh
+
+ vòng
+ chung
+ kết
+ giải
+ đấu
+ lớn
+ .
+ Lần
+ duy
+ nhất
+ họ
+ lên
+ ngôi
+
+
+ kỳ
+ Euro
+ 1964
+ .
Model/MultimodelNER/VLSP2021/Image/taybannha.jpg ADDED
Model/MultimodelNER/VLSP2021/MNER_2021.py ADDED
@@ -0,0 +1,151 @@
+ import streamlit as st
+ from spacy import displacy
+ from Model.NER.VLSP2021.Predict_Ner import ViTagger
+ import re
+ from thunghiemxuly import save_uploaded_image, convert_text_to_txt, add_string_to_txt
+
+ import os
+ from transformers import AutoTokenizer, BertConfig
+ from Model.MultimodelNER.VLSP2021.train_umt_2021 import load_model, predict
+ from Model.MultimodelNER.Ner_processing import format_predictions, process_predictions, combine_entities, remove_B_prefix, combine_i_tags
+
+ from Model.MultimodelNER.predict import get_test_examples_predict
+ from Model.MultimodelNER import resnet as resnet
+ from Model.MultimodelNER.resnet_utils import myResnet
+ import torch
+ import numpy as np
+ from Model.MultimodelNER.VLSP2021.dataset_roberta import MNERProcessor_2021
+
+ CONFIG_NAME = 'bert_config.json'
+ WEIGHTS_NAME = 'pytorch_model.bin'
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ net = getattr(resnet, 'resnet152')()
+ net.load_state_dict(torch.load(os.path.join('E:/demo_datn/pythonProject1/Model/Resnet/', 'resnet152.pth')))
+ encoder = myResnet(net, True, device)
+
+
+ def process_text(text):
+     # Remove redundant spaces as well as leading and trailing whitespace
+     processed_text = re.sub(r'\s+', ' ', text.strip())
+     return processed_text
+
+
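+ # Streamlit page for VLSP2021 multimodal NER: it saves the submitted text and
+ # .jpg where the UMT pipeline expects them, runs prediction, and renders the
+ # recognized entities with displacy.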
+ def show_mner_2021():
+     multimodal_text = st.text_area("Enter your text for MNER:", height=300)
+     multimodal_text = process_text(multimodal_text)  # process the text
+     image = st.file_uploader("Upload an image (only jpg):", type=["jpg"])
+     if st.button("Process Multimodal NER"):
+         save_image = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Image'
+         save_txt = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Filetxt/test.txt'
+         image_name = image.name
+         save_uploaded_image(image, save_image)
+         convert_text_to_txt(multimodal_text, save_txt)
+         add_string_to_txt(image_name, save_txt)
+         st.image(image, caption="Uploaded Image", use_column_width=True)
+
+         bert_model = 'vinai/phobert-base-v2'
+         output_dir = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/best_model'
+         output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
+         output_encoder_file = os.path.join(output_dir, "pytorch_encoder.bin")
+         processor = MNERProcessor_2021()
+         label_list = processor.get_labels()
+         auxlabel_list = processor.get_auxlabels()
+         num_labels = len(label_list) + 1
+         auxnum_labels = len(auxlabel_list) + 1
+         trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+         trans_matrix[0, 0] = 1  # pad to pad
+         trans_matrix[1, 1] = 1  # O to O
+         trans_matrix[2, 2] = 0.25  # B to B-MISC
+         trans_matrix[2, 4] = 0.25  # B to B-PER
+         trans_matrix[2, 6] = 0.25  # B to B-ORG
+         trans_matrix[2, 8] = 0.25  # B to B-LOC
+         trans_matrix[3, 3] = 0.25  # I to I-MISC
+         trans_matrix[3, 5] = 0.25  # I to I-PER
+         trans_matrix[3, 7] = 0.25  # I to I-ORG
+         trans_matrix[3, 9] = 0.25  # I to I-LOC
+         trans_matrix[4, 10] = 1  # X to X
+         trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+         trans_matrix[6, 12] = 1  # [SEP] to [SEP]
+         tokenizer = AutoTokenizer.from_pretrained(bert_model, do_lower_case=False)
+         model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels,
+                                             auxnum_labels)
+         eval_examples = get_test_examples_predict(
+             'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Filetxt/')
+
+         y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device, save_image, trans_matrix)
+         formatted_output = format_predictions(a, y_pred[0])
+         final = process_predictions(formatted_output)
+         final2 = combine_entities(final)
+         final3 = remove_B_prefix(final2)
+         final4 = combine_i_tags(final3)
+
+         words_and_labels = final4
+         # Build the list of words
+         words = [word for word, _ in words_and_labels]
+         # Build the entity spans (start/end character offsets and label) for
+         # each word, skipping the 'O' label
+         entities = [{'start': sum(len(word) + 1 for word, _ in words_and_labels[:i]),
+                      'end': sum(len(word) + 1 for word, _ in words_and_labels[:i + 1]), 'label': label} for
+                     i, (word, label)
+                     in enumerate(words_and_labels) if label != 'O']
+         # print(entities)
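+         # Offset arithmetic example: each word contributes len(word) + 1
+         # characters (word plus one space), so for ["Tây", "Ban", "Nha"] the
+         # span computed for "Ban" is start = 4, end = 8 (the end offset
+         # includes the trailing space).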
+
+         # Render the visualization without color for 'O' labels
+         html = displacy.render(
+             {"text": " ".join(words), "ents": entities, "title": None},
+             style="ent",
+             manual=True,
+             options={"colors": {"DATETIME-DATERANGE": "#66c2ff",
+                                 "LOCATION-GPE": "#ffcc99",
+                                 "O": None,  # color for the 'O' label
+                                 "QUANTITY-NUM": "#ffdf80",
+                                 "EVENT-CUL": "#bfbfbf",
+                                 "DATETIME": "#80ff80",
+                                 "PERSONTYPE": "#ff80ff",
+                                 "PERSON": "#bf80ff",
+                                 "QUANTITY-PER": "#80cccc",
+                                 "ORGANIZATION": "#ff6666",
+                                 "LOCATION-GEO": "#66cc66",
+                                 "LOCATION-STRUC": "#cccc66",
+                                 "PRODUCT-COM": "#ffff66",
+                                 "DATETIME-DATE": "#66cccc",
+                                 "QUANTITY-DIM": "#6666ff",
+                                 "PRODUCT": "#cc6666",
+                                 "QUANTITY": "#6666cc",
+                                 "DATETIME-DURATION": "#9966ff",
+                                 "QUANTITY-CUR": "#ff9966",
+                                 "DATETIME-TIME": "#cdbf93",
+                                 "QUANTITY-TEM": "#cc9966",
+                                 "DATETIME-TIMERANGE": "#cc8566",
+                                 "EVENT-GAMESHOW": "#8c8c5a",
+                                 "QUANTITY-AGE": "#70db70",
+                                 "QUANTITY-ORD": "#e699ff",
+                                 "PRODUCT-LEGAL": "#806699",
+                                 "LOCATION": "#993366",
+                                 "ORGANIZATION-MED": "#339933",
+                                 "URL": "#ff4d4d",
+                                 "PHONENUMBER": "#99cc99",
+                                 "ORGANIZATION-SPORTS": "#6666ff",
+                                 "EVENT-SPORT": "#ffff80",
+                                 "SKILL": "#b38f66",
+                                 "EVENT-NATURAL": "#ff9966",
+                                 "ADDRESS": "#cc9966",
+                                 "IP": "#b38f66",
+                                 "EMAIL": "#cc8566",
+                                 "ORGANIZATION-STOCK": "#666633",
+                                 "DATETIME-SET": "#70db70",
+                                 "PRODUCT-AWARD": "#e699ff",
+                                 "MISCELLANEOUS": "#806699",
+                                 "LOCATION-GPE-GEO": "#99ffff"}}
+         )
+         # print(html)
+         st.markdown(html, unsafe_allow_html=True)
+
+         # Use the st.html widget to display the HTML
+
+         # Display the entered text
+         # st.write("Entered text:", text)
+
+
+ ### Example 1: A brawl broke out at the ice hockey game between the Penguins and the Islanders in the US (image:penguin)
Model/MultimodelNER/VLSP2021/__pycache__/MNER_2021.cpython-39.pyc ADDED
Binary file (5.34 kB).
 
Model/MultimodelNER/VLSP2021/__pycache__/dataset_roberta.cpython-39.pyc ADDED
Binary file (10.7 kB).
 
Model/MultimodelNER/VLSP2021/__pycache__/train_umt_2021.cpython-39.pyc ADDED
Binary file (8.82 kB).
 
Model/MultimodelNER/VLSP2021/best_model/bert_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "vinai/phobert-base-v2",
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2021/best_model/eval_results.txt ADDED
@@ -0,0 +1,50 @@
+                        precision    recall  f1-score   support
+
+                  /s>    0.0000    0.0000    0.0000         0
+              ADDRESS    0.0455    0.0455    0.0455        22
+             DATETIME    0.4883    0.6221    0.5472       606
+        DATETIME-DATE    0.6016    0.5204    0.5581       563
+   DATETIME-DATERANGE    0.3426    0.2701    0.3020       137
+    DATETIME-DURATION    0.7900    0.6337    0.7033       475
+         DATETIME-SET    0.0000    0.0000    0.0000         4
+        DATETIME-TIME    0.3291    0.5200    0.4031        50
+   DATETIME-TIMERANGE    0.2540    0.1231    0.1658       130
+                EMAIL    1.0000    1.0000    1.0000         2
+                EVENT    0.1687    0.0782    0.1069       179
+            EVENT-CUL    0.5000    0.4375    0.4667        16
+       EVENT-GAMESHOW    0.4085    0.5370    0.4640        54
+        EVENT-NATURAL    0.0000    0.0000    0.0000         9
+          EVENT-SPORT    0.5634    0.5517    0.5575       145
+                   IP    1.0000    1.0000    1.0000        15
+             LOCATION    0.1120    0.2668    0.1578       431
+         LOCATION-GEO    0.5556    0.3333    0.4167       120
+         LOCATION-GPE    0.7486    0.6113    0.6730      2367
+       LOCATION-STRUC    0.5286    0.5248    0.5267       141
+        MISCELLANEOUS    0.0000    0.0000    0.0000         0
+         ORGANIZATION    0.6576    0.7000    0.6782      1630
+     ORGANIZATION-MED    0.5395    0.6833    0.6029       120
+  ORGANIZATION-SPORTS    0.6362    0.7827    0.7019       382
+   ORGANIZATION-STOCK    0.0000    0.0000    0.0000        29
+               PERSON    0.9347    0.9574    0.9459      2466
+           PERSONTYPE    0.5262    0.6104    0.5652       806
+          PHONENUMBER    0.7273    0.8000    0.7619        10
+              PRODUCT    0.4419    0.3834    0.4106       446
+        PRODUCT-AWARD    0.0000    0.0000    0.0000        78
+          PRODUCT-COM    0.3488    0.5068    0.4132       148
+        PRODUCT-LEGAL    0.4107    0.1322    0.2000       174
+             QUANTITY    0.0756    0.3464    0.1241       153
+         QUANTITY-AGE    0.8433    0.9187    0.8794       246
+         QUANTITY-CUR    0.8256    0.8367    0.8311       447
+         QUANTITY-DIM    0.5763    0.2787    0.3757       244
+         QUANTITY-NUM    0.8569    0.5573    0.6754      2182
+         QUANTITY-ORD    0.5388    0.2907    0.3777       454
+         QUANTITY-PER    0.9088    0.8911    0.8999       358
+         QUANTITY-TEM    0.6923    0.8182    0.7500        11
+                SKILL    0.0000    0.0000    0.0000         2
+                  URL    0.5714    0.5714    0.5714         7
+                    _    0.0000    0.0000    0.0000         0
+
+            micro avg    0.6371    0.6260    0.6315     15859
+            macro avg    0.4546    0.4451    0.4386     15859
+         weighted avg    0.6838    0.6260    0.6430     15859
+ Overall: 0.6368360277136259 0.6259537171322278 0.6313479823194582
Model/MultimodelNER/VLSP2021/best_model/model_config.json ADDED
@@ -0,0 +1 @@
+ {"bert_model": "vinai/phobert-base-v2", "do_lower": false, "max_seq_length": 256, "num_labels": 89, "label_map": {"1": "O", "2": "I-PRODUCT-AWARD", "3": "B-MISCELLANEOUS", "4": "B-QUANTITY-NUM", "5": "B-ORGANIZATION-SPORTS", "6": "B-DATETIME", "7": "I-ADDRESS", "8": "I-PERSON", "9": "I-EVENT-SPORT", "10": "B-ADDRESS", "11": "B-EVENT-NATURAL", "12": "I-LOCATION-GPE", "13": "B-EVENT-GAMESHOW", "14": "B-DATETIME-TIMERANGE", "15": "I-QUANTITY-NUM", "16": "I-QUANTITY-AGE", "17": "B-EVENT-CUL", "18": "I-QUANTITY-TEM", "19": "I-PRODUCT-LEGAL", "20": "I-LOCATION-STRUC", "21": "I-ORGANIZATION", "22": "B-PHONENUMBER", "23": "B-IP", "24": "B-QUANTITY-AGE", "25": "I-DATETIME-TIME", "26": "I-DATETIME", "27": "B-ORGANIZATION-MED", "28": "B-DATETIME-SET", "29": "I-EVENT-CUL", "30": "B-QUANTITY-DIM", "31": "I-QUANTITY-DIM", "32": "B-EVENT", "33": "B-DATETIME-DATERANGE", "34": "I-EVENT-GAMESHOW", "35": "B-PRODUCT-AWARD", "36": "B-LOCATION-STRUC", "37": "B-LOCATION", "38": "B-PRODUCT", "39": "I-MISCELLANEOUS", "40": "B-SKILL", "41": "I-QUANTITY-ORD", "42": "I-ORGANIZATION-STOCK", "43": "I-LOCATION-GEO", "44": "B-PERSON", "45": "B-PRODUCT-COM", "46": "B-PRODUCT-LEGAL", "47": "I-LOCATION", "48": "B-QUANTITY-TEM", "49": "I-PRODUCT", "50": "B-QUANTITY-CUR", "51": "I-QUANTITY-CUR", "52": "B-LOCATION-GPE", "53": "I-PHONENUMBER", "54": "I-ORGANIZATION-MED", "55": "I-EVENT-NATURAL", "56": "I-EMAIL", "57": "B-ORGANIZATION", "58": "B-URL", "59": "I-DATETIME-TIMERANGE", "60": "I-QUANTITY", "61": "I-IP", "62": "B-EVENT-SPORT", "63": "B-PERSONTYPE", "64": "B-QUANTITY-PER", "65": "I-QUANTITY-PER", "66": "I-PRODUCT-COM", "67": "I-DATETIME-DURATION", "68": "B-LOCATION-GPE-GEO", "69": "B-QUANTITY-ORD", "70": "I-EVENT", "71": "B-DATETIME-TIME", "72": "B-QUANTITY", "73": "I-DATETIME-SET", "74": "I-LOCATION-GPE-GEO", "75": "B-ORGANIZATION-STOCK", "76": "I-ORGANIZATION-SPORTS", "77": "I-SKILL", "78": "I-URL", "79": "B-DATETIME-DURATION", "80": "I-DATETIME-DATE", "81": "I-PERSONTYPE", "82": "B-DATETIME-DATE", "83": "I-DATETIME-DATERANGE", "84": "B-LOCATION-GEO", "85": "B-EMAIL", "86": "X", "87": "<s>", "88": "</s>"}}
Model/MultimodelNER/VLSP2021/best_model/mtmner_pred.txt ADDED
The diff for this file is too large to render. See raw diff
 
Model/MultimodelNER/VLSP2021/best_model/pytorch_encoder.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9314f35060b4df32a623855c3e6a665cdaee354eeb5cd4925085fc7b00cc180c
+ size 241699561
Model/MultimodelNER/VLSP2021/best_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd439026a1a847a3c53a011acff0ca41b734b4084fd801d99c5e1ba962358d20
+ size 699784873