Linhz committed on
Commit
fd07025
1 Parent(s): 106800c

Upload 80 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. Model/MultimodelNER/Ner_processing.py +95 -0
  2. Model/MultimodelNER/UMT.py +290 -0
  3. Model/MultimodelNER/VLSP2016/Filetxt/list.txt +106 -0
  4. Model/MultimodelNER/VLSP2016/Filetxt/output.txt +6 -0
  5. Model/MultimodelNER/VLSP2016/Filetxt/test.txt +97 -0
  6. Model/MultimodelNER/VLSP2016/Image/014716.jpg +0 -0
  7. Model/MultimodelNER/VLSP2016/Image/My model.jpg +0 -0
  8. Model/MultimodelNER/VLSP2016/Image/bully.jpeg +0 -0
  9. Model/MultimodelNER/VLSP2016/Image/bully.jpg +0 -0
  10. Model/MultimodelNER/VLSP2016/Image/maria.jpg +0 -0
  11. Model/MultimodelNER/VLSP2016/Image/penguin.jpg +0 -0
  12. Model/MultimodelNER/VLSP2016/Image/pero.jpg +0 -0
  13. Model/MultimodelNER/VLSP2016/Image/pero2.jpg +0 -0
  14. Model/MultimodelNER/VLSP2016/Image/taybannha.jpg +0 -0
  15. Model/MultimodelNER/VLSP2016/MNER_2016.py +106 -0
  16. Model/MultimodelNER/VLSP2016/__pycache__/MNER_2016.cpython-39.pyc +0 -0
  17. Model/MultimodelNER/VLSP2016/__pycache__/dataset_roberta.cpython-39.pyc +0 -0
  18. Model/MultimodelNER/VLSP2016/__pycache__/train_umt_2016.cpython-39.pyc +0 -0
  19. Model/MultimodelNER/VLSP2016/best_model/bert_config.json +28 -0
  20. Model/MultimodelNER/VLSP2016/best_model/eval_results.txt +11 -0
  21. Model/MultimodelNER/VLSP2016/best_model/model_config.json +1 -0
  22. Model/MultimodelNER/VLSP2016/best_model/mtmner_pred.txt +0 -0
  23. Model/MultimodelNER/VLSP2016/best_model/pytorch_encoder.bin +3 -0
  24. Model/MultimodelNER/VLSP2016/best_model/pytorch_model.bin +3 -0
  25. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors +3 -0
  26. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json +0 -0
  27. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/refs/main +1 -0
  28. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json +27 -0
  29. Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin +3 -0
  30. Model/MultimodelNER/VLSP2016/dataset_roberta.py +452 -0
  31. Model/MultimodelNER/VLSP2016/list.txt +5 -0
  32. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors +3 -0
  33. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json +0 -0
  34. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/refs/main +1 -0
  35. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json +27 -0
  36. Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin +3 -0
  37. Model/MultimodelNER/VLSP2016/test.txt +78 -0
  38. Model/MultimodelNER/VLSP2016/train_umt_2016.py +352 -0
  39. Model/MultimodelNER/VLSP2021/Filetxt/test.txt +97 -0
  40. Model/MultimodelNER/VLSP2021/Image/taybannha.jpg +0 -0
  41. Model/MultimodelNER/VLSP2021/MNER_2021.py +151 -0
  42. Model/MultimodelNER/VLSP2021/__pycache__/MNER_2021.cpython-39.pyc +0 -0
  43. Model/MultimodelNER/VLSP2021/__pycache__/dataset_roberta.cpython-39.pyc +0 -0
  44. Model/MultimodelNER/VLSP2021/__pycache__/train_umt_2021.cpython-39.pyc +0 -0
  45. Model/MultimodelNER/VLSP2021/best_model/bert_config.json +28 -0
  46. Model/MultimodelNER/VLSP2021/best_model/eval_results.txt +50 -0
  47. Model/MultimodelNER/VLSP2021/best_model/model_config.json +1 -0
  48. Model/MultimodelNER/VLSP2021/best_model/mtmner_pred.txt +0 -0
  49. Model/MultimodelNER/VLSP2021/best_model/pytorch_encoder.bin +3 -0
  50. Model/MultimodelNER/VLSP2021/best_model/pytorch_model.bin +3 -0
Model/MultimodelNER/Ner_processing.py ADDED
@@ -0,0 +1,95 @@
+ def format_predictions(words, predictions):
+     '''
+     Convert parallel lists of words and predicted labels into (word, label) pairs.
+     '''
+     formatted = []
+     for word, label in zip(words, predictions):
+         formatted.append((word, label))
+     return formatted
+
+
+ def process_predictions(predictions):
+     '''
+     Replace the underscores that join compound words with spaces, keeping the same label.
+     '''
+     formatted = []
+     for word, label in predictions:
+         if '_' in word:
+             formatted.append((word.replace('_', ' '), label))
+         else:
+             formatted.append((word, label))
+     return formatted
+
+
+ def combine_entities(predictions):
+     '''
+     Merge consecutive B-/I- tokens of the same entity type into a single span.
+     '''
+     combined = []
+     temp_entity = []
+     temp_label = None
+
+     for word, label in predictions:
+         if label.startswith('B-'):
+             if temp_entity:
+                 combined.append((' '.join(temp_entity), temp_label))
+                 temp_entity = []
+             temp_entity.append(word)
+             temp_label = label
+         elif label.startswith('I-') and temp_label and label[2:] == temp_label[2:]:
+             temp_entity.append(word)
+         else:
+             if temp_entity:
+                 combined.append((' '.join(temp_entity), temp_label))
+                 temp_entity = []
+                 temp_label = None
+             combined.append((word, label))
+
+     if temp_entity:
+         combined.append((' '.join(temp_entity), temp_label))
+
+     return combined
+
+
+ def remove_B_prefix(entities):
+     modified_entities = []
+     for word, label in entities:
+         if label.startswith('B-'):
+             label = label[2:]  # Strip the 'B-' prefix from the label
+         modified_entities.append((word, label))
+     return modified_entities
+
+
+ def combine_i_tags(tokens_labels):
+     combined = []
+     current_combination = []
+     current_label = None
+
+     for token, label in tokens_labels:
+         if label.startswith('I-'):
+             label = label[2:]  # Remove the 'I-' prefix
+             if current_label is None:
+                 current_label = label
+                 current_combination.append(token)
+             elif current_label == label:
+                 current_combination.append(token)
+             else:
+                 combined.append((' '.join(current_combination), current_label))
+                 current_combination = [token]
+                 current_label = label
+         else:
+             if current_combination:
+                 combined.append((' '.join(current_combination), current_label))
+                 current_combination = []
+                 current_label = None
+             combined.append((token, label))
+
+     if current_combination:
+         combined.append((' '.join(current_combination), current_label))
+
+     return combined
+
+
+ if __name__ == '__main__':
+     # Smoke test, guarded so importing this module does not execute it.
+     tokens_labels = [('Dân', 'O'), ('trí', 'O'), ('Chức', 'O'), ('vô', 'O'), ('địch', 'O'), ('Euro 2008', 'EVENT-SPORT'), ('đầy', 'O'), ('thuyết', 'O'), ('phục', 'O'), ('của', 'O'), ('Tây Ban Nha', 'LOCATION'), ('trên', 'O'), ('đất', 'O'), ('Áo', 'LOCATION'), ('và', 'O'), ('Thụy Sĩ', 'PERSON'), ('đã', 'O'), ('mở', 'O'), ('ra', 'O'), ('kỷ', 'O'), ('nguyên', 'O'), ('vinh', 'O'), ('quanh', 'O'), ('của', 'O'), ('La', 'ORGANIZATION'), ('Furia', 'I-ORGANIZATION-SPORTS'), ('Roja', 'I-ORGANIZATION-SPORTS'), (',', 'O'), ('với', 'O'), ('lối', 'O'), ('chơi', 'O'), ('tiqui', 'O'), ('taka', 'O'), ('đầy', 'O'), ('biến', 'O'), ('ảo', 'O'), ('.', 'O'), ('Trong', 'O'), ('quá', 'O'), ('khứ', 'O'), (',', 'O'), ('Tây Ban Nha', 'LOCATION'), ('nổi', 'O'), ('tiếng', 'O'), ('với', 'O'), ('biệt', 'O'), ('danh', 'O'), ('Vua', 'O'), ('vòng', 'O'), ('loại', 'O'), ('.', 'O'), ('Họ', 'O'), ('thường', 'O'), ('thi', 'O'), ('đấu', 'O'), ('rất', 'O'), ('tốt', 'O'), ('ở', 'O'), ('vòng', 'O'), ('loại', 'O'), ('nhưng', 'O'), ('lại', 'O'), ('chưa', 'O'), ('bao', 'O'), ('giờ', 'O'), ('chứng', 'O'), ('minh', 'O'), ('được', 'O'), ('sức', 'O'), ('mạnh', 'O'), ('ở', 'O'), ('vòng', 'O'), ('chung', 'O'), ('kết', 'O'), ('giải', 'O'), ('đấu', 'O'), ('lớn', 'O'), ('.', 'O'), ('Lần', 'O'), ('duy', 'O'), ('nhất', 'O'), ('họ', 'O'), ('lên', 'O'), ('ngôi', 'O'), ('là', 'O'), ('ở', 'O'), ('kỳ', 'O'), ('Euro', 'EVENT-SPORT'), ('1964', 'O'), ('.', 'O')]
+
+     combined_tokens_labels = combine_i_tags(tokens_labels)
+     print(combined_tokens_labels)
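
These helpers are chained in MNER_2016.py in a fixed order (format → process → combine_entities → remove_B_prefix → combine_i_tags). A minimal sketch of that pipeline on toy inputs; the words and labels below are illustrative, not taken from the dataset:

from Model.MultimodelNER.Ner_processing import (format_predictions, process_predictions,
                                                combine_entities, remove_B_prefix, combine_i_tags)

words = ['Tây_Ban_Nha', 'vô', 'địch']          # illustrative tokens
y_pred = ['B-LOC', 'O', 'O']                   # one predicted label per token
pairs = format_predictions(words, y_pred)      # [('Tây_Ban_Nha', 'B-LOC'), ...]
pairs = process_predictions(pairs)             # '_' -> ' ' inside compound words
pairs = combine_entities(pairs)                # merge B-/I- runs into one span
pairs = remove_B_prefix(pairs)                 # 'B-LOC' -> 'LOC'
pairs = combine_i_tags(pairs)                  # merge stray I- runs sharing a type
print(pairs)                                   # [('Tây Ban Nha', 'LOC'), ('vô', 'O'), ('địch', 'O')]
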
Model/MultimodelNER/UMT.py ADDED
@@ -0,0 +1,290 @@
+ # coding=utf-8
+ # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
+ # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """PyTorch BERT model."""
+
+ from __future__ import absolute_import, division, print_function, unicode_literals
+
+ import copy
+ import json
+ import logging
+ import math
+ import os
+ import shutil
+ import tarfile
+ import tempfile
+ import sys
+ from io import open
+ from torchcrf import CRF
+
+ import torch
+ from torch import nn
+ from torch.nn import CrossEntropyLoss
+
+ import torch.nn.functional as F
+ from torch.autograd import Variable
+
+ logger = logging.getLogger(__name__)
+
+
+ def gelu(x):
+     """Implementation of the gelu activation function.
+     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
+     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
+     Also see https://arxiv.org/abs/1606.08415
+     """
+     return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
+
+
+ def swish(x):
+     return x * torch.sigmoid(x)
+
+
+ ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish}
+
+ from transformers import RobertaModel
+ from transformers.models.roberta.modeling_roberta import RobertaLayer, RobertaPreTrainedModel, RobertaOutput, \
+     RobertaSelfOutput, RobertaIntermediate
+
+
+ class RobertaSelfEncoder(nn.Module):
+     def __init__(self, config):
+         super(RobertaSelfEncoder, self).__init__()
+         layer = RobertaLayer(config)
+         self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(1)])
+
+     def forward(self, hidden_states, attention_mask, output_all_encoded_layers=True):
+         all_encoder_layers = []
+         for layer_module in self.layer:
+             hidden_states = layer_module(hidden_states, attention_mask)
+             if output_all_encoded_layers:
+                 all_encoder_layers.append(hidden_states)
+         if not output_all_encoded_layers:
+             all_encoder_layers.append(hidden_states)
+         return all_encoder_layers
+
+
+ class RobertaCrossEncoder(nn.Module):
+     def __init__(self, config, layer_num):
+         super(RobertaCrossEncoder, self).__init__()
+         layer = RobertaCrossAttentionLayer(config)
+         self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(layer_num)])
+
+     def forward(self, s1_hidden_states, s2_hidden_states, s2_attention_mask, output_all_encoded_layers=True):
+         all_encoder_layers = []
+         for layer_module in self.layer:
+             s1_hidden_states = layer_module(s1_hidden_states, s2_hidden_states, s2_attention_mask)
+             if output_all_encoded_layers:
+                 all_encoder_layers.append(s1_hidden_states)
+         if not output_all_encoded_layers:
+             all_encoder_layers.append(s1_hidden_states)
+         return all_encoder_layers
+
+
+ class RobertaCoAttention(nn.Module):
+     def __init__(self, config):
+         super(RobertaCoAttention, self).__init__()
+         if config.hidden_size % config.num_attention_heads != 0:
+             raise ValueError(
+                 "The hidden size (%d) is not a multiple of the number of attention "
+                 "heads (%d)" % (config.hidden_size, config.num_attention_heads))
+         self.num_attention_heads = config.num_attention_heads
+         self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+         self.all_head_size = self.num_attention_heads * self.attention_head_size
+
+         self.query = nn.Linear(config.hidden_size, self.all_head_size)
+         self.key = nn.Linear(config.hidden_size, self.all_head_size)
+         self.value = nn.Linear(config.hidden_size, self.all_head_size)
+
+         self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+
+     def transpose_for_scores(self, x):
+         new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
+         x = x.view(*new_x_shape)
+         return x.permute(0, 2, 1, 3)
+
+     def forward(self, s1_hidden_states, s2_hidden_states, s2_attention_mask):
+         mixed_query_layer = self.query(s1_hidden_states)
+         mixed_key_layer = self.key(s2_hidden_states)
+         mixed_value_layer = self.value(s2_hidden_states)
+
+         query_layer = self.transpose_for_scores(mixed_query_layer)
+         key_layer = self.transpose_for_scores(mixed_key_layer)
+         value_layer = self.transpose_for_scores(mixed_value_layer)
+
+         # Take the dot product between "query" and "key" to get the raw attention scores.
+         attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+
+         attention_scores = attention_scores / math.sqrt(self.attention_head_size)
+         # Apply the attention mask (precomputed for all layers in the BertModel forward() function)
+         attention_scores = attention_scores + s2_attention_mask
+
+         # Normalize the attention scores to probabilities.
+         attention_probs = nn.Softmax(dim=-1)(attention_scores)
+
+         # This is actually dropping out entire tokens to attend to, which might
+         # seem a bit unusual, but is taken from the original Transformer paper.
+         attention_probs = self.dropout(attention_probs)
+
+         context_layer = torch.matmul(attention_probs, value_layer)
+
+         context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+
+         new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
+         context_layer = context_layer.view(*new_context_layer_shape)
+         return context_layer
+
+
+ class RobertaCrossAttention(nn.Module):
+     def __init__(self, config):
+         super(RobertaCrossAttention, self).__init__()
+         self.self = RobertaCoAttention(config)
+         self.output = RobertaSelfOutput(config)
+
+     def forward(self, s1_input_tensor, s2_input_tensor, s2_attention_mask):
+         s1_cross_output = self.self(s1_input_tensor, s2_input_tensor, s2_attention_mask)
+         attention_output = self.output(s1_cross_output, s1_input_tensor)
+         return attention_output
+
+
+ class RobertaCrossAttentionLayer(nn.Module):
+     def __init__(self, config):
+         super(RobertaCrossAttentionLayer, self).__init__()
+         self.attention = RobertaCrossAttention(config)
+         self.intermediate = RobertaIntermediate(config)
+         self.output = RobertaOutput(config)
+
+     def forward(self, s1_hidden_states, s2_hidden_states, s2_attention_mask):
+         attention_output = self.attention(s1_hidden_states, s2_hidden_states, s2_attention_mask)
+         intermediate_output = self.intermediate(attention_output)
+         layer_output = self.output(intermediate_output, attention_output)
+         return layer_output
+
+
+ class UMT(RobertaPreTrainedModel):
+     """Coupled Cross-Modal Attention BERT model for token-level classification with CRF on top.
+     """
+
+     def __init__(self, config, layer_num1=1, layer_num2=1, layer_num3=1, num_labels_=2, auxnum_labels=2):
+         super(UMT, self).__init__(config)
+         self.num_labels = num_labels_
+         self.roberta = RobertaModel(config)
+         # self.trans_matrix = torch.zeros(num_labels, auxnum_labels)
+         self.self_attention = RobertaSelfEncoder(config)
+         self.self_attention_v2 = RobertaSelfEncoder(config)
+         self.dropout = nn.Dropout(config.hidden_dropout_prob)
+         self.vismap2text = nn.Linear(2048, config.hidden_size)
+         self.vismap2text_v2 = nn.Linear(2048, config.hidden_size)
+         self.txt2img_attention = RobertaCrossEncoder(config, layer_num1)
+         self.img2txt_attention = RobertaCrossEncoder(config, layer_num2)
+         self.txt2txt_attention = RobertaCrossEncoder(config, layer_num3)
+         self.gate = nn.Linear(config.hidden_size * 2, config.hidden_size)
+         ### self.self_attention = BertLastSelfAttention(config)
+         self.classifier = nn.Linear(config.hidden_size * 2, num_labels_)
+         self.aux_classifier = nn.Linear(config.hidden_size, auxnum_labels)
+
+         self.crf = CRF(num_labels_, batch_first=True)
+         self.aux_crf = CRF(auxnum_labels, batch_first=True)
+
+         self.init_weights()
+
+     # this forward is just for predict, not for train
+     # don't confuse this with _forward_alg above.
+     def forward(self, input_ids, segment_ids, input_mask, added_attention_mask, visual_embeds_att, trans_matrix,
+                 labels=None, auxlabels=None):
+         # Get token representations from the RoBERTa encoder
+         features = self.roberta(input_ids, token_type_ids=segment_ids,
+                                 attention_mask=input_mask)  # batch_size * seq_len * hidden_size
+         sequence_output = features["last_hidden_state"]
+         sequence_output = self.dropout(sequence_output)
+
+         extended_txt_mask = input_mask.unsqueeze(1).unsqueeze(2)
+         extended_txt_mask = extended_txt_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+         extended_txt_mask = (1.0 - extended_txt_mask) * -10000.0
+         aux_addon_sequence_encoder = self.self_attention(sequence_output, extended_txt_mask)
+
+         aux_addon_sequence_output = aux_addon_sequence_encoder[-1]
+         aux_addon_sequence_output = aux_addon_sequence_output[0]
+         aux_bert_feats = self.aux_classifier(aux_addon_sequence_output)
+         #######aux_bert_feats = self.aux_classifier(sequence_output)
+         trans_matrix_tensor = torch.tensor(trans_matrix, dtype=torch.float32, device=aux_bert_feats.device)
+         trans_bert_feats = torch.matmul(aux_bert_feats, trans_matrix_tensor)
+
+         # trans_bert_feats = torch.matmul(aux_bert_feats, trans_matrix.float())
+
+         main_addon_sequence_encoder = self.self_attention_v2(sequence_output, extended_txt_mask)
+         main_addon_sequence_output = main_addon_sequence_encoder[-1]
+         main_addon_sequence_output = main_addon_sequence_output[0]
+         vis_embed_map = visual_embeds_att.view(-1, 2048, 49).permute(0, 2, 1)  # self.batch_size, 49, 2048
+         converted_vis_embed_map = self.vismap2text(vis_embed_map)  # self.batch_size, 49, hidden_dim
+
+         # apply txt2img attention mechanism to obtain image-based text representations
+         img_mask = added_attention_mask[:, :49]
+         extended_img_mask = img_mask.unsqueeze(1).unsqueeze(2)
+         extended_img_mask = extended_img_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
+         extended_img_mask = (1.0 - extended_img_mask) * -10000.0
+
+         cross_encoder = self.txt2img_attention(main_addon_sequence_output, converted_vis_embed_map, extended_img_mask)
+         cross_output_layer = cross_encoder[-1]  # self.batch_size * text_len * hidden_dim
+
+         # apply img2txt attention mechanism to obtain multimodal-based text representations
+         converted_vis_embed_map_v2 = self.vismap2text_v2(vis_embed_map)  # self.batch_size, 49, hidden_dim
+
+         cross_txt_encoder = self.img2txt_attention(converted_vis_embed_map_v2, main_addon_sequence_output,
+                                                    extended_txt_mask)
+         cross_txt_output_layer = cross_txt_encoder[-1]  # self.batch_size * 49 * hidden_dim
+         cross_final_txt_encoder = self.txt2txt_attention(main_addon_sequence_output, cross_txt_output_layer,
+                                                          extended_img_mask)
+         ##cross_final_txt_encoder = self.txt2txt_attention(aux_addon_sequence_output, cross_txt_output_layer, extended_img_mask)
+         cross_final_txt_layer = cross_final_txt_encoder[-1]  # self.batch_size * text_len * hidden_dim
+         # cross_final_txt_layer = torch.add(cross_final_txt_layer, sequence_output)
+
+         # visual gate
+         merge_representation = torch.cat((cross_final_txt_layer, cross_output_layer), dim=-1)
+         gate_value = torch.sigmoid(self.gate(merge_representation))  # batch_size, text_len, hidden_dim
+         gated_converted_att_vis_embed = torch.mul(gate_value, cross_output_layer)
+         # reverse_gate_value = torch.neg(gate_value).add(1)
+         # gated_converted_att_vis_embed = torch.add(torch.mul(reverse_gate_value, cross_final_txt_layer),
+         #                                           torch.mul(gate_value, cross_output_layer))
+
+         # direct concatenation
+         # gated_converted_att_vis_embed = self.dropout(gated_converted_att_vis_embed)
+         final_output = torch.cat((cross_final_txt_layer, gated_converted_att_vis_embed), dim=-1)
+         ###### final_output = self.dropout(final_output)
+         # middle_output = torch.cat((cross_final_txt_layer, gated_converted_att_vis_embed), dim=-1)
+         # final_output = torch.cat((sequence_output, middle_output), dim=-1)
+
+         ###### addon_sequence_output = self.self_attention(final_output, extended_txt_mask)
+         bert_feats = self.classifier(final_output)
+
+         alpha = 0.5
+         final_bert_feats = torch.add(torch.mul(bert_feats, alpha), torch.mul(trans_bert_feats, 1 - alpha))
+
+         # suggested by Hongjie
+         # bert_feats = F.log_softmax(bert_feats, dim=-1)
+
+         if labels is not None:
+             beta = 0.5  # 73.87(73.50) 85.37(85.00) 0.5 5e-5 #73.45 85.05 1.0 1 1 1 4e-5 # 73.63 0.1 1 1 1 5e-5 # old 0.1 2 1 1 85.23 0.2 1 1 85.04
+             ##beta = 0.6
+             aux_loss = - self.aux_crf(aux_bert_feats, auxlabels, mask=input_mask.byte(), reduction='mean')
+             main_loss = - self.crf(final_bert_feats, labels, mask=input_mask.byte(), reduction='mean')
+             loss = main_loss + beta * aux_loss
+             return loss
+         else:
+             pred_tags = self.crf.decode(final_bert_feats, mask=input_mask.byte())
+             return pred_tags
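
A minimal sketch of constructing UMT for inference, sized the way MNER_2016.py sizes the label spaces (12 main labels + 1 padding, 6 auxiliary labels + 1 padding); the config path is illustrative, and BertConfig matches the import used elsewhere in this commit:

import torch
from transformers import BertConfig
from Model.MultimodelNER.UMT import UMT

config = BertConfig.from_json_file('best_model/bert_config.json')  # illustrative path
model = UMT(config, layer_num1=1, layer_num2=1, layer_num3=1,
            num_labels_=13, auxnum_labels=7)
model.eval()
# With labels=None, forward() returns CRF-decoded tag id sequences;
# with labels and auxlabels it returns main_loss + 0.5 * aux_loss.
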
Model/MultimodelNER/VLSP2016/Filetxt/list.txt ADDED
@@ -0,0 +1,106 @@
+ IMGID:pero
+ James
+
+ Shan
+ đều
+ nghĩ
+ rằng
+ gửi
+ Pero
+ đi
+ làm
+ nông
+ thử
+ cho
+ vui
+
+ bắt
+ đầu
+ cho
+ Pero
+ du
+
+ từ
+ đầu
+ tháng
+ 3
+ năm
+ nay
+ .
+ Ngày
+ 8
+ 4
+ ,
+ người
+ bạn
+ báo
+ tin
+ cho
+ Alan
+ biết
+ Pero
+ mất
+ tích
+
+ họ
+ tìm
+ kiếm
+ Pero
+
+ rất
+ nhiều
+ nơi
+ .
+ Hai
+ vợ
+ chồng
+ anh
+ James
+ đều
+ nghĩ
+ Pero
+ đã
+ mất
+ .
+ Tuy
+ nhiên
+ một
+ tối
+ nọ
+ khi
+ xuống
+ thăm
+ bầy
+ gia
+ súc
+ sau
+ khi
+ ăn
+ tối
+ ,
+ Alan
+ đã
+ rất
+ ngạc
+ nhiên
+ khi
+ mở
+ cửa
+
+ thấy
+ Pero
+ ngồi
+ chờ
+ ngay
+ trước
+ cửa
+ .
+ Thấy
+ Alan
+ ,
+ Pero
+ mừng
+ rỡ
+
+ cùng
+ .
Model/MultimodelNER/VLSP2016/Filetxt/output.txt ADDED
@@ -0,0 +1,6 @@
+ This
+ is
+ some
+ example
+ text
+ .
Model/MultimodelNER/VLSP2016/Filetxt/test.txt ADDED
@@ -0,0 +1,97 @@
+ IMGID:taybannha
+ Dân
+ trí
+ Chức
+
+ địch
+ Euro
+ 2008
+ đầy
+ thuyết
+ phục
+ của
+ Tây
+ Ban
+ Nha
+ trên
+ đất
+ Áo
+
+ Thụy
+
+ đã
+ mở
+ ra
+ kỷ
+ nguyên
+ vinh
+ quanh
+ của
+ La
+ Furia
+ Roja
+ ,
+ với
+ lối
+ chơi
+ tiqui
+ taka
+ đầy
+ biến
+ ảo
+ .
+ Trong
+ quá
+ khứ
+ ,
+ Tây
+ Ban
+ Nha
+ nổi
+ tiếng
+ với
+ biệt
+ danh
+ Vua
+ vòng
+ loại
+ .
+ Họ
+ thường
+ thi
+ đấu
+ rất
+ tốt
+
+ vòng
+ loại
+ nhưng
+ lại
+ chưa
+ bao
+ giờ
+ chứng
+ minh
+ được
+ sức
+ mạnh
+
+ vòng
+ chung
+ kết
+ giải
+ đấu
+ lớn
+ .
+ Lần
+ duy
+ nhất
+ họ
+ lên
+ ngôi
+
+
+ kỳ
+ Euro
+ 1964
+ .
Model/MultimodelNER/VLSP2016/Image/014716.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/My model.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/bully.jpeg ADDED
Model/MultimodelNER/VLSP2016/Image/bully.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/maria.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/penguin.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/pero.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/pero2.jpg ADDED
Model/MultimodelNER/VLSP2016/Image/taybannha.jpg ADDED
Model/MultimodelNER/VLSP2016/MNER_2016.py ADDED
@@ -0,0 +1,106 @@
+ import streamlit as st
+ from spacy import displacy
+ from Model.NER.VLSP2021.Predict_Ner import ViTagger
+ import re
+ from thunghiemxuly import save_uploaded_image, convert_text_to_txt, add_string_to_txt
+
+ import os
+ from transformers import AutoTokenizer, BertConfig
+ from Model.MultimodelNER.VLSP2016.train_umt_2016 import load_model, predict
+ from Model.MultimodelNER.Ner_processing import format_predictions, process_predictions, combine_entities, remove_B_prefix, combine_i_tags
+
+ from Model.MultimodelNER.predict import get_test_examples_predict
+ from Model.MultimodelNER import resnet as resnet
+ from Model.MultimodelNER.resnet_utils import myResnet
+ import torch
+ import numpy as np
+ from Model.MultimodelNER.VLSP2016.dataset_roberta import MNERProcessor_2016
+
+
+ CONFIG_NAME = 'bert_config.json'
+ WEIGHTS_NAME = 'pytorch_model.bin'
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ net = getattr(resnet, 'resnet152')()
+ net.load_state_dict(torch.load(os.path.join('E:/demo_datn/pythonProject1/Model/Resnet/', 'resnet152.pth')))
+ encoder = myResnet(net, True, device)
+
+
+ def process_text(text):
+     # Collapse repeated whitespace and trim leading/trailing spaces
+     processed_text = re.sub(r'\s+', ' ', text.strip())
+     return processed_text
+
+
+ def show_mner_2016():
+     multimodal_text = st.text_area("Enter your text for MNER:", height=300)
+     multimodal_text = process_text(multimodal_text)  # Normalize the input text
+     image = st.file_uploader("Upload an image (only jpg):", type=["jpg"])
+     if st.button("Process Multimodal NER"):
+         save_image = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Image'
+         save_txt = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/test.txt'
+         image_name = image.name
+         save_uploaded_image(image, save_image)
+         convert_text_to_txt(multimodal_text, save_txt)
+         add_string_to_txt(image_name, save_txt)
+         st.image(image, caption="Uploaded Image", use_column_width=True)
+
+         bert_model = 'vinai/phobert-base-v2'
+         output_dir = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/best_model'
+         output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
+         output_encoder_file = os.path.join(output_dir, "pytorch_encoder.bin")
+         processor = MNERProcessor_2016()
+         label_list = processor.get_labels()
+         auxlabel_list = processor.get_auxlabels()
+         num_labels = len(label_list) + 1
+         auxnum_labels = len(auxlabel_list) + 1
+         # Transition matrix mapping the auxiliary label space (pad/O/B/I/X/<s>/</s>)
+         # onto the full label space; rows are aux labels, columns are main labels.
+         trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+         trans_matrix[0, 0] = 1  # pad to pad
+         trans_matrix[1, 1] = 1  # O to O
+         trans_matrix[2, 2] = 0.25  # B to B-MISC
+         trans_matrix[2, 4] = 0.25  # B to B-PER
+         trans_matrix[2, 6] = 0.25  # B to B-ORG
+         trans_matrix[2, 8] = 0.25  # B to B-LOC
+         trans_matrix[3, 3] = 0.25  # I to I-MISC
+         trans_matrix[3, 5] = 0.25  # I to I-PER
+         trans_matrix[3, 7] = 0.25  # I to I-ORG
+         trans_matrix[3, 9] = 0.25  # I to I-LOC
+         trans_matrix[4, 10] = 1  # X to X
+         trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+         trans_matrix[6, 12] = 1
+         tokenizer = AutoTokenizer.from_pretrained(bert_model, do_lower_case=False)
+         model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels)
+         eval_examples = get_test_examples_predict('E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/')
+
+         y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device, save_image, trans_matrix)
+         formatted_output = format_predictions(a, y_pred[0])
+         final = process_predictions(formatted_output)
+         final2 = combine_entities(final)
+         final3 = remove_B_prefix(final2)
+         final4 = combine_i_tags(final3)
+         words_and_labels = final4
+         # Build the list of words
+         words = [word for word, _ in words_and_labels]
+         # Build character-offset entity spans for each word, skipping 'O' labels
+         entities = [{'start': sum(len(word) + 1 for word, _ in words_and_labels[:i]),
+                      'end': sum(len(word) + 1 for word, _ in words_and_labels[:i + 1]), 'label': label} for
+                     i, (word, label)
+                     in enumerate(words_and_labels) if label != 'O']
+         # print(entities)
+
+         # Render the visualization without color for 'O' labels
+         html = displacy.render(
+             {"text": " ".join(words), "ents": entities, "title": None},
+             style="ent",
+             manual=True,
+             options={"colors": {"MISC": "#806699",
+                                 "ORG": "#ff6666",
+                                 "LOC": "#66cc66",
+                                 "PER": "#bf80ff",
+                                 "O": None}}
+         )
+         # print(html)
+         st.markdown(html, unsafe_allow_html=True)
+
+
+ ### Example 1: A brawl broke out at the ice-hockey game between the Penguins and the Islanders in the US (image:penguin)
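
show_mner_2016() renders into an already-running Streamlit page, so it needs a small entry point. A hypothetical app.py wiring it up could look like this (the file name and title are assumptions, not part of this commit):

import streamlit as st
from Model.MultimodelNER.VLSP2016.MNER_2016 import show_mner_2016

st.title('Multimodal NER - VLSP2016')  # assumed title
show_mner_2016()

# Launch with: streamlit run app.py
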
Model/MultimodelNER/VLSP2016/__pycache__/MNER_2016.cpython-39.pyc ADDED
Binary file (4.34 kB).
 
Model/MultimodelNER/VLSP2016/__pycache__/dataset_roberta.cpython-39.pyc ADDED
Binary file (9.5 kB).
 
Model/MultimodelNER/VLSP2016/__pycache__/train_umt_2016.cpython-39.pyc ADDED
Binary file (8.82 kB).
 
Model/MultimodelNER/VLSP2016/best_model/bert_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "vinai/phobert-base-v2",
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2016/best_model/eval_results.txt ADDED
@@ -0,0 +1,11 @@
+               precision    recall  f1-score   support
+
+          LOC      0.9570    0.9618    0.9594       996
+         MISC      0.9143    0.8889    0.9014        36
+          ORG      0.8129    0.7975    0.8051       158
+          PER      0.9835    0.9788    0.9812       851
+
+    micro avg      0.9563    0.9549    0.9556      2041
+    macro avg      0.9169    0.9068    0.9118      2041
+ weighted avg      0.9561    0.9549    0.9555      2041
+ Overall: 0.9563297350343474 0.9549240568348849 0.9556263790144643
Model/MultimodelNER/VLSP2016/best_model/model_config.json ADDED
@@ -0,0 +1 @@
+ {"bert_model": "vinai/phobert-base-v2", "do_lower": false, "max_seq_length": 256, "num_labels": 13, "label_map": {"1": "B-ORG", "2": "B-MISC", "3": "I-PER", "4": "I-ORG", "5": "B-LOC", "6": "I-MISC", "7": "I-LOC", "8": "O", "9": "B-PER", "10": "X", "11": "<s>", "12": "</s>"}}
Model/MultimodelNER/VLSP2016/best_model/mtmner_pred.txt ADDED
The diff for this file is too large to render.
 
Model/MultimodelNER/VLSP2016/best_model/pytorch_encoder.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab29aaf11c3beb874e34fc9bccaa1fb838d94701cf4a4189c37d768a7678e958
+ size 241699561
Model/MultimodelNER/VLSP2016/best_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c950c331c48a229744b1b727a49d3dc248f28377ba8efbd86612daf2721e4368
+ size 699285929
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json ADDED
File without changes
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/refs/main ADDED
@@ -0,0 +1 @@
+ 2b51e367d92093c9688112098510e6a58bab67cd
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2016/cache/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba09eb4c244a5b3a49ad76d52d129ac085b61f5c6287de7f99508b02be589f9
+ size 540322347
Model/MultimodelNER/VLSP2016/dataset_roberta.py ADDED
@@ -0,0 +1,452 @@
+ import torch
+ import logging
+ import os
+
+ logger = logging.getLogger(__name__)
+ from torchvision import transforms
+ from PIL import Image
+
+
+ class SBInputExample(object):
+     """A single training/test example for simple sequence classification."""
+
+     def __init__(self, guid, text_a, text_b, img_id, label=None, auxlabel=None):
+         """Constructs an InputExample.
+
+         Args:
+             guid: Unique id for the example.
+             text_a: string. The untokenized text of the first sequence. For single
+                 sequence tasks, only this sequence must be specified.
+             text_b: (Optional) string. The untokenized text of the second sequence.
+                 Only must be specified for sequence pair tasks.
+             label: (Optional) string. The label of the example. This should be
+                 specified for train and dev examples, but not for test examples.
+         """
+         self.guid = guid
+         self.text_a = text_a
+         self.text_b = text_b
+         self.img_id = img_id
+         self.label = label
+         # Please note that the auxlabel is not used in SB
+         # it is just kept in order not to modify the original code
+         self.auxlabel = auxlabel
+
+
+ class SBInputFeatures(object):
+     """A single set of features of data"""
+
+     def __init__(self, input_ids, input_mask, added_input_mask, segment_ids, img_feat, label_id, auxlabel_id):
+         self.input_ids = input_ids
+         self.input_mask = input_mask
+         self.added_input_mask = added_input_mask
+         self.segment_ids = segment_ids
+         self.img_feat = img_feat
+         self.label_id = label_id
+         self.auxlabel_id = auxlabel_id
+
+
+ def sbreadfile(filename):
+     '''
+     Read the file and return the data as a list of (words, labels) pairs,
+     together with the list of image ids and the auxiliary labels.
+     '''
+     print("Preparing data for ", filename)
+     f = open(filename, encoding='utf8')
+     data = []
+     imgs = []
+     auxlabels = []
+     sentence = []
+     label = []
+     auxlabel = []
+     imgid = ''
+
+     for line in f:
+         line = line.strip()  # Trim leading and trailing whitespace
+         if line.startswith('IMGID:'):
+             imgid = line.split('IMGID:')[1] + '.jpg'
+             continue
+         if line == '':
+             if len(sentence) > 0:
+                 data.append((sentence, label))
+                 imgs.append(imgid)
+                 auxlabels.append(auxlabel)
+                 sentence = []
+                 label = []
+                 auxlabel = []
+                 imgid = ''
+             continue
+         splits = line.split('\t')
+         if len(splits) == 2:  # Make sure the line holds exactly one word and one label
+             word, cur_label = splits
+             sentence.append(word)
+             label.append(cur_label)
+             auxlabel.append(cur_label[0])  # The first character of the label serves as the auxiliary label
+
+     if len(sentence) > 0:  # Flush the last sentence in the file
+         data.append((sentence, label))
+         imgs.append(imgid)
+         auxlabels.append(auxlabel)
+
+     print("The number of samples: " + str(len(data)))
+     print("The number of images: " + str(len(imgs)))
+     return data, imgs, auxlabels
+
+
+ # def sbreadfile(filename):  # original version
+ #     '''
+ #     read file
+ #     return format :
+ #     [ ['EU', 'B-ORG'], ['rejects', 'O'], ['German', 'B-MISC'], ['call', 'O'], ['to', 'O'], ['boycott', 'O'], ['British', 'B-MISC'], ['lamb', 'O'], ['.', 'O'] ]
+ #     '''
+ #     print("prepare data for ", filename)
+ #     f = open(filename, encoding='utf8')
+ #     data = []
+ #     imgs = []
+ #     auxlabels = []
+ #     sentence = []
+ #     label = []
+ #     auxlabel = []
+ #     imgid = ''
+ #     a = 0
+ #     for line in f:
+ #         if line.startswith('IMGID:'):
+ #             imgid = line.strip().split('IMGID:')[1] + '.jpg'
+ #             continue
+ #         if line[0] == "\n":
+ #             if len(sentence) > 0:
+ #                 data.append((sentence, label))
+ #                 imgs.append(imgid)
+ #                 auxlabels.append(auxlabel)
+ #                 sentence = []
+ #                 label = []
+ #                 imgid = ''
+ #                 auxlabel = []
+ #             continue
+ #         splits = line.split('\t')
+ #         sentence.append(splits[0])
+ #         cur_label = splits[-1][:-1]
+ #         # if cur_label == 'B-OTHER':
+ #         #     cur_label = 'B-MISC'
+ #         # elif cur_label == 'I-OTHER':
+ #         #     cur_label = 'I-MISC'
+ #         label.append(cur_label)
+ #         auxlabel.append(cur_label[0])
+
+ #     if len(sentence) > 0:
+ #         data.append((sentence, label))
+ #         imgs.append(imgid)
+ #         auxlabels.append(auxlabel)
+ #         sentence = []
+ #         label = []
+ #         auxlabel = []
+
+ #     print("The number of samples: " + str(len(data)))
+ #     print("The number of images: " + str(len(imgs)))
+ #     return data, imgs, auxlabels
+
+
+ class DataProcessor(object):
+     """Base class for data converters for sequence classification data sets."""
+
+     def get_train_examples(self, data_dir):
+         """Gets a collection of `InputExample`s for the train set."""
+         raise NotImplementedError()
+
+     def get_dev_examples(self, data_dir):
+         """Gets a collection of `InputExample`s for the dev set."""
+         raise NotImplementedError()
+
+     def get_labels(self):
+         """Gets the list of labels for this data set."""
+         raise NotImplementedError()
+
+     @classmethod
+     def _read_sbtsv(cls, input_file, quotechar=None):
+         """Reads a tab separated value file."""
+         return sbreadfile(input_file)
+
+
+ class MNERProcessor_2016(DataProcessor):
+     """Processor for the CoNLL-2003 data set."""
+
+     def get_train_examples(self, data_dir):
+         """See base class."""
+         data, imgs, auxlabels = self._read_sbtsv(os.path.join(data_dir, "train.txt"))
+         return self._create_examples(data, imgs, auxlabels, "train")
+
+     def get_dev_examples(self, data_dir):
+         """See base class."""
+         data, imgs, auxlabels = self._read_sbtsv(os.path.join(data_dir, "dev.txt"))
+         return self._create_examples(data, imgs, auxlabels, "dev")
+
+     def get_test_examples(self, data_dir):
+         """See base class."""
+         data, imgs, auxlabels = self._read_sbtsv(os.path.join(data_dir, "test.txt"))
+         return self._create_examples(data, imgs, auxlabels, "test")
+
+     def get_labels(self):
+         # return [
+         #     "O", "I-PRODUCT-AWARD",
+         #     "B-MISCELLANEOUS",
+         #     "B-QUANTITY-NUM",
+         #     "B-ORGANIZATION-SPORTS",
+         #     "B-DATETIME",
+         #     "I-ADDRESS",
+         #     "I-PERSON",
+         #     "I-EVENT-SPORT",
+         #     "B-ADDRESS",
+         #     "B-EVENT-NATURAL",
+         #     "I-LOCATION-GPE",
+         #     "B-EVENT-GAMESHOW",
+         #     "B-DATETIME-TIMERANGE",
+         #     "I-QUANTITY-NUM",
+         #     "I-QUANTITY-AGE",
+         #     "B-EVENT-CUL",
+         #     "I-QUANTITY-TEM",
+         #     "I-PRODUCT-LEGAL",
+         #     "I-LOCATION-STRUC",
+         #     "I-ORGANIZATION",
+         #     "B-PHONENUMBER",
+         #     "B-IP",
+         #     "B-QUANTITY-AGE",
+         #     "I-DATETIME-TIME",
+         #     "I-DATETIME",
+         #     "B-ORGANIZATION-MED",
+         #     "B-DATETIME-SET",
+         #     "I-EVENT-CUL",
+         #     "B-QUANTITY-DIM",
+         #     "I-QUANTITY-DIM",
+         #     "B-EVENT",
+         #     "B-DATETIME-DATERANGE",
+         #     "I-EVENT-GAMESHOW",
+         #     "B-PRODUCT-AWARD",
+         #     "B-LOCATION-STRUC",
+         #     "B-LOCATION",
+         #     "B-PRODUCT",
+         #     "I-MISCELLANEOUS",
+         #     "B-SKILL",
+         #     "I-QUANTITY-ORD",
+         #     "I-ORGANIZATION-STOCK",
+         #     "I-LOCATION-GEO",
+         #     "B-PERSON",
+         #     "B-PRODUCT-COM",
+         #     "B-PRODUCT-LEGAL",
+         #     "I-LOCATION",
+         #     "B-QUANTITY-TEM",
+         #     "I-PRODUCT",
+         #     "B-QUANTITY-CUR",
+         #     "I-QUANTITY-CUR",
+         #     "B-LOCATION-GPE",
+         #     "I-PHONENUMBER",
+         #     "I-ORGANIZATION-MED",
+         #     "I-EVENT-NATURAL",
+         #     "I-EMAIL",
+         #     "B-ORGANIZATION",
+         #     "B-URL",
+         #     "I-DATETIME-TIMERANGE",
+         #     "I-QUANTITY",
+         #     "I-IP",
+         #     "B-EVENT-SPORT",
+         #     "B-PERSONTYPE",
+         #     "B-QUANTITY-PER",
+         #     "I-QUANTITY-PER",
+         #     "I-PRODUCT-COM",
+         #     "I-DATETIME-DURATION",
+         #     "B-LOCATION-GPE-GEO",
+         #     "B-QUANTITY-ORD",
+         #     "I-EVENT",
+         #     "B-DATETIME-TIME",
+         #     "B-QUANTITY",
+         #     "I-DATETIME-SET",
+         #     "I-LOCATION-GPE-GEO",
+         #     "B-ORGANIZATION-STOCK",
+         #     "I-ORGANIZATION-SPORTS",
+         #     "I-SKILL",
+         #     "I-URL",
+         #     "B-DATETIME-DURATION",
+         #     "I-DATETIME-DATE",
+         #     "I-PERSONTYPE",
+         #     "B-DATETIME-DATE",
+         #     "I-DATETIME-DATERANGE",
+         #     "B-LOCATION-GEO",
+         #     "B-EMAIL", "X", "<s>", "</s>"]
+
+         # vlsp2016
+         return [
+             "B-ORG", "B-MISC",
+             "I-PER",
+             "I-ORG",
+             "B-LOC",
+             "I-MISC",
+             "I-LOC",
+             "O",
+             "B-PER",
+             "X",
+             "<s>",
+             "</s>"]
+
+         # vlsp2018
+         # return [
+         #     "O", "I-ORGANIZATION",
+         #     "B-ORGANIZATION",
+         #     "I-LOCATION",
+         #     "B-MISCELLANEOUS",
+         #     "I-PERSON",
+         #     "B-PERSON",
+         #     "I-MISCELLANEOUS",
+         #     "B-LOCATION",
+         #     "X",
+         #     "<s>",
+         #     "</s>"]
+
+     def get_auxlabels(self):
+         return ["O", "B", "I", "X", "<s>", "</s>"]
+
+     def get_start_label_id(self):
+         label_list = self.get_labels()
+         label_map = {label: i for i, label in enumerate(label_list, 1)}
+         return label_map['<s>']
+
+     def get_stop_label_id(self):
+         label_list = self.get_labels()
+         label_map = {label: i for i, label in enumerate(label_list, 1)}
+         return label_map['</s>']
+
+     def _create_examples(self, lines, imgs, auxlabels, set_type):
+         examples = []
+         for i, (sentence, label) in enumerate(lines):
+             guid = "%s-%s" % (set_type, i)
+             text_a = ' '.join(sentence)
+             text_b = None
+             img_id = imgs[i]
+             auxlabel = auxlabels[i]
+             examples.append(
+                 SBInputExample(guid=guid, text_a=text_a, text_b=text_b, img_id=img_id, label=label, auxlabel=auxlabel))
+         return examples
+
+
+ def image_process(image_path, transform):
+     image = Image.open(image_path).convert('RGB')
+     image = transform(image)
+     return image
+
+
+ def convert_mm_examples_to_features(examples, label_list, auxlabel_list,
+                                     max_seq_length, tokenizer, crop_size, path_img):
+     label_map = {label: i for i, label in enumerate(label_list, 1)}
+     auxlabel_map = {label: i for i, label in enumerate(auxlabel_list, 1)}
+
+     features = []
+     count = 0
+
+     transform = transforms.Compose([
+         transforms.Resize([256, 256]),
+         transforms.RandomCrop(crop_size),  # args.crop_size, by default it is set to be 224
+         transforms.RandomHorizontalFlip(),
+         transforms.ToTensor(),
+         transforms.Normalize((0.485, 0.456, 0.406),
+                              (0.229, 0.224, 0.225))])
+
+     for (ex_index, example) in enumerate(examples):
+         textlist = example.text_a.split(' ')
+         labellist = example.label
+         auxlabellist = example.auxlabel
+         tokens = []
+         labels = []
+         auxlabels = []
+         for i, word in enumerate(textlist):
+             token = tokenizer.tokenize(word)
+             tokens.extend(token)
+             label_1 = labellist[i]
+             auxlabel_1 = auxlabellist[i]
+             for m in range(len(token)):
+                 if m == 0:
+                     labels.append(label_1)
+                     auxlabels.append(auxlabel_1)
+                 else:
+                     labels.append("X")
+                     auxlabels.append("X")
+         if len(tokens) >= max_seq_length - 1:
+             tokens = tokens[0:(max_seq_length - 2)]
+             labels = labels[0:(max_seq_length - 2)]
+             auxlabels = auxlabels[0:(max_seq_length - 2)]
+         ntokens = []
+         segment_ids = []
+         label_ids = []
+         auxlabel_ids = []
+         ntokens.append("<s>")
+         segment_ids.append(0)
+         label_ids.append(label_map["<s>"])
+         auxlabel_ids.append(auxlabel_map["<s>"])
+         for i, token in enumerate(tokens):
+             ntokens.append(token)
+             segment_ids.append(0)
+             label_ids.append(label_map[labels[i]])
+             auxlabel_ids.append(auxlabel_map[auxlabels[i]])
+         ntokens.append("</s>")
+         segment_ids.append(0)
+         label_ids.append(label_map["</s>"])
+         auxlabel_ids.append(auxlabel_map["</s>"])
+         input_ids = tokenizer.convert_tokens_to_ids(ntokens)
+         input_mask = [1] * len(input_ids)
+         added_input_mask = [1] * (len(input_ids) + 49)  # the 49 extra positions encode the regional image representations
+
+         while len(input_ids) < max_seq_length:
+             input_ids.append(0)
+             input_mask.append(0)
+             added_input_mask.append(0)
+             segment_ids.append(0)
+             label_ids.append(0)
+             auxlabel_ids.append(0)
+
+         assert len(input_ids) == max_seq_length
+         assert len(input_mask) == max_seq_length
+         assert len(segment_ids) == max_seq_length
+         assert len(label_ids) == max_seq_length
+         assert len(auxlabel_ids) == max_seq_length
+
+         image_name = example.img_id
+         image_path = os.path.join(path_img, image_name)
+
+         if not os.path.exists(image_path):
+             if 'NaN' not in image_path:
+                 print(image_path)
+         try:
+             image = image_process(image_path, transform)
+         except Exception:
+             # Fall back to a neutral background image when the file is missing or unreadable
+             count += 1
+             image_path_fail = os.path.join(path_img, 'background.jpg')
+             image = image_process(image_path_fail, transform)
+         else:
+             if ex_index < 2:
+                 logger.info("*** Example ***")
+                 logger.info("guid: %s" % (example.guid))
+                 logger.info("tokens: %s" % " ".join(
+                     [str(x) for x in tokens]))
+                 logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
+                 logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
+                 logger.info(
+                     "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+                 logger.info("label: %s" % " ".join([str(x) for x in label_ids]))
+                 logger.info("auxlabel: %s" % " ".join([str(x) for x in auxlabel_ids]))
+
+         features.append(
+             SBInputFeatures(input_ids=input_ids, input_mask=input_mask, added_input_mask=added_input_mask,
+                             segment_ids=segment_ids, img_feat=image, label_id=label_ids, auxlabel_id=auxlabel_ids))
+
+     print('the number of problematic samples: ' + str(count))
+     return features
+
+
+ # if __name__ == "__main__":
+ #     processor = MNERProcessor_2016()
+ #     label_list = processor.get_labels()
+ #     auxlabel_list = processor.get_auxlabels()
+ #     num_labels = len(label_list) + 1  # label 0 corresponds to padding, labels in label_list start from 1
+ #
+ #     start_label_id = processor.get_start_label_id()
+ #     stop_label_id = processor.get_stop_label_id()
+ #
+ #     data_dir = r'sample_data'
+ #     train_examples = processor.get_train_examples(data_dir)
+ #     print(train_examples[0].img_id)
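
A minimal sketch of the data path, following the commented __main__ block above (the `sample_data` directory and its `images` subfolder are assumptions; `train.txt` must use the IMGID/token-tab-label layout that `sbreadfile` expects):

from transformers import AutoTokenizer
from Model.MultimodelNER.VLSP2016.dataset_roberta import (
    MNERProcessor_2016, convert_mm_examples_to_features)

processor = MNERProcessor_2016()
examples = processor.get_train_examples('sample_data')  # reads sample_data/train.txt
tokenizer = AutoTokenizer.from_pretrained('vinai/phobert-base-v2', do_lower_case=False)
features = convert_mm_examples_to_features(
    examples, processor.get_labels(), processor.get_auxlabels(),
    max_seq_length=256, tokenizer=tokenizer,
    crop_size=224, path_img='sample_data/images')  # image folder is an assumption
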
Model/MultimodelNER/VLSP2016/list.txt ADDED
@@ -0,0 +1,5 @@
+ IMGID:namngo
+ Toi
+ ten
+ la
+ Minh
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ size 0
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/.no_exist/2b51e367d92093c9688112098510e6a58bab67cd/model.safetensors.index.json ADDED
File without changes
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/refs/main ADDED
@@ -0,0 +1 @@
+ 2b51e367d92093c9688112098510e6a58bab67cd
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2016/models--vinai--phobert-base-v2/snapshots/2b51e367d92093c9688112098510e6a58bab67cd/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ba09eb4c244a5b3a49ad76d52d129ac085b61f5c6287de7f99508b02be589f9
+ size 540322347
Model/MultimodelNER/VLSP2016/test.txt ADDED
@@ -0,0 +1,78 @@
+ IMGID:014716
+ “ O
+ Tôi O
+ xin O
+ cám_ơn O
+ thượng_sĩ O
+ Nguyễn B-PER
+ Trung I-PER
+ Hiếu I-PER
+ ( O
+ người O
+ phiên_dịch O
+ tiếng B-MISC
+ Anh I-MISC
+ cho O
+ đơn_vị O
+ tình_báo O
+ quân_sự O
+ số O
+ 635 O
+ của O
+ quân_đội O
+ Mỹ B-LOC
+ biên_chế O
+ bên O
+ cạnh O
+ lữ_đoàn B-ORG
+ bộ_binh I-ORG
+ số I-ORG
+ 11 I-ORG
+ , O
+ sư_đoàn B-ORG
+ bộ_binh I-ORG
+ 23 I-ORG
+ ) O
+ , O
+ người O
+ đã O
+ cứu O
+ cuốn O
+ nhật_ký O
+ của O
+ chị O
+ tôi O
+ khỏi O
+ bị O
+ quẳng O
+ vào O
+ đống O
+ lửa O
+ bởi O
+ anh O
+ đã O
+ nhận O
+ ra O
+ trong O
+ cuốn O
+ sổ O
+ này O
+ đã O
+ chứa_đựng O
+ lửa O
+ rồi O
+ để O
+ anh O
+ trao O
+ lại O
+ nó O
+ cho O
+ Fred B-PER
+ như O
+ một O
+ lời O
+ uỷ_thác O
+ từ O
+ chị O
+ tôi O
+ . O
Model/MultimodelNER/VLSP2016/train_umt_2016.py ADDED
@@ -0,0 +1,352 @@
1
+ import os
2
+ import sys
3
+
4
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
5
+ import argparse
6
+
7
+ import logging
8
+ import random
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from transformers import AutoTokenizer, BertConfig
13
+ from Model.MultimodelNER.UMT import UMT
14
+ from Model.MultimodelNER import resnet as resnet
15
+ from Model.MultimodelNER.resnet_utils import myResnet
16
+ from Model.MultimodelNER.VLSP2016.dataset_roberta import convert_mm_examples_to_features, MNERProcessor_2016
17
+ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
18
+ TensorDataset)
19
+ from pytorch_pretrained_bert.optimization import BertAdam, warmup_linear
20
+ from Model.MultimodelNER.ner_evaluate import evaluate_each_class,evaluate
21
+ from seqeval.metrics import classification_report
22
+ from tqdm import tqdm, trange
23
+ import json
24
+ from Model.MultimodelNER.predict import convert_mm_examples_to_features_predict, get_test_examples_predict
25
+ from Model.MultimodelNER.Ner_processing import *
26
+ CONFIG_NAME = 'bert_config.json'
27
+ WEIGHTS_NAME = 'pytorch_model.bin'
28
+
29
+ logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
30
+ datefmt='%m/%d/%Y %H:%M:%S',
31
+ level=logging.INFO)
32
+ logger = logging.getLogger(__name__)
+ parser = argparse.ArgumentParser()
+ ## Required parameters
+ parser.add_argument("--negative_rate",
+                     default=16,
+                     type=int,
+                     help="the negative samples rate")
+
+ parser.add_argument('--lamb',
+                     default=0.62,
+                     type=float)
+
+ parser.add_argument('--temp',
+                     type=float,
+                     default=0.179,
+                     help="parameter for CL training")
+
+ parser.add_argument('--temp_lamb',
+                     type=float,
+                     default=0.7,
+                     help="parameter for CL training")
+
+ parser.add_argument("--data_dir",
+                     default='./data/twitter2017',
+                     type=str,
+                     help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
+ parser.add_argument("--bert_model", default='vinai/phobert-base-v2', type=str)
+ parser.add_argument("--task_name",
+                     default='sonba',
+                     type=str,
+                     help="The name of the task to train.")
+ parser.add_argument("--output_dir",
+                     default='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/best_model/',
+                     type=str,
+                     help="The output directory where the model predictions and checkpoints will be written.")
+
+ ## Other parameters
+ parser.add_argument("--cache_dir",
+                     default="",
+                     type=str,
+                     help="Where do you want to store the pre-trained models downloaded from s3")
+
+ parser.add_argument("--max_seq_length",
+                     default=128,
+                     type=int,
+                     help="The maximum total input sequence length after WordPiece tokenization. \n"
+                          "Sequences longer than this will be truncated, and sequences shorter \n"
+                          "than this will be padded.")
+
+ parser.add_argument("--do_train",
+                     action='store_true',
+                     help="Whether to run training.")
+
+ parser.add_argument("--do_eval",
+                     action='store_true',
+                     help="Whether to run eval on the dev set.")
+
+ parser.add_argument("--do_lower_case",
+                     action='store_true',
+                     help="Set this flag if you are using an uncased model.")
+
+ parser.add_argument("--train_batch_size",
+                     default=64,
+                     type=int,
+                     help="Total batch size for training.")
+
+ parser.add_argument("--eval_batch_size",
+                     default=16,
+                     type=int,
+                     help="Total batch size for eval.")
+
+ parser.add_argument("--learning_rate",
+                     default=5e-5,
+                     type=float,
+                     help="The initial learning rate for Adam.")
+
+ parser.add_argument("--num_train_epochs",
+                     default=12.0,
+                     type=float,
+                     help="Total number of training epochs to perform.")
+
+ parser.add_argument("--warmup_proportion",
+                     default=0.1,
+                     type=float,
+                     help="Proportion of training to perform linear learning rate warmup for. "
+                          "E.g., 0.1 = 10%% of training.")
+
+ parser.add_argument("--no_cuda",
+                     action='store_true',
+                     help="Whether not to use CUDA when available")
+
+ parser.add_argument("--local_rank",
+                     type=int,
+                     default=-1,
+                     help="local_rank for distributed training on gpus")
+
+ parser.add_argument('--seed',
+                     type=int,
+                     default=37,
+                     help="random seed for initialization")
+
+ parser.add_argument('--gradient_accumulation_steps',
+                     type=int,
+                     default=1,
+                     help="Number of updates steps to accumulate before performing a backward/update pass.")
+
+ parser.add_argument('--fp16',
+                     action='store_true',
+                     help="Whether to use 16-bit float precision instead of 32-bit")
+
+ parser.add_argument('--loss_scale',
+                     type=float, default=0,
+                     help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
+                          "0 (default value): dynamic loss scaling.\n"
+                          "Positive power of 2: static loss scaling value.\n")
+
+ parser.add_argument('--mm_model', default='MTCCMBert', help='model name')  # 'MTCCMBert', 'NMMTCCMBert'
+ parser.add_argument('--layer_num1', type=int, default=1, help='number of txt2img layer')
+ parser.add_argument('--layer_num2', type=int, default=1, help='number of img2txt layer')
+ parser.add_argument('--layer_num3', type=int, default=1, help='number of txt2txt layer')
+ parser.add_argument('--fine_tune_cnn', action='store_true', help='fine tune pre-trained CNN if True')
+ parser.add_argument('--resnet_root', default='E:/demo_datn/pythonProject1/Model/Resnet/', help='path to the pre-trained cnn models')
+ parser.add_argument('--crop_size', type=int, default=224, help='crop size of image')
+ parser.add_argument('--path_image', default='E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Image', help='path to images')
+ # parser.add_argument('--mm_model', default='TomBert', help='model name') #
+ parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
+ parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
+ args = parser.parse_args()
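+ # Example invocation (illustrative; the default paths above are machine-specific):
+ #   python train_umt_2016.py --do_train --do_eval \
+ #       --data_dir ./data/twitter2017 --bert_model vinai/phobert-base-v2 \
+ #       --train_batch_size 64 --num_train_epochs 12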
+
+ processors = {
+     "twitter2015": MNERProcessor_2016,
+     "twitter2017": MNERProcessor_2016,
+     "sonba": MNERProcessor_2016
+ }
+
+ random.seed(args.seed)
+ np.random.seed(args.seed)
+ torch.manual_seed(args.seed)
+
+ task_name = args.task_name.lower()
+
+ processor = processors[task_name]()
+ label_list = processor.get_labels()
+ auxlabel_list = processor.get_auxlabels()
+ num_labels = len(label_list) + 1  # label 0 corresponds to padding; labels in label_list start from 1
+ auxnum_labels = len(auxlabel_list) + 1  # label 0 corresponds to padding; labels in auxlabel_list start from 1
+
+ start_label_id = processor.get_start_label_id()
+ stop_label_id = processor.get_stop_label_id()
+
+ # Initialization of our conversion matrix; in this implementation it is a 7*13
+ # matrix (auxiliary labels x full labels) initialized as follows:
+ trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+ trans_matrix[0, 0] = 1  # pad to pad
+ trans_matrix[1, 1] = 1  # O to O
+ trans_matrix[2, 2] = 0.25  # B to B-MISC
+ trans_matrix[2, 4] = 0.25  # B to B-PER
+ trans_matrix[2, 6] = 0.25  # B to B-ORG
+ trans_matrix[2, 8] = 0.25  # B to B-LOC
+ trans_matrix[3, 3] = 0.25  # I to I-MISC
+ trans_matrix[3, 5] = 0.25  # I to I-PER
+ trans_matrix[3, 7] = 0.25  # I to I-ORG
+ trans_matrix[3, 9] = 0.25  # I to I-LOC
+ trans_matrix[4, 10] = 1  # X to X
+ trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+ trans_matrix[6, 12] = 1  # [SEP] to [SEP]
+ '''
+ trans_matrix = np.zeros((num_labels, auxnum_labels), dtype=float)
+ trans_matrix[0,0]=1 # pad to pad
+ trans_matrix[1,1]=1
+ trans_matrix[2,2]=1
+ trans_matrix[4,2]=1
+ trans_matrix[6,2]=1
+ trans_matrix[8,2]=1
+ trans_matrix[3,3]=1
+ trans_matrix[5,3]=1
+ trans_matrix[7,3]=1
+ trans_matrix[9,3]=1
+ trans_matrix[10,4]=1
+ trans_matrix[11,5]=1
+ trans_matrix[12,6]=1
+ '''
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
223
+
224
+ tokenizer = AutoTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
225
+
226
+
227
+
228
+ net = getattr(resnet, 'resnet152')()
229
+ net.load_state_dict(torch.load(os.path.join(args.resnet_root, 'resnet152.pth')))
230
+ encoder = myResnet(net, args.fine_tune_cnn, device)
231
+
232
+
233
+ output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
234
+ # output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
235
+ output_encoder_file = os.path.join(args.output_dir, "pytorch_encoder.bin")
236
+
237
+ temp = args.temp
238
+ temp_lamb = args.temp_lamb
239
+ lamb = args.lamb
240
+ negative_rate = args.negative_rate
241
+ # # loadmodel
242
+ # model = UMT.from_pretrained(args.bert_model,
243
+ # cache_dir=args.cache_dir, layer_num1=args.layer_num1,
244
+ # layer_num2=args.layer_num2,
245
+ # layer_num3=args.layer_num3,
246
+ # num_labels_=num_labels, auxnum_labels=auxnum_labels)
247
+ # model.load_state_dict(torch.load(output_model_file,map_location=torch.device('cpu')))
248
+ # model.to(device)
249
+ # encoder_state_dict = torch.load(output_encoder_file,map_location=torch.device('cpu'))
250
+ # encoder.load_state_dict(encoder_state_dict)
251
+ # encoder.to(device)
252
+ # print(model)
253
+
+ def load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels):
+     model = UMT.from_pretrained(args.bert_model,
+                                 cache_dir=args.cache_dir, layer_num1=args.layer_num1,
+                                 layer_num2=args.layer_num2,
+                                 layer_num3=args.layer_num3,
+                                 num_labels_=num_labels, auxnum_labels=auxnum_labels)
+     model.load_state_dict(torch.load(output_model_file, map_location=torch.device('cpu')))
+     model.to(device)
+     encoder_state_dict = torch.load(output_encoder_file, map_location=torch.device('cpu'))
+     encoder.load_state_dict(encoder_state_dict)
+     encoder.to(device)
+     return model, encoder
+
+ model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels, auxnum_labels)
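+ # load_model restores the fine-tuned UMT weights and the ResNet encoder from the
+ # checkpoint files above; map_location='cpu' keeps loading safe on CPU-only
+ # machines before both modules are moved to `device`.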
+ #
+ # # sentence = 'Thương biết_mấy những Thuận, những Liên, những Luận, Xuân, Nghĩa mỗi người một hoàn_cảnh nhưng đều rất giống nhau: rất ham học, rất cố_gắng để đạt mức hiểu biết cao nhất.'
+ # # image_path = '/kaggle/working/data/014715.jpg'
+ # # # crop_size = 224
+ path_image = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Image'
+ trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+ trans_matrix[0, 0] = 1  # pad to pad
+ trans_matrix[1, 1] = 1  # O to O
+ trans_matrix[2, 2] = 0.25  # B to B-MISC
+ trans_matrix[2, 4] = 0.25  # B to B-PER
+ trans_matrix[2, 6] = 0.25  # B to B-ORG
+ trans_matrix[2, 8] = 0.25  # B to B-LOC
+ trans_matrix[3, 3] = 0.25  # I to I-MISC
+ trans_matrix[3, 5] = 0.25  # I to I-PER
+ trans_matrix[3, 7] = 0.25  # I to I-ORG
+ trans_matrix[3, 9] = 0.25  # I to I-LOC
+ trans_matrix[4, 10] = 1  # X to X
+ trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+ trans_matrix[6, 12] = 1  # [SEP] to [SEP]
+
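+ # Note: this rebuilds the same 7x13 aux-to-full conversion matrix used above;
+ # predict() below receives this copy as its `trans_matrix` argument.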
+ def predict(model_umt, encoder_umt, eval_examples, tokenizer, device, path_image, trans_matrix):
+     features = convert_mm_examples_to_features_predict(eval_examples, 256, tokenizer, 224, path_image)
+
+     input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
+     input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
+     added_input_mask = torch.tensor([f.added_input_mask for f in features], dtype=torch.long)
+     segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
+     img_feats = torch.stack([f.img_feat for f in features])
+     print(img_feats)
+     eval_data = TensorDataset(input_ids, input_mask, added_input_mask, segment_ids, img_feats)
+     eval_sampler = SequentialSampler(eval_data)
+     eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=16)
+
+     model_umt.eval()
+     encoder_umt.eval()
+
+     y_pred = []
+     label_map = {i: label for i, label in enumerate(label_list, 1)}
+     label_map[0] = "<pad>"
+
+     for input_ids, input_mask, added_input_mask, segment_ids, img_feats in tqdm(eval_dataloader, desc="Evaluating"):
+         input_ids = input_ids.to(device)
+         input_mask = input_mask.to(device)
+         added_input_mask = added_input_mask.to(device)
+         segment_ids = segment_ids.to(device)
+         img_feats = img_feats.to(device)
+
+         with torch.no_grad():
+             imgs_f, img_mean, img_att = encoder_umt(img_feats)
+             predicted_label_seq_ids = model_umt(input_ids, segment_ids, input_mask, added_input_mask, img_att,
+                                                 trans_matrix)
+
+         logits = predicted_label_seq_ids
+         input_mask = input_mask.to('cpu').numpy()
+
+         for i, mask in enumerate(input_mask):
+             temp_1 = []
+             for j, m in enumerate(mask):
+                 if j == 0:
+                     continue
+                 if m:
+                     if label_map[logits[i][j]] not in ["<pad>", "<s>", "</s>", "X"]:
+                         temp_1.append(label_map[logits[i][j]])
+                 else:
+                     break
+             y_pred.append(temp_1)
+
+     a = eval_examples[0].text_a.split(" ")
+
+     return y_pred, a
+
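+ # predict() returns one label sequence per example plus the whitespace-split
+ # tokens of the first example; the helpers below pair those tokens with labels.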
+ eval_examples = get_test_examples_predict('E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2016/Filetxt/')
+ y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device, path_image, trans_matrix)
+ print(y_pred)
+ formatted_output = format_predictions(a, y_pred[0])
+ print(formatted_output)
+ final = process_predictions(formatted_output)
+ final2 = combine_entities(final)
+ final3 = remove_B_prefix(final2)
+ final4 = combine_i_tags(final3)
+
+ print(final4)
+
Model/MultimodelNER/VLSP2021/Filetxt/test.txt ADDED
@@ -0,0 +1,97 @@
+ IMGID:taybannha
+ Dân
+ trí
+ Chức
+
+ địch
+ Euro
+ 2008
+ đầy
+ thuyết
+ phục
+ của
+ Tây
+ Ban
+ Nha
+ trên
+ đất
+ Áo
+
+ Thụy
+
+ đã
+ mở
+ ra
+ kỷ
+ nguyên
+ vinh
+ quanh
+ của
+ La
+ Furia
+ Roja
+ ,
+ với
+ lối
+ chơi
+ tiqui
+ taka
+ đầy
+ biến
+ ảo
+ .
+ Trong
+ quá
+ khứ
+ ,
+ Tây
+ Ban
+ Nha
+ nổi
+ tiếng
+ với
+ biệt
+ danh
+ Vua
+ vòng
+ loại
+ .
+ Họ
+ thường
+ thi
+ đấu
+ rất
+ tốt
+
+ vòng
+ loại
+ nhưng
+ lại
+ chưa
+ bao
+ giờ
+ chứng
+ minh
+ được
+ sức
+ mạnh
+
+ vòng
+ chung
+ kết
+ giải
+ đấu
+ lớn
+ .
+ Lần
+ duy
+ nhất
+ họ
+ lên
+ ngôi
+
+
+ kỳ
+ Euro
+ 1964
+ .
Model/MultimodelNER/VLSP2021/Image/taybannha.jpg ADDED
Model/MultimodelNER/VLSP2021/MNER_2021.py ADDED
@@ -0,0 +1,151 @@
+ import streamlit as st
+ from spacy import displacy
+ from Model.NER.VLSP2021.Predict_Ner import ViTagger
+ import re
+ from thunghiemxuly import save_uploaded_image, convert_text_to_txt, add_string_to_txt
+
+ import os
+ from transformers import AutoTokenizer, BertConfig
+ from Model.MultimodelNER.VLSP2021.train_umt_2021 import load_model, predict
+ from Model.MultimodelNER.Ner_processing import format_predictions, process_predictions, combine_entities, remove_B_prefix, combine_i_tags
+
+ from Model.MultimodelNER.predict import get_test_examples_predict
+ from Model.MultimodelNER import resnet as resnet
+ from Model.MultimodelNER.resnet_utils import myResnet
+ import torch
+ import numpy as np
+ from Model.MultimodelNER.VLSP2021.dataset_roberta import MNERProcessor_2021
+
+ CONFIG_NAME = 'bert_config.json'
+ WEIGHTS_NAME = 'pytorch_model.bin'
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ net = getattr(resnet, 'resnet152')()
+ net.load_state_dict(torch.load(os.path.join('E:/demo_datn/pythonProject1/Model/Resnet/', 'resnet152.pth')))
+ encoder = myResnet(net, True, device)
+
+
+ def process_text(text):
+     # Remove redundant spaces as well as leading and trailing whitespace
+     processed_text = re.sub(r'\s+', ' ', text.strip())
+     return processed_text
+
+
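+ # Streamlit page for VLSP2021 multimodal NER: it saves the submitted text and
+ # .jpg where the UMT pipeline expects them, runs prediction, and renders the
+ # recognized entities with displacy.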
+ def show_mner_2021():
+     multimodal_text = st.text_area("Enter your text for MNER:", height=300)
+     multimodal_text = process_text(multimodal_text)  # process the text
+     image = st.file_uploader("Upload an image (only jpg):", type=["jpg"])
+     if st.button("Process Multimodal NER"):
+         save_image = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Image'
+         save_txt = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Filetxt/test.txt'
+         image_name = image.name
+         save_uploaded_image(image, save_image)
+         convert_text_to_txt(multimodal_text, save_txt)
+         add_string_to_txt(image_name, save_txt)
+         st.image(image, caption="Uploaded Image", use_column_width=True)
+
+         bert_model = 'vinai/phobert-base-v2'
+         output_dir = 'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/best_model'
+         output_model_file = os.path.join(output_dir, WEIGHTS_NAME)
+         output_encoder_file = os.path.join(output_dir, "pytorch_encoder.bin")
+         processor = MNERProcessor_2021()
+         label_list = processor.get_labels()
+         auxlabel_list = processor.get_auxlabels()
+         num_labels = len(label_list) + 1
+         auxnum_labels = len(auxlabel_list) + 1
+         trans_matrix = np.zeros((auxnum_labels, num_labels), dtype=float)
+         trans_matrix[0, 0] = 1  # pad to pad
+         trans_matrix[1, 1] = 1  # O to O
+         trans_matrix[2, 2] = 0.25  # B to B-MISC
+         trans_matrix[2, 4] = 0.25  # B to B-PER
+         trans_matrix[2, 6] = 0.25  # B to B-ORG
+         trans_matrix[2, 8] = 0.25  # B to B-LOC
+         trans_matrix[3, 3] = 0.25  # I to I-MISC
+         trans_matrix[3, 5] = 0.25  # I to I-PER
+         trans_matrix[3, 7] = 0.25  # I to I-ORG
+         trans_matrix[3, 9] = 0.25  # I to I-LOC
+         trans_matrix[4, 10] = 1  # X to X
+         trans_matrix[5, 11] = 1  # [CLS] to [CLS]
+         trans_matrix[6, 12] = 1  # [SEP] to [SEP]
+         tokenizer = AutoTokenizer.from_pretrained(bert_model, do_lower_case=False)
+         model_umt, encoder_umt = load_model(output_model_file, output_encoder_file, encoder, num_labels,
+                                             auxnum_labels)
+         eval_examples = get_test_examples_predict(
+             'E:/demo_datn/pythonProject1/Model/MultimodelNER/VLSP2021/Filetxt/')
+
+         y_pred, a = predict(model_umt, encoder_umt, eval_examples, tokenizer, device, save_image, trans_matrix)
+         formatted_output = format_predictions(a, y_pred[0])
+         final = process_predictions(formatted_output)
+         final2 = combine_entities(final)
+         final3 = remove_B_prefix(final2)
+         final4 = combine_i_tags(final3)
+
+         words_and_labels = final4
+         # Build the list of words
+         words = [word for word, _ in words_and_labels]
+         # Build the entity spans (start/end character offsets and label) for
+         # each word, skipping the 'O' label
+         entities = [{'start': sum(len(word) + 1 for word, _ in words_and_labels[:i]),
+                      'end': sum(len(word) + 1 for word, _ in words_and_labels[:i + 1]), 'label': label} for
+                     i, (word, label)
+                     in enumerate(words_and_labels) if label != 'O']
+         # print(entities)
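+         # Offset arithmetic example: each word contributes len(word) + 1
+         # characters (word plus one space), so for ["Tây", "Ban", "Nha"] the
+         # span computed for "Ban" is start = 4, end = 8 (the end offset
+         # includes the trailing space).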
+
+         # Render the visualization without color for 'O' labels
+         html = displacy.render(
+             {"text": " ".join(words), "ents": entities, "title": None},
+             style="ent",
+             manual=True,
+             options={"colors": {"DATETIME-DATERANGE": "#66c2ff",
+                                 "LOCATION-GPE": "#ffcc99",
+                                 "O": None,  # color for the 'O' label
+                                 "QUANTITY-NUM": "#ffdf80",
+                                 "EVENT-CUL": "#bfbfbf",
+                                 "DATETIME": "#80ff80",
+                                 "PERSONTYPE": "#ff80ff",
+                                 "PERSON": "#bf80ff",
+                                 "QUANTITY-PER": "#80cccc",
+                                 "ORGANIZATION": "#ff6666",
+                                 "LOCATION-GEO": "#66cc66",
+                                 "LOCATION-STRUC": "#cccc66",
+                                 "PRODUCT-COM": "#ffff66",
+                                 "DATETIME-DATE": "#66cccc",
+                                 "QUANTITY-DIM": "#6666ff",
+                                 "PRODUCT": "#cc6666",
+                                 "QUANTITY": "#6666cc",
+                                 "DATETIME-DURATION": "#9966ff",
+                                 "QUANTITY-CUR": "#ff9966",
+                                 "DATETIME-TIME": "#cdbf93",
+                                 "QUANTITY-TEM": "#cc9966",
+                                 "DATETIME-TIMERANGE": "#cc8566",
+                                 "EVENT-GAMESHOW": "#8c8c5a",
+                                 "QUANTITY-AGE": "#70db70",
+                                 "QUANTITY-ORD": "#e699ff",
+                                 "PRODUCT-LEGAL": "#806699",
+                                 "LOCATION": "#993366",
+                                 "ORGANIZATION-MED": "#339933",
+                                 "URL": "#ff4d4d",
+                                 "PHONENUMBER": "#99cc99",
+                                 "ORGANIZATION-SPORTS": "#6666ff",
+                                 "EVENT-SPORT": "#ffff80",
+                                 "SKILL": "#b38f66",
+                                 "EVENT-NATURAL": "#ff9966",
+                                 "ADDRESS": "#cc9966",
+                                 "IP": "#b38f66",
+                                 "EMAIL": "#cc8566",
+                                 "ORGANIZATION-STOCK": "#666633",
+                                 "DATETIME-SET": "#70db70",
+                                 "PRODUCT-AWARD": "#e699ff",
+                                 "MISCELLANEOUS": "#806699",
+                                 "LOCATION-GPE-GEO": "#99ffff"}}
+         )
+         # print(html)
+         st.markdown(html, unsafe_allow_html=True)
+
+         # Use the st.html widget to display the HTML
+
+         # Display the entered text
+         # st.write("Entered text:", text)
+
+
+ ### Example 1: A brawl broke out at the ice hockey game between the Penguins and the Islanders in the US (image:penguin)
Model/MultimodelNER/VLSP2021/__pycache__/MNER_2021.cpython-39.pyc ADDED
Binary file (5.34 kB).
 
Model/MultimodelNER/VLSP2021/__pycache__/dataset_roberta.cpython-39.pyc ADDED
Binary file (10.7 kB).
 
Model/MultimodelNER/VLSP2021/__pycache__/train_umt_2021.cpython-39.pyc ADDED
Binary file (8.82 kB).
 
Model/MultimodelNER/VLSP2021/best_model/bert_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "vinai/phobert-base-v2",
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.35.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
Model/MultimodelNER/VLSP2021/best_model/eval_results.txt ADDED
@@ -0,0 +1,50 @@
+                        precision    recall  f1-score   support
+
+                  /s>    0.0000    0.0000    0.0000         0
+              ADDRESS    0.0455    0.0455    0.0455        22
+             DATETIME    0.4883    0.6221    0.5472       606
+        DATETIME-DATE    0.6016    0.5204    0.5581       563
+   DATETIME-DATERANGE    0.3426    0.2701    0.3020       137
+    DATETIME-DURATION    0.7900    0.6337    0.7033       475
+         DATETIME-SET    0.0000    0.0000    0.0000         4
+        DATETIME-TIME    0.3291    0.5200    0.4031        50
+   DATETIME-TIMERANGE    0.2540    0.1231    0.1658       130
+                EMAIL    1.0000    1.0000    1.0000         2
+                EVENT    0.1687    0.0782    0.1069       179
+            EVENT-CUL    0.5000    0.4375    0.4667        16
+       EVENT-GAMESHOW    0.4085    0.5370    0.4640        54
+        EVENT-NATURAL    0.0000    0.0000    0.0000         9
+          EVENT-SPORT    0.5634    0.5517    0.5575       145
+                   IP    1.0000    1.0000    1.0000        15
+             LOCATION    0.1120    0.2668    0.1578       431
+         LOCATION-GEO    0.5556    0.3333    0.4167       120
+         LOCATION-GPE    0.7486    0.6113    0.6730      2367
+       LOCATION-STRUC    0.5286    0.5248    0.5267       141
+        MISCELLANEOUS    0.0000    0.0000    0.0000         0
+         ORGANIZATION    0.6576    0.7000    0.6782      1630
+     ORGANIZATION-MED    0.5395    0.6833    0.6029       120
+  ORGANIZATION-SPORTS    0.6362    0.7827    0.7019       382
+   ORGANIZATION-STOCK    0.0000    0.0000    0.0000        29
+               PERSON    0.9347    0.9574    0.9459      2466
+           PERSONTYPE    0.5262    0.6104    0.5652       806
+          PHONENUMBER    0.7273    0.8000    0.7619        10
+              PRODUCT    0.4419    0.3834    0.4106       446
+        PRODUCT-AWARD    0.0000    0.0000    0.0000        78
+          PRODUCT-COM    0.3488    0.5068    0.4132       148
+        PRODUCT-LEGAL    0.4107    0.1322    0.2000       174
+             QUANTITY    0.0756    0.3464    0.1241       153
+         QUANTITY-AGE    0.8433    0.9187    0.8794       246
+         QUANTITY-CUR    0.8256    0.8367    0.8311       447
+         QUANTITY-DIM    0.5763    0.2787    0.3757       244
+         QUANTITY-NUM    0.8569    0.5573    0.6754      2182
+         QUANTITY-ORD    0.5388    0.2907    0.3777       454
+         QUANTITY-PER    0.9088    0.8911    0.8999       358
+         QUANTITY-TEM    0.6923    0.8182    0.7500        11
+                SKILL    0.0000    0.0000    0.0000         2
+                  URL    0.5714    0.5714    0.5714         7
+                    _    0.0000    0.0000    0.0000         0
+
+            micro avg    0.6371    0.6260    0.6315     15859
+            macro avg    0.4546    0.4451    0.4386     15859
+         weighted avg    0.6838    0.6260    0.6430     15859
+ Overall: 0.6368360277136259 0.6259537171322278 0.6313479823194582
Model/MultimodelNER/VLSP2021/best_model/model_config.json ADDED
@@ -0,0 +1 @@
+ {"bert_model": "vinai/phobert-base-v2", "do_lower": false, "max_seq_length": 256, "num_labels": 89, "label_map": {"1": "O", "2": "I-PRODUCT-AWARD", "3": "B-MISCELLANEOUS", "4": "B-QUANTITY-NUM", "5": "B-ORGANIZATION-SPORTS", "6": "B-DATETIME", "7": "I-ADDRESS", "8": "I-PERSON", "9": "I-EVENT-SPORT", "10": "B-ADDRESS", "11": "B-EVENT-NATURAL", "12": "I-LOCATION-GPE", "13": "B-EVENT-GAMESHOW", "14": "B-DATETIME-TIMERANGE", "15": "I-QUANTITY-NUM", "16": "I-QUANTITY-AGE", "17": "B-EVENT-CUL", "18": "I-QUANTITY-TEM", "19": "I-PRODUCT-LEGAL", "20": "I-LOCATION-STRUC", "21": "I-ORGANIZATION", "22": "B-PHONENUMBER", "23": "B-IP", "24": "B-QUANTITY-AGE", "25": "I-DATETIME-TIME", "26": "I-DATETIME", "27": "B-ORGANIZATION-MED", "28": "B-DATETIME-SET", "29": "I-EVENT-CUL", "30": "B-QUANTITY-DIM", "31": "I-QUANTITY-DIM", "32": "B-EVENT", "33": "B-DATETIME-DATERANGE", "34": "I-EVENT-GAMESHOW", "35": "B-PRODUCT-AWARD", "36": "B-LOCATION-STRUC", "37": "B-LOCATION", "38": "B-PRODUCT", "39": "I-MISCELLANEOUS", "40": "B-SKILL", "41": "I-QUANTITY-ORD", "42": "I-ORGANIZATION-STOCK", "43": "I-LOCATION-GEO", "44": "B-PERSON", "45": "B-PRODUCT-COM", "46": "B-PRODUCT-LEGAL", "47": "I-LOCATION", "48": "B-QUANTITY-TEM", "49": "I-PRODUCT", "50": "B-QUANTITY-CUR", "51": "I-QUANTITY-CUR", "52": "B-LOCATION-GPE", "53": "I-PHONENUMBER", "54": "I-ORGANIZATION-MED", "55": "I-EVENT-NATURAL", "56": "I-EMAIL", "57": "B-ORGANIZATION", "58": "B-URL", "59": "I-DATETIME-TIMERANGE", "60": "I-QUANTITY", "61": "I-IP", "62": "B-EVENT-SPORT", "63": "B-PERSONTYPE", "64": "B-QUANTITY-PER", "65": "I-QUANTITY-PER", "66": "I-PRODUCT-COM", "67": "I-DATETIME-DURATION", "68": "B-LOCATION-GPE-GEO", "69": "B-QUANTITY-ORD", "70": "I-EVENT", "71": "B-DATETIME-TIME", "72": "B-QUANTITY", "73": "I-DATETIME-SET", "74": "I-LOCATION-GPE-GEO", "75": "B-ORGANIZATION-STOCK", "76": "I-ORGANIZATION-SPORTS", "77": "I-SKILL", "78": "I-URL", "79": "B-DATETIME-DURATION", "80": "I-DATETIME-DATE", "81": "I-PERSONTYPE", "82": "B-DATETIME-DATE", "83": "I-DATETIME-DATERANGE", "84": "B-LOCATION-GEO", "85": "B-EMAIL", "86": "X", "87": "<s>", "88": "</s>"}}
Model/MultimodelNER/VLSP2021/best_model/mtmner_pred.txt ADDED
The diff for this file is too large to render. See raw diff
 
Model/MultimodelNER/VLSP2021/best_model/pytorch_encoder.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9314f35060b4df32a623855c3e6a665cdaee354eeb5cd4925085fc7b00cc180c
+ size 241699561
Model/MultimodelNER/VLSP2021/best_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd439026a1a847a3c53a011acff0ca41b734b4084fd801d99c5e1ba962358d20
+ size 699784873