WwYc committed
Commit 4f00e9e · verified · 1 Parent(s): dd0b9c4

Delete lxmert/src/pretrain

lxmert/src/pretrain/__init__.py DELETED
File without changes
lxmert/src/pretrain/lxmert_data.py DELETED
@@ -1,255 +0,0 @@
# coding=utf-8
# Copyleft 2019 project LXRT.

from collections import defaultdict
import json
import random

import numpy as np
from torch.utils.data import Dataset

from param import args
from pretrain.qa_answer_table import AnswerTable
from utils import load_obj_tsv

TINY_IMG_NUM = 500
FAST_IMG_NUM = 5000

Split2ImgFeatPath = {
    'mscoco_train': 'data/mscoco_imgfeat/train2014_obj36.tsv',
    'mscoco_minival': 'data/mscoco_imgfeat/val2014_obj36.tsv',
    'mscoco_nominival': 'data/mscoco_imgfeat/val2014_obj36.tsv',
    'vgnococo': 'data/vg_gqa_imgfeat/vg_gqa_obj36.tsv',
}


class InputExample(object):
    """A single training/test example for the language model."""
    def __init__(self, uid, sent, visual_feats=None,
                 obj_labels=None, attr_labels=None,
                 is_matched=None, label=None):
        self.uid = uid
        self.sent = sent
        self.visual_feats = visual_feats
        self.obj_labels = obj_labels
        self.attr_labels = attr_labels
        self.is_matched = is_matched  # whether the sentence and the image are matched
        self.label = label


class LXMERTDataset:
    def __init__(self, splits: str, qa_sets=None):
        """
        :param splits: The data sources to be loaded.
        :param qa_sets: if None, no action;
            otherwise, only keep the answers appearing in these datasets
            and remove all unlabeled data (MSCOCO captions).
        """
        self.name = splits
        self.sources = splits.split(',')

        # Loading datasets to data
        self.data = []
        for source in self.sources:
            self.data.extend(json.load(open("data/lxmert/%s.json" % source)))
        print("Load %d data from %s" % (len(self.data), self.name))

        # Create answer table according to the qa_sets
        self.answer_table = AnswerTable(qa_sets)
        print("Load an answer table of size %d." % (len(self.answer_table.ans2id_map())))

        # Modify the answers
        for datum in self.data:
            labelf = datum['labelf']
            for cat, labels in labelf.items():
                for label in labels:
                    for ans in list(label.keys()):
                        new_ans = self.answer_table.convert_ans(ans)
                        if self.answer_table.used(new_ans):
                            if ans != new_ans:
                                label[new_ans] = label.pop(ans)
                        else:
                            label.pop(ans)

    def __len__(self):
        return len(self.data)


def make_uid(img_id, dset, sent_idx):
    return "%s_%s_%03d" % (img_id, dset, sent_idx)


"""
Example in obj tsv:
FIELDNAMES = ["img_id", "img_h", "img_w", "objects_id", "objects_conf",
              "attrs_id", "attrs_conf", "num_boxes", "boxes", "features"]
"""
class LXMERTTorchDataset(Dataset):
    def __init__(self, dataset: LXMERTDataset, topk=-1):
        super().__init__()
        self.raw_dataset = dataset
        self.task_matched = args.task_matched

        if args.tiny:
            topk = TINY_IMG_NUM
        elif args.fast:
            topk = FAST_IMG_NUM

        # Load the dataset
        img_data = []
        for source in self.raw_dataset.sources:
            img_data.extend(load_obj_tsv(Split2ImgFeatPath[source], topk))

        self.imgid2img = {}
        for img_datum in img_data:
            self.imgid2img[img_datum['img_id']] = img_datum

        # Filter out the dataset
        used_data = []
        for datum in self.raw_dataset.data:
            if datum['img_id'] in self.imgid2img:
                used_data.append(datum)

        # Flatten the dataset (into one sent + one image entries)
        self.data = []
        for datum in used_data:
            sentf = datum['sentf']
            for sents_cat, sents in sentf.items():
                if sents_cat in datum['labelf']:
                    labels = datum['labelf'][sents_cat]
                else:
                    labels = None
                for sent_idx, sent in enumerate(sents):
                    new_datum = {
                        'uid': make_uid(datum['img_id'], sents_cat, sent_idx),
                        'img_id': datum['img_id'],
                        'sent': sent
                    }
                    if labels is not None:
                        new_datum['label'] = labels[sent_idx]
                    self.data.append(new_datum)
        print("Use %d data in torch dataset" % (len(self.data)))

    def __len__(self):
        return len(self.data)

    def random_feat(self):
        """Get a random obj feat from the dataset."""
        datum = self.data[random.randint(0, len(self.data)-1)]
        img_id = datum['img_id']
        img_info = self.imgid2img[img_id]
        feat = img_info['features'][random.randint(0, 35)]
        return feat

    def __getitem__(self, item: int):
        datum = self.data[item]

        uid = datum['uid']
        img_id = datum['img_id']

        # Get image info
        img_info = self.imgid2img[img_id]
        obj_num = img_info['num_boxes']
        feats = img_info['features'].copy()
        boxes = img_info['boxes'].copy()
        obj_labels = img_info['objects_id'].copy()
        obj_confs = img_info['objects_conf'].copy()
        attr_labels = img_info['attrs_id'].copy()
        attr_confs = img_info['attrs_conf'].copy()
        assert obj_num == len(boxes) == len(feats)

        # Normalize the boxes (to 0 ~ 1)
        img_h, img_w = img_info['img_h'], img_info['img_w']
        boxes = boxes.copy()
        boxes[:, (0, 2)] /= img_w
        boxes[:, (1, 3)] /= img_h
        np.testing.assert_array_less(boxes, 1+1e-5)
        np.testing.assert_array_less(-boxes, 0+1e-5)

        # If calculating the matched loss, replace the sentence with a sentence
        # corresponding to another image.
        is_matched = 1
        sent = datum['sent']
        if self.task_matched:
            if random.random() < 0.5:
                is_matched = 0
                other_datum = self.data[random.randint(0, len(self.data)-1)]
                while other_datum['img_id'] == img_id:
                    other_datum = self.data[random.randint(0, len(self.data)-1)]
                sent = other_datum['sent']

        # Label, convert answer to id
        if 'label' in datum:
            label = datum['label'].copy()
            for ans in list(label.keys()):
                label[self.raw_dataset.answer_table.ans2id(ans)] = label.pop(ans)
        else:
            label = None

        # Create target
        example = InputExample(
            uid, sent, (feats, boxes),
            (obj_labels, obj_confs), (attr_labels, attr_confs),
            is_matched, label
        )
        return example


class LXMERTEvaluator:
    def __init__(self, dataset: LXMERTDataset):
        self.raw_dataset = dataset

        # Create QA Eval Data
        self.data = []
        for datum in self.raw_dataset.data:
            sentf = datum['sentf']
            for sents_cat, sents in sentf.items():
                if sents_cat in datum['labelf']:    # A labeled dataset
                    labels = datum['labelf'][sents_cat]
                    for sent_idx, sent in enumerate(sents):
                        new_datum = {
                            'uid': make_uid(datum['img_id'], sents_cat, sent_idx),
                            'img_id': datum['img_id'],
                            'sent': sent,
                            'dset': sents_cat,
                            'label': labels[sent_idx]
                        }
                        self.data.append(new_datum)

        # uid2datum
        self.uid2datum = {}
        for datum in self.data:
            self.uid2datum[datum['uid']] = datum

    def evaluate(self, uid2ans: dict, pprint=False):
        score = 0.
        cnt = 0
        dset2score = defaultdict(lambda: 0.)
        dset2cnt = defaultdict(lambda: 0)
        for uid, ans in uid2ans.items():
            if uid not in self.uid2datum:   # Not a labeled datum
                continue
            datum = self.uid2datum[uid]
            label = datum['label']
            dset = datum['dset']
            if ans in label:
                score += label[ans]
                dset2score[dset] += label[ans]
            cnt += 1
            dset2cnt[dset] += 1
        accu = score / cnt
        dset2accu = {}
        for dset in dset2cnt:
            dset2accu[dset] = dset2score[dset] / dset2cnt[dset]

        if pprint:
            accu_str = "Overall Accu %0.4f, " % (accu)
            sorted_keys = sorted(dset2accu.keys())
            for key in sorted_keys:
                accu_str += "%s Accu %0.4f, " % (key, dset2accu[key])
            print(accu_str)

        return accu, dset2accu

    def dump_result(self, uid2ans: dict, path):
        raise NotImplementedError
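
For reference, the loaders above index each record in data/lxmert/*.json by 'img_id', 'sentf', and 'labelf'. The following is a minimal sketch of one such record, inferred from those field accesses rather than taken from the data release; all values are illustrative:

# Hypothetical example of one datum in data/lxmert/mscoco_train.json (values are illustrative).
datum = {
    'img_id': 'COCO_train2014_000000000009',       # must match an img_id in the obj36 TSV
    'sentf': {                                     # sentence sources, keyed by dataset name
        'mscoco': ['A plate of food on a table.'],
        'vqa': ['What is on the plate?'],
    },
    'labelf': {                                    # QA labels, aligned index-by-index with sentf
        'vqa': [{'food': 1.0, 'breakfast': 0.3}],  # answer -> soft score
        # captions ('mscoco') have no entry here, so their 'label' field is left unset
    },
}
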
lxmert/src/pretrain/lxmert_pretrain.py DELETED
@@ -1,435 +0,0 @@
# coding=utf-8
# Copyleft 2019 project LXRT.

import collections
import os
import random

from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from param import args
from pretrain.lxmert_data import InputExample, LXMERTDataset, LXMERTTorchDataset, LXMERTEvaluator
from lxrt.entry import set_visual_config
from lxrt.tokenization import BertTokenizer
from lxrt.modeling import LXRTPretraining

DataTuple = collections.namedtuple("DataTuple", 'dataset torchdset loader evaluator')


def get_tuple(splits: str, bs: int, shuffle=False, drop_last=False, topk=-1) -> DataTuple:
    # Decide which QA datasets would be used in pre-training.
    # Options: vqa, gqa, visual7w
    # Note: visual7w is a part of vgqa, we take the name here.
    qa_sets = args.qa_sets
    if qa_sets is not None:
        qa_sets = set(qa_set.lower().strip() for qa_set in qa_sets.split(","))

    # Build dataset, data loader, and evaluator.
    dset = LXMERTDataset(splits, qa_sets=qa_sets)
    tset = LXMERTTorchDataset(dset, topk)
    data_loader = DataLoader(
        tset, batch_size=bs,
        shuffle=shuffle, num_workers=args.num_workers,
        collate_fn=lambda x: x,
        drop_last=drop_last, pin_memory=True
    )
    evaluator = LXMERTEvaluator(dset)
    print()

    return DataTuple(dataset=dset, torchdset=tset, loader=data_loader, evaluator=evaluator)


train_tuple = get_tuple(args.train, args.batch_size, shuffle=True, drop_last=True)
valid_batch_size = 2048 if args.multiGPU else 512
valid_tuple = get_tuple(args.valid, valid_batch_size, shuffle=False, drop_last=False, topk=5000)


class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self,
                 input_ids, input_mask, segment_ids, lm_label_ids,
                 visual_feats, obj_labels,
                 is_matched, ans):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.lm_label_ids = lm_label_ids

        self.visual_feats = visual_feats
        self.obj_labels = obj_labels

        self.is_matched = is_matched

        self.ans = ans


def random_word(tokens, tokenizer):
    """
    Mask some random tokens for the masked-LM task with probabilities as in the original BERT paper.
    :param tokens: list of str, tokenized sentence.
    :param tokenizer: Tokenizer, object used for tokenization (we need its vocab here)
    :return: (list of str, list of int), masked tokens and related labels for LM prediction
    """
    output_label = []

    for i, token in enumerate(tokens):
        prob = random.random()
        # mask token with probability
        ratio = args.word_mask_rate
        if prob < ratio:
            prob /= ratio

            # 80% randomly change token to mask token
            if prob < 0.8:
                tokens[i] = "[MASK]"

            # 10% randomly change token to random token
            elif prob < 0.9:
                tokens[i] = random.choice(list(tokenizer.vocab.items()))[0]

            # -> rest 10% randomly keep current token

            # append current token to output (we will predict these later)
            try:
                output_label.append(tokenizer.vocab[token])
            except KeyError:
                # For unknown words (should not occur with BPE vocab)
                output_label.append(tokenizer.vocab["[UNK]"])
        else:
            # no masking token (will be ignored by loss function later)
            output_label.append(-1)

    return tokens, output_label


def random_feat(feats):
    mask_feats = feats.copy()
    feat_mask = np.zeros(len(feats), dtype=np.float32)
    for i in range(len(feats)):
        prob = random.random()
        # mask token with probability
        if prob < args.obj_mask_rate:
            prob /= args.obj_mask_rate

            # 80% randomly change token to zero feat
            if prob < 0.8:
                mask_feats[i, :] = 0.

            # 10% randomly change token to random feat
            elif prob < 0.9:
                mask_feats[i, :] = train_tuple.torchdset.random_feat()
            # -> rest 10% randomly keep current feat

            # Need to predict this feat
            feat_mask[i] = 1.

    return mask_feats, feat_mask


def convert_example_to_features(example: InputExample, max_seq_length, tokenizer) -> InputFeatures:
    """
    Convert a raw sample (pair of sentences as tokenized strings) into a proper training sample with
    IDs, LM labels, input_mask, CLS and SEP tokens etc.
    :param example: InputExample, containing sentence input as strings and is_next label
    :param max_seq_length: int, maximum length of sequence.
    :param tokenizer: Tokenizer
    :return: InputFeatures, containing all inputs and labels of one sample as IDs (as used for model training)
    """
    tokens = tokenizer.tokenize(example.sent.strip())

    # Account for [CLS] and [SEP] with "- 2"
    if len(tokens) > max_seq_length - 2:
        tokens = tokens[:(max_seq_length - 2)]

    # Get random words
    masked_tokens, masked_label = random_word(tokens, tokenizer)

    # concatenate lm labels and account for CLS, SEP, SEP
    masked_tokens = ['[CLS]'] + masked_tokens + ['[SEP]']
    input_ids = tokenizer.convert_tokens_to_ids(masked_tokens)

    # Mask & Segment Word
    lm_label_ids = ([-1] + masked_label + [-1])
    input_mask = [1] * len(input_ids)
    segment_ids = [0] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)
        lm_label_ids.append(-1)

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length
    assert len(lm_label_ids) == max_seq_length

    feat, boxes = example.visual_feats
    obj_labels, obj_confs = example.obj_labels
    attr_labels, attr_confs = example.attr_labels

    # Mask Image Features:
    masked_feat, feat_mask = random_feat(feat)

    # QA answer label
    if example.label is None or len(example.label) == 0 or example.is_matched != 1:
        # 1. No label 2. Label is pruned 3. unmatched visual + language pair
        ans = -1
    else:
        keys, values = zip(*example.label.items())
        if len(keys) == 1:
            ans = keys[0]
        else:
            value_sum = sum(values)
            prob = [value / value_sum for value in values]
            choice = np.random.multinomial(1, prob).argmax()
            ans = keys[choice]

    features = InputFeatures(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        lm_label_ids=lm_label_ids,
        visual_feats=(masked_feat, boxes),
        obj_labels={
            'obj': (obj_labels, obj_confs),
            'attr': (attr_labels, attr_confs),
            'feat': (feat, feat_mask),
        },
        is_matched=example.is_matched,
        ans=ans,
    )
    return features


LOSSES_NAME = ('Mask_LM', 'Matched', 'Obj', 'Attr', 'Feat', 'QA')


class LXMERT:
    def __init__(self, max_seq_length):
        super().__init__()
        self.max_seq_length = max_seq_length

        self.tokenizer = BertTokenizer.from_pretrained(
            "bert-base-uncased",
            do_lower_case=True
        )

        # Build model
        set_visual_config(args)
        self.model = LXRTPretraining.from_pretrained(
            "bert-base-uncased",
            task_mask_lm=args.task_mask_lm,
            task_obj_predict=args.task_obj_predict,
            task_matched=args.task_matched,
            task_qa=args.task_qa,
            visual_losses=args.visual_losses,
            num_answers=train_tuple.dataset.answer_table.num_answers
        )

        # Weight initialization and loading
        if args.from_scratch:
            print("Train from Scratch: re-initialize all BERT weights.")
            self.model.apply(self.model.init_bert_weights)
        if args.load is not None:
            self.load(args.load)
        if args.load_lxmert is not None:
            # Loading lxmert does not load the answer head.
            self.load_lxmert(args.load_lxmert)

        # GPU Options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model = nn.DataParallel(self.model)

    def forward(self, examples):
        train_features = [convert_example_to_features(example, self.max_seq_length, self.tokenizer)
                          for example in examples]

        # Language Inputs
        input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long).cuda()
        input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long).cuda()
        segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long).cuda()

        # Visual Inputs
        feats = torch.from_numpy(np.stack([f.visual_feats[0] for f in train_features])).cuda()
        pos = torch.from_numpy(np.stack([f.visual_feats[1] for f in train_features])).cuda()

        # Language Prediction
        lm_labels = torch.tensor([f.lm_label_ids for f in train_features], dtype=torch.long).cuda()

        # Visual Prediction
        obj_labels = {}
        for key in ('obj', 'attr', 'feat'):
            visn_labels = torch.from_numpy(np.stack([f.obj_labels[key][0] for f in train_features])).cuda()
            visn_mask = torch.from_numpy(np.stack([f.obj_labels[key][1] for f in train_features])).cuda()
            assert visn_labels.size(0) == visn_mask.size(0) and visn_labels.size(1) == visn_mask.size(1)
            obj_labels[key] = (visn_labels, visn_mask)

        # Joint Prediction
        matched_labels = torch.tensor([f.is_matched for f in train_features], dtype=torch.long).cuda()
        ans = torch.from_numpy(np.stack([f.ans for f in train_features])).cuda()

        """
        forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
                visual_feats=None, pos=None, obj_labels=None, matched_label=None, ans=None):
        """
        loss, losses, ans_logit = self.model(
            input_ids, segment_ids, input_mask, lm_labels,
            feats, pos, obj_labels, matched_labels, ans
        )
        return loss, losses.detach().cpu(), ans_logit

    def train_batch(self, optim, batch):
        optim.zero_grad()
        loss, losses, ans_logit = self.forward(batch)
        if args.multiGPU:
            loss = loss.mean()
            losses = losses.mean(0)
        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), 1.)
        optim.step()

        return loss.item(), losses.cpu().numpy(), ans_logit

    def valid_batch(self, batch):
        with torch.no_grad():
            loss, losses, ans_logit = self.forward(batch)
            if args.multiGPU:
                loss = loss.mean()
                losses = losses.mean(0)
        return loss.item(), losses.cpu().numpy(), ans_logit

    def train(self, train_tuple: DataTuple, eval_tuple: DataTuple):
        train_ld = train_tuple.loader

        # Optimizer
        from lxrt.optimization import BertAdam
        batch_per_epoch = len(train_ld)
        t_total = int(batch_per_epoch * args.epochs)
        warmup_ratio = 0.05
        warmup_iters = int(t_total * warmup_ratio)
        print("Batch per epoch: %d" % batch_per_epoch)
        print("Total Iters: %d" % t_total)
        print("Warm up Iters: %d" % warmup_iters)
        optim = BertAdam(self.model.parameters(), lr=args.lr, warmup=warmup_ratio, t_total=t_total)

        # Train
        best_eval_loss = 9595.
        for epoch in range(args.epochs):
            # Train
            self.model.train()
            total_loss = 0.
            total_losses = 0.
            uid2ans = {}
            for batch in tqdm(train_ld, total=len(train_ld)):
                loss, losses, logit = self.train_batch(optim, batch)
                total_loss += loss
                total_losses += losses

                if args.task_qa:
                    score, label = logit.max(1)
                    for datum, l in zip(batch, label.cpu().numpy()):
                        uid = datum.uid
                        ans = train_tuple.dataset.answer_table.id2ans(l)
                        uid2ans[uid] = ans

            print("The training loss for Epoch %d is %0.4f" % (epoch, total_loss / batch_per_epoch))
            losses_str = "The losses are "
            for name, loss in zip(LOSSES_NAME, total_losses):
                losses_str += "%s: %0.4f " % (name, loss / batch_per_epoch)
            print(losses_str)
            if args.task_qa:
                train_tuple.evaluator.evaluate(uid2ans, pprint=True)

            # Eval
            avg_eval_loss = self.evaluate_epoch(eval_tuple, iters=-1)

            # Save
            if avg_eval_loss < best_eval_loss:
                best_eval_loss = avg_eval_loss
                self.save("BEST_EVAL_LOSS")
            self.save("Epoch%02d" % (epoch+1))

    def evaluate_epoch(self, eval_tuple: DataTuple, iters: int=-1):
        self.model.eval()
        eval_ld = eval_tuple.loader
        total_loss = 0.
        total_losses = 0.
        uid2ans = {}
        for i, batch in enumerate(eval_ld):
            loss, losses, logit = self.valid_batch(batch)
            total_loss += loss
            total_losses += losses
            if args.task_qa:
                score, label = logit.max(1)
                for datum, l in zip(batch, label.cpu().numpy()):
                    uid = datum.uid
                    ans = train_tuple.dataset.answer_table.id2ans(l)
                    uid2ans[uid] = ans
            if i == iters:
                break

        print("The valid loss is %0.4f" % (total_loss / len(eval_ld)))
        losses_str = "The losses are "
        for name, loss in zip(LOSSES_NAME, total_losses / len(eval_ld)):
            losses_str += "%s: %0.4f " % (name, loss)
        print(losses_str)

        if args.task_qa:
            eval_tuple.evaluator.evaluate(uid2ans, pprint=True)

        return total_loss / len(eval_ld)

    def save(self, name):
        torch.save(self.model.state_dict(),
                   os.path.join(args.output, "%s_LXRT.pth" % name))

    def load(self, path):
        print("Load BERT extractor from %s" % path)
        state_dict = torch.load("%s_LXRT.pth" % path)
        self.model.load_state_dict(state_dict)

    def load_lxmert(self, path):
        print("Load LXMERT model from %s" % path)
        state_dict = torch.load("%s_LXRT.pth" % path)

        # Do not load any answer head
        for key in list(state_dict.keys()):
            if 'answer' in key:
                state_dict.pop(key)

        # Change Multi GPU to single GPU
        new_state_dict = {}
        for key, value in state_dict.items():
            if key.startswith("module."):
                new_state_dict[key[len("module."):]] = value
            else:
                new_state_dict[key] = value  # keep keys saved without the DataParallel prefix
        state_dict = new_state_dict

        load_keys = set(state_dict.keys())
        model_keys = set(self.model.state_dict().keys())
        print()
        print("Keys in loaded but not in model:")
        for key in sorted(load_keys.difference(model_keys)):
            print(key)
        print()
        print("Keys in model but not in loaded:")
        for key in sorted(model_keys.difference(load_keys)):
            print(key)
        print()

        self.model.load_state_dict(state_dict, strict=False)


if __name__ == "__main__":
    lxmert = LXMERT(max_seq_length=20)
    lxmert.train(train_tuple, valid_tuple)
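
The masking in random_word (and, analogously, random_feat) follows the BERT 80/10/10 recipe: each token is selected with probability args.word_mask_rate; a selected token is replaced by [MASK] 80% of the time, by a random vocabulary token 10% of the time, and kept unchanged 10% of the time, while its original id is recorded as the prediction target (-1 elsewhere). The following is a self-contained sketch of that recipe; mask_tokens and toy_vocab are illustrative names only, and the real code operates on BertTokenizer's wordpiece vocab:

import random

# Toy stand-in for tokenizer.vocab; values are token ids.
toy_vocab = {'[UNK]': 0, '[MASK]': 1, 'a': 2, 'plate': 3, 'of': 4, 'food': 5}

def mask_tokens(tokens, vocab, mask_rate=0.15):
    labels = []
    for i, tok in enumerate(tokens):
        p = random.random()
        if p < mask_rate:
            p /= mask_rate
            if p < 0.8:
                tokens[i] = '[MASK]'                       # 80%: replace with [MASK]
            elif p < 0.9:
                tokens[i] = random.choice(list(vocab))     # 10%: replace with a random token
            # remaining 10%: keep the original token
            labels.append(vocab.get(tok, vocab['[UNK]']))  # predict the original id
        else:
            labels.append(-1)                              # ignored by the LM loss
    return tokens, labels

print(mask_tokens(['a', 'plate', 'of', 'food'], toy_vocab))
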
lxmert/src/pretrain/qa_answer_table.py DELETED
@@ -1,158 +0,0 @@
# coding=utf-8
# Copyleft 2019 project LXRT.

import json
import torch


class AnswerTable:
    ANS_CONVERT = {
        "a man": "man",
        "the man": "man",
        "a woman": "woman",
        "the woman": "woman",
        'one': '1',
        'two': '2',
        'three': '3',
        'four': '4',
        'five': '5',
        'six': '6',
        'seven': '7',
        'eight': '8',
        'nine': '9',
        'ten': '10',
        'grey': 'gray',
    }

    def __init__(self, dsets=None):
        self.all_ans = json.load(open("data/lxmert/all_ans.json"))
        if dsets is not None:
            dsets = set(dsets)
            # Only keep an answer if it is used in the given dsets
            self.anss = [ans['ans'] for ans in self.all_ans if
                         len(set(ans['dsets']) & dsets) > 0]
        else:
            self.anss = [ans['ans'] for ans in self.all_ans]
        self.ans_set = set(self.anss)

        self._id2ans_map = self.anss
        self._ans2id_map = {ans: ans_id for ans_id, ans in enumerate(self.anss)}

        assert len(self._id2ans_map) == len(self._ans2id_map)
        for ans_id, ans in enumerate(self._id2ans_map):
            assert self._ans2id_map[ans] == ans_id

    def convert_ans(self, ans):
        if len(ans) == 0:
            return ""
        ans = ans.lower()
        if ans[-1] == '.':
            ans = ans[:-1].strip()
        if ans.startswith("a "):
            ans = ans[2:].strip()
        if ans.startswith("an "):
            ans = ans[3:].strip()
        if ans.startswith("the "):
            ans = ans[4:].strip()
        if ans in self.ANS_CONVERT:
            ans = self.ANS_CONVERT[ans]
        return ans

    def ans2id(self, ans):
        return self._ans2id_map[ans]

    def id2ans(self, ans_id):
        return self._id2ans_map[ans_id]

    def ans2id_map(self):
        return self._ans2id_map.copy()

    def id2ans_map(self):
        return self._id2ans_map.copy()

    def used(self, ans):
        return ans in self.ans_set

    def all_answers(self):
        return self.anss.copy()

    @property
    def num_answers(self):
        return len(self.anss)


def load_lxmert_qa(path, model, label2ans):
    """
    Load model weights from lxmert pre-training.
    The answers in the fine-tuned QA task (indicated by label2ans)
    would also be properly initialized with lxmert pre-trained
    QA heads.

    :param path: Path to lxmert snapshot.
    :param model: LXRT model instance.
    :param label2ans: The label2ans dict of fine-tuned QA datasets, like
        {0: 'cat', 1: 'dog', ...}
    :return:
    """
    print("Load QA pre-trained lxmert from %s " % path)
    loaded_state_dict = torch.load("%s_LXRT.pth" % path)
    model_state_dict = model.state_dict()

    # Handle Multi-GPU pre-training --> Single GPU fine-tuning
    for key in list(loaded_state_dict.keys()):
        loaded_state_dict[key.replace("module.", '')] = loaded_state_dict.pop(key)

    # Isolate bert model
    bert_state_dict = {}
    for key, value in loaded_state_dict.items():
        if key.startswith('bert.'):
            bert_state_dict[key] = value

    # Isolate answer head
    answer_state_dict = {}
    for key, value in loaded_state_dict.items():
        if key.startswith("answer_head."):
            answer_state_dict[key.replace('answer_head.', '')] = value

    # Do surgery on answer state dict
    ans_weight = answer_state_dict['logit_fc.3.weight']
    ans_bias = answer_state_dict['logit_fc.3.bias']
    import copy
    new_answer_weight = copy.deepcopy(model_state_dict['logit_fc.3.weight'])
    new_answer_bias = copy.deepcopy(model_state_dict['logit_fc.3.bias'])
    answer_table = AnswerTable()
    loaded = 0
    unload = 0
    if type(label2ans) is list:
        label2ans = {label: ans for label, ans in enumerate(label2ans)}
    for label, ans in label2ans.items():
        new_ans = answer_table.convert_ans(ans)
        if answer_table.used(new_ans):
            ans_id_9500 = answer_table.ans2id(new_ans)
            new_answer_weight[label] = ans_weight[ans_id_9500]
            new_answer_bias[label] = ans_bias[ans_id_9500]
            loaded += 1
        else:
            new_answer_weight[label] = 0.
            new_answer_bias[label] = 0.
            unload += 1
    print("Loaded %d answers from LXRTQA pre-training and %d not" % (loaded, unload))
    print()
    answer_state_dict['logit_fc.3.weight'] = new_answer_weight
    answer_state_dict['logit_fc.3.bias'] = new_answer_bias

    # Load BERT weights
    bert_model_keys = set(model.lxrt_encoder.model.state_dict().keys())
    bert_loaded_keys = set(bert_state_dict.keys())
    assert len(bert_model_keys - bert_loaded_keys) == 0
    model.lxrt_encoder.model.load_state_dict(bert_state_dict, strict=False)

    # Load answer logit FC weights
    model_keys = set(model.state_dict().keys())
    ans_loaded_keys = set(answer_state_dict.keys())
    assert len(ans_loaded_keys - model_keys) == 0

    model.load_state_dict(answer_state_dict, strict=False)
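
The answer normalization applied before any table lookup lowercases the string, strips a trailing period and a leading article, and maps a few synonyms through ANS_CONVERT. A small sketch of the resulting behavior; it assumes data/lxmert/all_ans.json is available so that AnswerTable() can be constructed, and the expected outputs in the comments follow directly from convert_ans above:

# Behavior of AnswerTable.convert_ans on a few inputs (expected values shown in comments).
table = AnswerTable()                   # loads data/lxmert/all_ans.json
print(table.convert_ans("The man."))    # -> "man"   (lowercase, strip '.', strip article)
print(table.convert_ans("a Grey"))      # -> "gray"  (article stripped, 'grey' -> 'gray')
print(table.convert_ans("Two"))         # -> "2"     (via ANS_CONVERT)
print(table.convert_ans("pizza"))       # -> "pizza" (unchanged)
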