Spaces:
Build error
Build error
File size: 2,131 Bytes
7f7285f 817b9ee 7f7285f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# -*- coding: utf-8 -*-
'''
@Author : Jiangjie Chen
@Time : 2020/9/20 11:42
@Contact : jjchen19@fudan.edu.cn
@Description:
'''
import torch
from transformers import BertTokenizer
from .retrieval_model.bert_model import BertForSequenceEncoder
from .retrieval_model.models import inference_model
from .retrieval_model.data_loader import DataLoaderTest
class SentSelector:
def __init__(self, pretrained_bert_path, select_model_path, args):
self.args = args
self.use_cuda = self.args.use_cuda and torch.cuda.is_available()
self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
self.bert_model = BertForSequenceEncoder.from_pretrained(pretrained_bert_path)
self.rank_model = inference_model(self.bert_model, self.args)
self.rank_model.load_state_dict(torch.load(select_model_path,
map_location=None if self.use_cuda else torch.device('cpu'))['model'])
if self.use_cuda:
self.bert_model = self.bert_model.cuda()
self.rank_model.cuda()
def rank_sentences(self, js: list):
'''
:param js: [{'claim': xxx, 'id': xx, 'evidence': xxx}]
:return: [(ent, num, sent, prob), (ent, num, sent, prob)]
'''
data_reader = DataLoaderTest(js, self.tokenizer, self.args, self.use_cuda)
self.rank_model.eval()
all_predict = dict()
for inp_tensor, msk_tensor, seg_tensor, ids, evi_list in data_reader:
probs = self.rank_model(inp_tensor, msk_tensor, seg_tensor)
probs = probs.tolist()
assert len(probs) == len(evi_list)
for i in range(len(probs)):
if ids[i] not in all_predict:
all_predict[ids[i]] = []
# if probs[i][1] >= probs[i][0]:
all_predict[ids[i]].append(tuple(evi_list[i]) + (probs[i],))
results = {}
for k, v in all_predict.items():
sorted_v = sorted(v, key=lambda x: x[-1], reverse=True)
results[k] = sorted_v[:self.args.evi_num]
return results
|