File size: 2,505 Bytes
fa01b79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62ab090
fa01b79
 
 
 
 
 
 
 
 
 
31f75a0
 
fa01b79
 
 
 
 
62ab090
fa01b79
 
 
62ab090
fa01b79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from pathlib import Path
from transformers import AutoTokenizer, pipeline
import numpy as np
import torch
import torch.nn as nn
from text_utils import post_process_answer
from graph_utils import find_best_cluster
from optimum.intel import OVModelForQuestionAnswering
import os
import json
from text_utils import * 


# os.environ['HTTP_PROXY'] = 'http://proxy.hcm.fpt.vn:80'
class QAEnsembleModel_modify(nn.Module):
    """Extractive question-answering wrapper around an OpenVINO-exported model.

    The constructor loads ``model_name`` through
    ``OVModelForQuestionAnswering`` and wraps it in a ``transformers``
    ``question-answering`` pipeline. ``forward`` runs every pipeline in
    ``self.nlps`` over a list of candidate contexts and returns one
    post-processed answer string, or ``None`` when no candidate clears the
    confidence threshold.
    """

    def __init__(self, model_name, entity_dict,
                 thr=0.1, device="cpu"):
        """Load the model and build the question-answering pipeline.

        Args:
            model_name: Model identifier or path handed to
                ``OVModelForQuestionAnswering.from_pretrained``; it is also
                used as the tokenizer name for the pipeline.
            entity_dict: Passed unchanged to ``post_process_answer`` for
                every answer (its schema is defined by ``text_utils``).
            thr: Minimum raw pipeline score an answer needs in order to be
                kept as a candidate.
            device: Accepted for backward compatibility only; it is not
                read anywhere in this class.
        """
        super().__init__()
        self.nlps = []
        # Never keep access tokens in source control. The token is read from
        # the environment instead; when neither variable is set, ``None`` is
        # passed, which is enough for public models.
        # NOTE(review): the token that used to be hard-coded here must be
        # treated as leaked and revoked.
        auth_token = (os.environ.get("HF_TOKEN")
                      or os.environ.get("HUGGING_FACE_HUB_TOKEN"))
        # export=True asks optimum to convert the checkpoint to OpenVINO
        # format while loading.
        model_convert = OVModelForQuestionAnswering.from_pretrained(
            model_name, export=True, use_auth_token=auth_token)
        # Presumably switches the model to FP16 before compilation -- see the
        # optimum-intel documentation for the exact semantics of half().
        model_convert.half()
        model_convert.compile()
        nlp = pipeline('question-answering', model=model_convert,
                       tokenizer=model_name)
        self.nlps.append(nlp)
        self.entity_dict = entity_dict
        self.thr = thr

    def forward(self, question, texts, ranking_scores=None):
        """Answer ``question`` from the candidate contexts in ``texts``.

        Args:
            question: The question string given to each pipeline.
            texts: Iterable of context strings, queried one at a time.
            ranking_scores: Optional per-context weights, paired with
                ``texts`` via ``zip`` (extra items on either side are
                ignored). Defaults to a weight of 1 for every context.

        Returns:
            The post-processed answer chosen by ``find_best_cluster``, or
            ``None`` if no pipeline result has a raw score above
            ``self.thr``.
        """
        if ranking_scores is None:
            ranking_scores = np.ones((len(texts),))

        curr_answers = []
        best_answer = None
        # Start below every real score so that the first pipeline always
        # yields a best answer when at least one context was processed. The
        # previous start value of 0 left the best answer unassigned whenever
        # all weighted scores were <= 0, which raised UnboundLocalError.
        best_score = float("-inf")
        for i, nlp in enumerate(self.nlps):
            for text, rank_score in zip(texts, ranking_scores):
                res = nlp({
                    'question': question,
                    'context': text
                })
                # Debug trace of the raw pipeline result.
                print(res)
                # Candidates are filtered on the raw (unweighted) score ...
                if res["score"] > self.thr:
                    curr_answers.append(res["answer"])
                # ... while the single best answer is chosen on the score
                # weighted by the ranking score, and only from the first
                # pipeline.
                weighted_score = res["score"] * rank_score
                if i == 0 and weighted_score > best_score:
                    best_answer = res["answer"]
                    best_score = weighted_score

        if not curr_answers:
            return None
        if best_answer is None:
            # Only reachable when no weighted score compared greater than
            # -inf (e.g. NaN scores); fall back to the first kept candidate.
            best_answer = curr_answers[0]

        curr_answers = [post_process_answer(x, self.entity_dict)
                        for x in curr_answers]
        best_answer = post_process_answer(best_answer, self.entity_dict)
        # find_best_cluster is defined in graph_utils; it receives the
        # candidate list and the current best answer, and its result is
        # post-processed once more before being returned.
        return post_process_answer(
            find_best_cluster(curr_answers, best_answer), self.entity_dict)