from transformers import (
    QuestionAnsweringPipeline,
    PretrainedConfig,
    AutoTokenizer,
    AutoModelForQuestionAnswering,
    AutoModelForCausalLM,
    pipeline,
)
from huggingface_hub import PyTorchModelHubMixin

import torch
import torch.nn as nn
import os

class HybridQAPipeline(QuestionAnsweringPipeline):
    def __init__(self, model=None, tokenizer=None, **kwargs):
        # Any `model` passed in is ignored: the pipeline always builds its own
        # hybrid model from a fixed extractive/generative checkpoint pair.
        extractive_id = "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad"
        generative_id = "microsoft/Phi-3-mini-4k-instruct"
        self.config = HybridQAConfig(extractive_id, generative_id)
        self.model = HybridQAModel(self.config)
        super().__init__(model=self.model, tokenizer=tokenizer, **kwargs)

    def __call__(self, question, context):
        return self.model.predict(question, context)

class HybridQAConfig(PretrainedConfig):
    def __init__(self, extractive_id=None, generative_id=None, **kwargs):
        # Hub ids (or local paths) of the two underlying checkpoints.
        self.extractive_id = extractive_id
        self.generative_id = generative_id
        super().__init__(**kwargs)

class HybridQAModel(nn.Module, PyTorchModelHubMixin):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.load_models(config.extractive_id, config.generative_id)

    def can_generate(self):
        # Tell the pipeline machinery not to route this model through
        # `generate()`; generation is handled inside `infer_generative`.
        return False

    def load_models(self, extractive_id, generative_id):
        self.tokenizer_extractive = AutoTokenizer.from_pretrained(extractive_id, trust_remote_code=True)
        self.tokenizer_generative = AutoTokenizer.from_pretrained(generative_id, trust_remote_code=True)

        self.model_extractive = AutoModelForQuestionAnswering.from_pretrained(extractive_id, trust_remote_code=True)
        self.model_generative = AutoModelForCausalLM.from_pretrained(generative_id, trust_remote_code=True)

    def predict(self, question, context):
        result_gen, conf_gen = self.infer_generative(self.model_generative, self.tokenizer_generative, question)
        result_ext, conf_ext = self.infer_extractive(self.model_extractive, self.tokenizer_extractive, question, context)

        # Heuristic: trust the generative answer only when it is short and more
        # confident than the extractive span; otherwise fall back to extraction.
        if len(result_gen) < 30 and conf_gen > conf_ext:
            return {'guess': result_gen, 'confidence': conf_gen}
        else:
            return {'guess': result_ext, 'confidence': conf_ext}

    def infer_generative(self, model, tokenizer, input_text, **generate_kwargs):
        input_text += " Do not output anything but the question's answer."
        messages = [
            {"role": "user", "content": input_text}
        ]
        input_ids = tokenizer.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        # `do_sample=True` is required for `temperature` to take effect;
        # without it, generate() silently ignores the temperature argument.
        generated_outputs = model.generate(
            input_ids,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.5,
            output_scores=True,
            return_dict_in_generate=True,
            **generate_kwargs,
        )

        # Confidence: mean over generation steps of the top softmax probability.
        logits = generated_outputs.scores
        softmax_scores = [torch.softmax(logit, dim=-1) for logit in logits]
        max_confidence_scores = [score.max().item() for score in softmax_scores]
        average_confidence = sum(max_confidence_scores) / len(max_confidence_scores)

        # Decode only the newly generated tokens; slicing the decoded string by
        # `len(input_text)` is unreliable because the chat template adds text.
        new_tokens = generated_outputs.sequences[0][input_ids.shape[-1]:]
        decoded_output = tokenizer.decode(new_tokens, skip_special_tokens=True)
        final_output = decoded_output.split("\n")[-1]
        return final_output, average_confidence

    def infer_extractive(self, model, tokenizer, question, context):
        # Note: building a pipeline on every call is wasteful; caching it would
        # be a straightforward optimization.
        qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)
        result = qa_pipeline(question=question, context=context)
        return result['answer'], result['score']

    def save_pretrained(self, save_directory, **kwargs):
        os.makedirs(save_directory, exist_ok=True)
        self.config.save_pretrained(save_directory, **kwargs)
        # Save each model/tokenizer pair into its own subdirectory; writing both
        # into `save_directory` directly would clobber config and weight files.
        extractive_dir = os.path.join(save_directory, "extractive")
        generative_dir = os.path.join(save_directory, "generative")
        self.model_extractive.save_pretrained(extractive_dir, **kwargs)
        self.tokenizer_extractive.save_pretrained(extractive_dir, **kwargs)
        self.model_generative.save_pretrained(generative_dir, **kwargs)
        self.tokenizer_generative.save_pretrained(generative_dir, **kwargs)

    @classmethod
    def from_pretrained(cls, save_directory, *model_args, **model_kwargs):
        config = HybridQAConfig.from_pretrained(save_directory, trust_remote_code=True)
        # Point the config at the subdirectories written by `save_pretrained`
        # so the constructor loads the local copies instead of re-downloading
        # the original hub checkpoints.
        config.extractive_id = os.path.join(save_directory, "extractive")
        config.generative_id = os.path.join(save_directory, "generative")
        return cls(config)
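

# Minimal usage sketch: assumes network access to download both checkpoints and
# that the base QuestionAnsweringPipeline accepts the custom nn.Module wrapper.
# The question/context strings and the "hybrid_qa_ckpt" path are placeholders.
if __name__ == "__main__":
    # QuestionAnsweringPipeline requires a tokenizer; the extractive model's
    # tokenizer is a natural choice here.
    tok = AutoTokenizer.from_pretrained(
        "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad"
    )
    qa = HybridQAPipeline(tokenizer=tok)
    print(qa("Where is the Eiffel Tower?",
             "The Eiffel Tower is a landmark in Paris, France."))

    # Round-trip the hybrid model through save_pretrained / from_pretrained.
    qa.model.save_pretrained("hybrid_qa_ckpt")
    reloaded = HybridQAModel.from_pretrained("hybrid_qa_ckpt")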