|
import logging

import numpy as np
import tensorflow as tf
from transformers import (
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    QuestionAnsweringPipeline,
    TFAutoModelForQuestionAnswering,
    pipeline,
)
from transformers.pipelines import PIPELINE_REGISTRY
|
|
|
class DemoT5QAPipeline(QuestionAnsweringPipeline):
    """Pipeline that generates an answer for a question/context pair and
    attaches a confidence derived from the generation scores.

    The final result is a dict ``{'guess': <text>, 'confidence': <float>}``.

    NOTE(review): ``_forward`` and ``postprocess`` rely on
    ``self.check_inputs``, ``self.model.generate`` and on the parent
    ``postprocess`` returning records with a ``'generated_text'`` key. That
    looks like the text2text-generation pipeline contract rather than the
    extractive question-answering one -- confirm the intended base class.
    """

    def preprocess(self, inputs):
        """Validate ``inputs`` and delegate tokenization to the parent class.

        Args:
            inputs: A dict holding at least the ``'question'`` and
                ``'context'`` keys.

        Returns:
            Whatever the parent ``preprocess`` returns for that pair.

        Raises:
            ValueError: If ``inputs`` is not a dict with both required keys.
        """
        # Debug-level logging instead of print: raw user inputs should not be
        # written to stdout on every call.
        logging.getLogger(__name__).debug("Received inputs: %s", inputs)

        if not (
            isinstance(inputs, dict)
            and "question" in inputs
            and "context" in inputs
        ):
            raise ValueError(
                "Inputs must be a dictionary with 'question' and 'context' keys."
            )

        # NOTE(review): the parent signature is not visible in this file;
        # confirm it accepts `question=` / `context=` keyword arguments.
        return super().preprocess(
            question=inputs["question"], context=inputs["context"]
        )

    def _forward(self, model_inputs, **generate_kwargs):
        """Run ``self.model.generate`` and keep the per-step scores.

        Args:
            model_inputs: Mapping of model inputs; must contain a 2-D
                ``'input_ids'`` tensor.
            **generate_kwargs: Extra keyword arguments forwarded to
                ``self.model.generate``.

        Returns:
            A dict with ``'output_ids'`` (sequences reshaped to
            ``(in_b, out_b // in_b, ...)``), ``'output_sequences'`` (the
            sequences exactly as returned by ``generate``) and
            ``'output_scores'`` (the scores returned by ``generate``).

        Raises:
            ValueError: If ``self.framework`` is neither ``"pt"`` nor ``"tf"``.
        """
        if self.framework == "pt":
            in_b, input_length = model_inputs["input_ids"].shape
        elif self.framework == "tf":
            in_b, input_length = tf.shape(model_inputs["input_ids"]).numpy()
        else:
            # Fail early with a clear message instead of an unbound-local
            # error further down.
            raise ValueError(f"Unsupported framework: {self.framework!r}")

        self.check_inputs(
            input_length,
            generate_kwargs.get("min_length", self.model.config.min_length),
            generate_kwargs.get("max_length", self.model.config.max_length),
        )

        # The scores are required by `postprocess`, so these two flags are
        # forced. Merging them into the dict avoids a duplicate-keyword
        # TypeError when a caller supplies either flag.
        generate_kwargs = {
            **generate_kwargs,
            "return_dict_in_generate": True,
            "output_scores": True,
        }
        outputs = self.model.generate(**model_inputs, **generate_kwargs)

        output_ids = outputs.sequences
        out_b = output_ids.shape[0]
        grouped_shape = (in_b, out_b // in_b, *output_ids.shape[1:])
        if self.framework == "pt":
            output_ids = output_ids.reshape(*grouped_shape)
        else:
            output_ids = tf.reshape(output_ids, grouped_shape)

        return {
            "output_ids": output_ids,
            "output_sequences": outputs.sequences,
            "output_scores": outputs.scores,
        }

    def postprocess(self, model_outputs):
        """Decode the generated answer and compute its confidence.

        Args:
            model_outputs: The dict produced by ``_forward``.

        Returns:
            ``{'guess': str, 'confidence': float}`` where the confidence is
            the product of the per-token probabilities of the first
            generated sequence.
        """
        guess_text = super().postprocess(model_outputs)[0]["generated_text"]

        # normalize_logits=True asks for log-softmaxed scores, so that
        # exponentiating them yields probabilities.
        transition_scores = self.model.compute_transition_scores(
            model_outputs["output_sequences"],
            model_outputs["output_scores"],
            normalize_logits=True,
        )

        # Only torch tensors have .cpu(); TensorFlow tensors convert directly.
        if self.framework == "pt":
            transition_scores = transition_scores.detach().cpu()
        token_log_probs = np.asarray(transition_scores.numpy())[0]

        # Sum in log space and exponentiate once. This equals the product of
        # the token probabilities, without the compounding error of rounding
        # each factor first, and avoids `np.product` (removed in NumPy 2.0).
        guess_prob = float(np.exp(np.sum(token_log_probs)))

        return {"guess": guess_text, "confidence": guess_prob}