question-answering-qa-may-tablang / custom_pipeline.py
nathantablang's picture
Upload DemoT5QAPipeline
6da32f8 verified
import numpy as np
import tensorflow as tf
from transformers import QuestionAnsweringPipeline, pipeline, AutoModelForQuestionAnswering, TFAutoModelForQuestionAnswering, AutoTokenizer
from transformers.pipelines import PIPELINE_REGISTRY
class DemoT5QAPipeline(QuestionAnsweringPipeline):
    """Pipeline that generates an answer and attaches a confidence to it.

    ``_forward`` runs ``self.model.generate`` and keeps the per-step scores.
    ``postprocess`` reduces those scores to one confidence value: the product
    of the (rounded) per-token values of the first generated sequence.

    NOTE(review): ``_forward`` and ``postprocess`` rely on ``check_inputs``,
    ``self.model.generate`` and a ``generated_text`` entry returned by the
    parent ``postprocess``. That looks like the text2text-generation pipeline
    contract rather than the extractive question-answering one -- confirm
    that the base class is the intended one.
    """

    def preprocess(self, inputs, **kwargs):
        """Validate ``inputs`` and delegate tokenization to the parent class.

        Args:
            inputs: Dictionary with a ``'question'`` and a ``'context'`` entry.
            **kwargs: Extra preprocessing options, forwarded unchanged to the
                parent ``preprocess``.

        Returns:
            Whatever the parent ``preprocess`` returns for this example.

        Raises:
            ValueError: If ``inputs`` is not a dictionary holding both keys.
        """
        # An earlier draft of this method treated the context as a cached GPT
        # guess and forwarded only the question; both fields are forwarded now.
        print("Received inputs:", inputs)
        has_required_keys = (
            isinstance(inputs, dict)
            and 'question' in inputs
            and 'context' in inputs
        )
        if not has_required_keys:
            raise ValueError("Inputs must be a dictionary with 'question' and 'context' keys.")

        # The parent's ``preprocess`` takes the example as a single positional
        # argument; it has no ``question=`` / ``context=`` keyword parameters.
        example = {'question': inputs['question'], 'context': inputs['context']}
        return super().preprocess(example, **kwargs)

    def _forward(self, model_inputs, **generate_kwargs):
        """Generate output sequences and keep the scores needed for confidence.

        Args:
            model_inputs: Mapping of model inputs; must contain ``"input_ids"``
                (its shape is unpacked as batch size and input length).
            **generate_kwargs: Options forwarded to ``self.model.generate``.

        Returns:
            dict with ``"output_ids"`` (sequences reshaped so the leading axis
            is the input batch), ``"output_sequences"`` (the sequences exactly
            as returned by ``generate``) and ``"output_scores"``.

        Raises:
            ValueError: If ``self.framework`` is neither ``"pt"`` nor ``"tf"``.
        """
        if self.framework == "pt":
            in_b, input_length = model_inputs["input_ids"].shape
        elif self.framework == "tf":
            in_b, input_length = tf.shape(model_inputs["input_ids"]).numpy()
        else:
            # Fail with a clear message instead of an unbound-local error below.
            raise ValueError(f"Unsupported framework: {self.framework!r}")

        self.check_inputs(
            input_length,
            generate_kwargs.get("min_length", self.model.config.min_length),
            generate_kwargs.get("max_length", self.model.config.max_length),
        )

        # ``postprocess`` needs the structured output and the scores, so these
        # two flags are always forced on. Merging them into the dict (rather
        # than passing them as separate keywords) avoids a duplicate-keyword
        # TypeError when the caller supplies either flag as well.
        generate_kwargs = {
            **generate_kwargs,
            "return_dict_in_generate": True,
            "output_scores": True,
        }
        outputs = self.model.generate(**model_inputs, **generate_kwargs)

        output_ids = outputs.sequences
        out_b = output_ids.shape[0]
        # Regroup the flat list of sequences by input example:
        # (in_b * k, ...) -> (in_b, k, ...).
        grouped_shape = (in_b, out_b // in_b, *output_ids.shape[1:])
        if self.framework == "pt":
            output_ids = output_ids.reshape(*grouped_shape)
        else:
            output_ids = tf.reshape(output_ids, grouped_shape)

        return {
            "output_ids": output_ids,
            "output_sequences": outputs.sequences,
            "output_scores": outputs.scores,
        }

    def postprocess(self, model_outputs):
        """Decode the generated answer and compute its confidence.

        Args:
            model_outputs: The dict produced by ``_forward``.

        Returns:
            dict with ``'guess'`` (the first generated text) and
            ``'confidence'`` (product of the per-token values, each rounded to
            three decimals, for the first sequence).
        """
        guess_text = super().postprocess(model_outputs)[0]['generated_text']

        # NOTE(review): the scores are used as returned; if they are raw
        # logits, ``exp`` of them is not a probability. Check whether
        # ``normalize_logits=True`` should be passed here.
        transition_scores = self.model.compute_transition_scores(
            model_outputs['output_sequences'],
            model_outputs['output_scores'],
        )

        # Only PyTorch tensors need (and have) ``.cpu()``; TensorFlow tensors
        # convert to NumPy directly.
        if self.framework == "pt":
            scores = transition_scores.cpu().numpy()
        else:
            scores = transition_scores.numpy()

        # exp() maps the scores out of log space; only the first sequence of
        # the batch is used for the confidence.
        token_probs = np.round(np.exp(scores), 3)[0]
        # ``np.product`` was removed in NumPy 2.0; ``np.prod`` is the
        # supported spelling and computes the same value.
        guess_prob = np.prod(token_probs)
        return {'guess': guess_text, 'confidence': guess_prob}