|
|
import os |
|
|
from haystack.utils import fetch_archive_from_http, clean_wiki_text, convert_files_to_docs |
|
|
from haystack.schema import Answer |
|
|
from haystack.document_stores import InMemoryDocumentStore |
|
|
from haystack.pipelines import ExtractiveQAPipeline |
|
|
from haystack.nodes import FARMReader, TfidfRetriever |
|
|
import logging |
|
|
import json |
|
|
|
|
|
# Disable the HuggingFace tokenizers' thread parallelism; presumably set to
# silence the "current process just got forked" warnings/deadlocks when the
# tokenizer is used across forked worker processes — TODO confirm.
os.environ['TOKENIZERS_PARALLELISM'] ="false"
|
|
|
|
|
|
|
|
def start_haystack():
    """Build and return a ready-to-query extractive QA pipeline.

    Creates an in-memory document store, indexes the local corpus into it,
    and wires a TF-IDF retriever together with a distilled RoBERTa reader.
    """
    store = InMemoryDocumentStore()
    load_and_write_data(store)

    # Reader: distilled SQuAD2 model; use_gpu=True lets FARMReader pick a GPU
    # if one is available.
    reader = FARMReader(
        model_name_or_path="deepset/roberta-base-squad2-distilled",
        use_gpu=True,
    )
    retriever = TfidfRetriever(document_store=store)

    return ExtractiveQAPipeline(reader, retriever)
|
|
|
|
|
def load_and_write_data(document_store, doc_dir='./dao_data'):
    """Convert the files in *doc_dir* into documents and index them.

    Args:
        document_store: A Haystack document store exposing ``write_documents``.
        doc_dir (str): Directory holding the raw corpus files. Defaults to
            ``'./dao_data'`` (the previously hard-coded location), so existing
            callers are unaffected.
    """
    print("Loading data ...")
    # Clean Wikipedia-style markup and split on paragraphs so the retriever
    # operates on small, focused passages rather than whole files.
    docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
    document_store.write_documents(docs)
|
|
|
|
|
|
|
|
class EndpointHandler():
    """Inference-endpoint wrapper around a Haystack extractive QA pipeline."""

    def __init__(self, path=""):
        # `path` is part of the endpoint handler contract but unused here:
        # the pipeline loads its own model and indexes the local corpus.
        self.pipeline = start_haystack()

    def __call__(self, data):
        """Answer a question using the extractive QA pipeline.

        Args:
            data (dict): Request payload. Must contain a ``"question"`` key
                with the query string. The key is popped, so the caller's
                dict is mutated.

        Return:
            A JSON string like ``'{"answer": "..."}'`` containing the
            top-ranked answer (``null`` if the reader found none), or an
            empty dict if no question was supplied.
        """
        question = data.pop("question", None)
        if question is None:
            # Preserve the original contract: missing question -> empty dict.
            return {}

        prediction = self.pipeline.run(
            query=question,
            params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
        )

        # The reader can legitimately return zero answers; guard the [0]
        # access so we respond with a well-formed payload instead of
        # raising IndexError.
        answers = prediction.get('answers') or []
        best = answers[0].answer if answers else None

        response = {"answer": best}
        return json.dumps(response)
|
|
|