|
|
|
|
|
def check_answer(prediction, ground_truth):
    """Label each expected answer by whether it occurs in the prediction.

    Matching is a case-insensitive substring test against ``prediction``.

    Args:
        prediction: Response text to search in.
        ground_truth: A single answer, or a list/tuple of answers. An entry
            may itself be a list/tuple of acceptable alternatives; such an
            entry counts as matched when any one alternative is found.
            Non-string answers (e.g. numbers) are compared through their
            ``str()`` form.

    Returns:
        A list with one int per ground-truth entry: 1 if the entry was
        found in the prediction, 0 otherwise.
    """
    haystack = prediction.lower()

    # A bare answer is treated as a one-element answer list.
    if not isinstance(ground_truth, (list, tuple)):
        ground_truth = [ground_truth]

    labels = []
    for answer in ground_truth:
        if isinstance(answer, (list, tuple)):
            # Group of alternatives: one hit is enough; an empty group
            # can never match.
            found = any(str(alt).lower() in haystack for alt in answer)
        else:
            found = str(answer).lower() in haystack
        labels.append(int(found))
    return labels
|
|
|
|
|
|
def get_evaluation(results):
    """Report whether ``results`` is free of any 0 label.

    Returns ``True`` when no element of ``results`` equals 0 (an empty
    collection therefore yields ``True``) and ``False`` otherwise.
    """
    has_zero = 0 in results
    return not has_zero
|
|
|
|
|
|
def predict(query, ground_truth, docs, model, instruction, temperature):
    """Ask the model a question, optionally with documents, and label the reply.

    Args:
        query: Question text substituted into ``instruction`` as ``QUERY``.
        ground_truth: Expected answer(s), forwarded to ``check_answer``.
        docs: Iterable of document strings; they are joined with newlines
            and substituted into ``instruction`` as ``DOCS``. An empty
            value or ``None`` means no documents are supplied.
        model: Object whose ``generate(text, temperature[, system_message])``
            return value is used as the response text.
        instruction: Template string formatted with the keyword fields
            ``QUERY`` and ``DOCS``.
        temperature: Passed through unchanged to ``model.generate``.

    Returns:
        A tuple ``(labels, prediction, fact_label)``:
        ``labels`` is ``[-1]`` when the response contains
        "insufficient information", otherwise the per-answer list returned
        by ``check_answer``; ``prediction`` is the response as returned by
        the model; ``fact_label`` is 1 when the response contains
        "factual errors" and 0 otherwise.
    """
    system_message = (
        'You are an accurate and reliable AI assistant that can answer questions with the help of external documents. '
        'Please note that external documents may contain noisy or factually incorrect information. If the information '
        'in the document contains the correct answer, you will give an accurate answer. If the information in the '
        'document does not contain the answer, you will generate "I can not answer the question because of the insufficient information in documents." '
        'If there are inconsistencies with the facts in some of the documents, please generate the response: "There are factual errors in the provided documents and provide the correct answer."'
    )

    if not docs:
        # No documents: the system message is not sent in this case.
        text = instruction.format(QUERY=query, DOCS='')
        prediction = model.generate(text, temperature)
    else:
        text = instruction.format(QUERY=query, DOCS='\n'.join(docs))
        prediction = model.generate(text, temperature, system_message)

    # Both marker checks are case-sensitive substring tests on the raw reply.
    if 'insufficient information' in prediction:
        labels = [-1]
    else:
        labels = check_answer(prediction, ground_truth)

    fact_label = int('factual errors' in prediction)

    return labels, prediction, fact_label
|
|
|