Spaces:

gourisankar85
/

rag-bench-evaluation

Running

App Files Files Community

rag-bench-evaluation / scripts /prediction.py

gourisankar85

Upload 159 files

d1eeaf0 verified 5 months ago

raw

history blame contribute delete

2.54 kB


	# Check if the predicted answer matches the ground truth
	def check_answer(prediction, ground_truth):
	    """Score ``prediction`` against each expected answer by substring match.

	    Matching is case-insensitive: the prediction and every expected answer
	    are lower-cased before the ``in`` test.

	    Args:
	        prediction: Model output text.
	        ground_truth: Either a single answer string or a list of answers.
	            Each list element is either a string (it must occur in the
	            prediction) or a list of alternative strings (at least one
	            must occur in the prediction).

	    Returns:
	        A list with one int per expected answer: 1 when the answer (or any
	        of its alternatives) occurs in the prediction, 0 otherwise. An
	        empty list of alternatives scores 0.
	    """
	    haystack = prediction.lower()

	    # Use isinstance for both checks so list subclasses are treated the
	    # same way at the outer (answers) and inner (alternatives) level.
	    if not isinstance(ground_truth, list):
	        ground_truth = [ground_truth]

	    labels = []
	    for answer in ground_truth:
	        if isinstance(answer, list):
	            # A nested list holds interchangeable spellings of one answer.
	            matched = any(alt.lower() in haystack for alt in answer)
	        else:
	            matched = answer.lower() in haystack
	        labels.append(int(matched))
	    return labels

	# Evaluate if the result is correct (non-zero indicates correctness)
	def get_evaluation(results):
	    """Return True when ``results`` contains no label equal to 0.

	    Only a 0 entry makes the result False, so an empty ``results`` and
	    labels such as -1 both evaluate to True.
	    """
	    has_zero_label = 0 in results
	    return not has_zero_label

	# Generate prediction based on query, documents, and model
	def predict(query, ground_truth, docs, model, instruction, temperature):
	    """Ask ``model`` to answer ``query`` and label the reply.

	    Args:
	        query: Question text substituted for ``QUERY`` in ``instruction``.
	        ground_truth: Expected answer(s), forwarded to ``check_answer``.
	        docs: Document strings; they are joined with newlines and
	            substituted for ``DOCS``. An empty value or ``None`` means
	            "no documents".
	        model: Object whose ``generate`` method is called as
	            ``generate(text, temperature)`` without documents and as
	            ``generate(text, temperature, system_message)`` with them.
	        instruction: ``str.format`` template using the ``QUERY`` and
	            ``DOCS`` fields.
	        temperature: Passed through unchanged to ``model.generate``.

	    Returns:
	        A ``(labels, prediction, fact_label)`` tuple:
	        ``labels`` is ``[-1]`` when the reply contains
	        'insufficient information'; otherwise it is the list returned by
	        ``check_answer`` (1 = expected answer found, 0 = not found).
	        ``prediction`` is the raw model reply.
	        ``fact_label`` is 1 when the reply contains 'factual errors',
	        else 0.
	    """
	    # NOTE(review): in the last sentence the quoted response appears to
	    # swallow the trailing instruction "and provide the correct answer";
	    # confirm the intended wording before editing, because this prompt
	    # directly affects model output.
	    system_message = (
	        'You are an accurate and reliable AI assistant that can answer questions with the help of external documents. '
	        'Please note that external documents may contain noisy or factually incorrect information. If the information '
	        'in the document contains the correct answer, you will give an accurate answer. If the information in the '
	        'document does not contain the answer, you will generate "I can not answer the question because of the insufficient information in documents." '
	        'If there are inconsistencies with the facts in some of the documents, please generate the response: "There are factual errors in the provided documents and provide the correct answer."'
	    )

	    if docs:
	        text = instruction.format(QUERY=query, DOCS='\n'.join(docs))
	        prediction = model.generate(text, temperature, system_message)
	    else:
	        # Without documents the system message is not sent to the model.
	        text = instruction.format(QUERY=query, DOCS='')
	        prediction = model.generate(text, temperature)

	    # Both phrase checks are case-sensitive and run on the raw reply.
	    if 'insufficient information' in prediction:
	        labels = [-1]
	    else:
	        labels = check_answer(prediction, ground_truth)

	    # Flag replies in which the model reports factual errors in the documents.
	    fact_label = 1 if 'factual errors' in prediction else 0

	    return labels, prediction, fact_label