import re
import json

import jsonlines
from openai import OpenAI


def batch_eval(query_file, result1_file, result2_file, output_file_path):
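    """Build OpenAI Batch API requests that ask an LLM judge to compare two
    sets of answers to the same questions, write the requests to a JSONL
    file, and submit that file as a batch job."""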
    client = OpenAI()
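
    # Extract the questions, expected as lines of the form "- Question <n>: ...".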
    with open(query_file, "r") as f:
        data = f.read()
    queries = re.findall(r"- Question \d+: (.+)", data)
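
    # Load both result files; each is a JSON array of objects with a "result" field.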
    with open(result1_file, "r") as f:
        answers1 = json.load(f)
    answers1 = [i["result"] for i in answers1]

    with open(result2_file, "r") as f:
        answers2 = json.load(f)
    answers2 = [i["result"] for i in answers2]
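
    # Build one chat-completion request per (query, answer1, answer2) triple.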
    requests = []
    for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
        sys_prompt = """
        ---Role---
        You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
        """
| prompt = f""" | |
| You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**. | |
| - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question? | |
| - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question? | |
| - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic? | |
| For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories. | |
| Here is the question: | |
| {query} | |
| Here are the two answers: | |
| **Answer 1:** | |
| {answer1} | |
| **Answer 2:** | |
| {answer2} | |
| Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion. | |
| Output your evaluation in the following JSON format: | |
| {{ | |
| "Comprehensiveness": {{ | |
| "Winner": "[Answer 1 or Answer 2]", | |
| "Explanation": "[Provide explanation here]" | |
| }}, | |
| "Empowerment": {{ | |
| "Winner": "[Answer 1 or Answer 2]", | |
| "Explanation": "[Provide explanation here]" | |
| }}, | |
| "Overall Winner": {{ | |
| "Winner": "[Answer 1 or Answer 2]", | |
| "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]" | |
| }} | |
| }} | |
| """ | |
        request_data = {
            "custom_id": f"request-{i+1}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": sys_prompt},
                    {"role": "user", "content": prompt},
                ],
            },
        }
        requests.append(request_data)
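
    # Write one JSON object per line, the format the Batch API expects.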
    with jsonlines.open(output_file_path, mode="w") as writer:
        for request in requests:
            writer.write(request)

    print(f"Batch API requests written to {output_file_path}")
    batch_input_file = client.files.create(
        file=open(output_file_path, "rb"), purpose="batch"
    )
    batch_input_file_id = batch_input_file.id

    batch = client.batches.create(
        input_file_id=batch_input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": "nightly eval job"},
    )
    print(f"Batch {batch.id} has been created.")


if __name__ == "__main__":
    # Example invocation; the file paths are placeholders, not part of the
    # original script. Replace them with your own query and result files.
    batch_eval(
        "queries.txt",
        "result1.json",
        "result2.json",
        "batch_requests.jsonl",
    )