Spaces:

ml-visoft
/

c-reviewer

Sleeping

App Files Files Community

c-reviewer / eval_code.py

ml-visoft

GUI for evaluation feedback

17cd01c 3 months ago

raw

history blame

3.82 kB

	import copy
	import json
	import time

	"""
	This module handles the code evaluation phase.

	"""
	import json


	# Values stored in the "EVAL" field of an answer entry. Entries are created
	# with EVAL_ANSWER_NOEVAL; the other two are presumably set once a human
	# reviewer accepts / rejects the entry -- TODO confirm against the GUI code.
	EVAL_ANSWER_NEGATIVE = -1
	EVAL_ANSWER_NOEVAL = 0
	EVAL_ANSWER_POSITIVE = 1

	# Known review criteria as (code, short description) pairs. The code at
	# index 0 is what gets matched against the "criteria" key of answer entries.
	CODE_AUGMENTATIONS = [
	    ("NO_COMPILE", "The code provided is not a valid C code"),
	    ("DRY", "Don't repeat yourself."),
	    ("SRP", "Single object[function] responsability"),
	    ("MC", "Magic constants."),
	    ("NAME", "Meaningful names in the code."),
	]


	def clean_prompt_answer(answer):
	    """
	    Remove Markdown code-fence lines from a model answer.

	    Per the original note, ChatGPT 4 returns the payload unwrapped while 3.5
	    encloses it in ``` fences. Every line that starts with ``` is dropped
	    (this also covers fences such as ```json); all other lines are kept
	    unchanged and in order.

	    :param answer: raw answer text
	    :return: the answer text without the fence lines
	    """
	    kept_lines = [
	        line for line in answer.split("\n") if not line.startswith("```")
	    ]
	    return "\n".join(kept_lines)

	def parse_chatgpt_answer(ans_text):
	    """
	    Decode the JSON text of a model answer.

	    :param ans_text: JSON document, in any form ``json.loads`` accepts
	    :return: the decoded JSON value; if decoding fails, a dict
	        ``{"error": <formatted traceback>}`` (the traceback is also printed
	        to the console)
	    """
	    try:
	        return json.loads(ans_text)
	    except Exception:
	        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
	        # and SystemExit propagate instead of being reported as a parse error.
	        # For now we dump the error in console.
	        import traceback

	        error_text = traceback.format_exc()
	        print(error_text)
	        return {"error": error_text}

	def eval_code_by_chatgpt(openai_client, ccode):
	    """
	    Produce the raw (JSON text) evaluation for a piece of C code.

	    NOTE: as written, this is a stub. It immediately returns a canned answer
	    that imitates the observed ChatGPT output (including a repeated "DRY"
	    entry); neither ``openai_client`` nor ``ccode`` is used on that path.

	    :param openai_client: client object; only referenced by an ``assert``
	        that sits after the ``return`` and is therefore never executed
	    :param ccode: the C source to evaluate (currently unused)
	    :return: JSON text describing a list of ``criteria`` / ``explanation``
	        entries. The ``except`` branch would return an ``{"error": ...}``
	        dict instead, but nothing in the ``try`` body can raise at present.
	    """
	    canned_answer = """[
	{
	"criteria": "DRY",
	"explanation": "The memory allocation and initialization for ``p1``, ``p2``, and ``p3`` are repetitive. Consider creating a function like ``allocateAndInitializeMemory``."
	},
	{
	"criteria": "DRY",
	"explanation": "Tne second DRY failure, because this is the observed ChatGPT behaviour."
	},

	{
	"criteria": "SRP",
	"explanation": "The ``main`` function handles memory allocation, initialization, and printing. You should separate these responsibilities into different functions like ``allocateMemory``, ``initializeData``, and ``printData``."
	},
	{
	"criteria": "NAME",
	"explanation": "``x1`` should be called ``title``, ``y1`` should be called ``author``, ``z1`` should be called ``year``, ``p1`` should be called ``titlePtr``, ``p2`` should be called ``authorPtr``, ``p3`` should be called ``yearPtr``."
	}
	]"""
	    try:
	        return canned_answer
	        # NOTE(review): unreachable while the canned answer is returned above.
	        # Moving it before the return would change the result for callers
	        # that pass ``None`` as the client.
	        assert openai_client is not None
	    except:
	        # NOTE(review): bare ``except`` kept as in the original; narrow it
	        # once a real API call lives in the ``try`` body.
	        import traceback

	        traceback.print_exc()
	        return {"error":"There was an error while parsing the answer. Maybe ChatGPT is overloaded?"}



	def add_evaluation_fields_on_js_answer(json_answer, all_criterias=None):
	    """
	    Adds some JSON fields to store the human feedback.

	    The returned list is laid out as follows:

	    * index 0 is always the textual human feedback entry
	      (``"criteria": "HUMAN_FEEDBACK"`` with an empty explanation);
	    * then a deep copy of every entry of ``json_answer``, in order;
	    * then one ``"Not infringed"`` entry for each item of ``all_criterias``
	      whose code does not appear among the ``"criteria"`` of ``json_answer``.

	    Every entry of the result carries ``"EVAL": EVAL_ANSWER_NOEVAL``.
	    Neither ``json_answer`` nor its entries are modified.

	    :param json_answer: list of dicts, each with a ``"criteria"`` key
	    :param all_criterias: sequence of items whose element 0 is a criteria
	        code; defaults to ``CODE_AUGMENTATIONS`` when ``None``
	    :return: a new list of dicts as described above
	    """
	    if all_criterias is None:
	        all_criterias = CODE_AUGMENTATIONS

	    # Pad a shallow copy: appending to ``json_answer`` itself would leak the
	    # "Not infringed" entries into the caller's list.
	    answers = list(json_answer)
	    reported = {entry["criteria"] for entry in answers}
	    answers.extend(
	        {"criteria": criteria[0], "explanation": "Not infringed"}
	        for criteria in all_criterias
	        if criteria[0] not in reported
	    )

	    enhanced_answer = [
	        {
	            "criteria": "HUMAN_FEEDBACK",
	            "explanation": "",
	            "EVAL": EVAL_ANSWER_NOEVAL,
	        }
	    ]
	    for entry in answers:
	        # Deep copy so that adding "EVAL" never touches the caller's dicts.
	        entry = copy.deepcopy(entry)
	        entry["EVAL"] = EVAL_ANSWER_NOEVAL
	        enhanced_answer.append(entry)

	    return enhanced_answer

	def eval_the_piece_of_c_code(openai_client, ccode):
	    """
	    Main entrypoint to this module, meant to be called from the backend.

	    Runs the pipeline synchronously (the original note asks callers to use
	    multithreading because the call blocks): obtain the raw answer, decode
	    it as JSON, then add the human-feedback / EVAL fields.

	    NOTE(review): when decoding fails, ``parse_chatgpt_answer`` yields an
	    ``{"error": ...}`` dict, which is passed on unchanged to
	    ``add_evaluation_fields_on_js_answer`` -- confirm that this is the
	    intended handling of the error case.

	    :param openai_client: forwarded to ``eval_code_by_chatgpt``
	    :param ccode: the C source to evaluate
	    :return: the decoded answer enriched with EVAL fields
	    """
	    raw_answer = eval_code_by_chatgpt(openai_client, ccode)
	    decoded_answer = parse_chatgpt_answer(raw_answer)
	    return add_evaluation_fields_on_js_answer(decoded_answer)