# c-reviewer / eval_code.py
"""
Handles the code evaluation phase.
"""
import copy
import json
import time
import traceback

EVAL_ANSWER_NOEVAL = 0
EVAL_ANSWER_POSITIVE = 1
EVAL_ANSWER_NEGATIVE = -1

CODE_AUGMENTATIONS = [
    ("NO_COMPILE", "The provided code is not valid C code."),
    ("DRY", "Don't repeat yourself."),
    ("SRP", "Single responsibility per object/function."),
    ("MC", "Magic constants."),
    ("NAME", "Meaningful names in the code."),
]


def clean_prompt_answer(answer):
    """
    GPT-4 is OK and does not pollute the code, but GPT-3.5 encloses it in ``` fences.
    Strip any fence lines from the answer.
    :param answer:
    :return:
    """
    cleaned = [line for line in answer.split("\n") if not line.startswith("```")]
    return "\n".join(cleaned)


def parse_chatgpt_answer(ans_text):
    try:
        js_answer = json.loads(ans_text)
    except (json.JSONDecodeError, TypeError):
        # ans_text can also be an error dict instead of a string;
        # for now we dump the error to the console.
        exception = traceback.format_exc()
        print(exception)
        return {"error": exception}
    return js_answer


def eval_code_by_chatgpt(openai_client, ccode):
    """
    Evaluates a piece of code using our heavily tuned prompt!
    NOTE: the answer is currently hardcoded for development; no API call is made.
    :param openai_client:
    :param ccode:
    :return:
    """
    # time.sleep(3)
    try:
        # Canned answer mimicking the observed ChatGPT output format.
        return """[
    {
        "criteria": "DRY",
        "explanation": "The memory allocation and initialization for ``p1``, ``p2``, and ``p3`` are repetitive. Consider creating a function like ``allocateAndInitializeMemory``."
    },
    {
        "criteria": "DRY",
        "explanation": "The second DRY failure, because this is the observed ChatGPT behaviour."
    },
    {
        "criteria": "SRP",
        "explanation": "The ``main`` function handles memory allocation, initialization, and printing. You should separate these responsibilities into different functions like ``allocateMemory``, ``initializeData``, and ``printData``."
    },
    {
        "criteria": "NAME",
        "explanation": "``x1`` should be called ``title``, ``y1`` should be called ``author``, ``z1`` should be called ``year``, ``p1`` should be called ``titlePtr``, ``p2`` should be called ``authorPtr``, ``p3`` should be called ``yearPtr``."
    }
]"""
        # assert openai_client is not None  # unreachable while the stub above is in place
    except Exception:
        traceback.print_exc()
        return {"error": "There was an error while talking to ChatGPT. Maybe it is overloaded?"}


def add_evaluation_fields_on_js_answer(json_answer, all_criterias=None):
    """
    Adds some JSON fields to store the human feedback.
    The textual human feedback will always be at position 0.
    :param json_answer:
    :param all_criterias:
    :return:
    """
    if all_criterias is None:
        all_criterias = CODE_AUGMENTATIONS
    overall_feedback = {
        "criteria": "HUMAN_FEEDBACK",
        "explanation": "",
        "EVAL": EVAL_ANSWER_NOEVAL,
    }
    if isinstance(json_answer, dict):
        # A parse failure arrives as {"error": ...} instead of a list;
        # pass it through so the GUI can show the error.
        return [overall_feedback, json_answer]
    # Pad the answer with every criterion that ChatGPT did not flag.
    existing = {c["criteria"] for c in json_answer}
    for criteria in all_criterias:
        if criteria[0] not in existing:
            json_answer.append({"criteria": criteria[0], "explanation": "Not infringed"})
    enhanced_answer = [overall_feedback]
    for ans in json_answer:
        ans = copy.deepcopy(ans)
        ans["EVAL"] = EVAL_ANSWER_NOEVAL
        enhanced_answer.append(ans)
    return enhanced_answer
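
# Shape of the enhanced answer (illustrative, values abridged):
#   [{"criteria": "HUMAN_FEEDBACK", "explanation": "", "EVAL": 0},
#    {"criteria": "DRY", "explanation": "...", "EVAL": 0},
#    {"criteria": "MC", "explanation": "Not infringed", "EVAL": 0},
#    ...]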


def eval_the_piece_of_c_code(openai_client, ccode):
    """
    Main entrypoint to this module; called from the backend. The call blocks,
    so run it off the main thread.
    Returns a proper JSON structure, with the EVAL fields added.
    :param openai_client:
    :param ccode:
    :return:
    """
    chatgpt_ans = eval_code_by_chatgpt(openai_client, ccode)
    chatgpt_js = parse_chatgpt_answer(chatgpt_ans)
    enhanced_answer = add_evaluation_fields_on_js_answer(chatgpt_js)
    return enhanced_answer
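

# A minimal usage sketch: while eval_code_by_chatgpt returns the hardcoded
# answer, passing openai_client=None is enough to exercise the whole pipeline.
if __name__ == "__main__":
    sample_code = "int main(void) { return 0; }"
    for entry in eval_the_piece_of_c_code(None, sample_code):
        print(entry["criteria"], "->", entry["EVAL"])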