# reproduce/evaluations/pitfalls.py
from .utils import log, fetch_code

def evaluate(verbose, llm, zip, readme):
    # `zip` is the submitted archive (the name shadows the builtin);
    # `readme` is unused in this check.
    log(verbose, "TITLE", "\nLooking for common pitfalls (in development)...")
    # fetch_code returns a mapping of file name -> source text.
    codebase = fetch_code(zip)
    if llm:
        for code in codebase:
            # First pass: a strict yes/no screen for serious issues.
            pitfall_check = llm.predict("STRICT", f"{codebase[code]}\nDo you find any signs of serious issues in this code?")
            # Flag the file only on an unambiguous "Yes" (and no "No") in the reply.
            if ("Yes" in pitfall_check) and ("No" not in pitfall_check):
                log(verbose, "ERROR", f"Found possible issues in {code}")
                # Second pass: ask the model to describe the pitfalls in detail.
                log(verbose, "LOG", llm.predict("PITFALL", f"File name {code} file {codebase[code]}\nCan you find any signs of common pitfalls in this code?"))