| from typing import * |
| import contextlib |
| import signal |
|
|
| from .executor_utils import function_with_timeout |
|
|
|
|
def evaluate_io(
    sample_io: list[str],
    completion: str,
    timeout: int = 5,
    stop_early: bool = False,
):
    """Run ``completion`` against each sample I/O snippet, one snippet at a time.

    For every entry of ``sample_io`` a program is assembled from an optional
    ``from typing import *`` line (added only when the completion does not
    already contain it), the completion, and the snippet. The program is
    handed to ``function_with_timeout`` together with ``timeout``.

    Args:
        sample_io: Source snippets (typically assertions) appended to the
            completion, each executed in its own run.
        completion: Source code under test.
        timeout: Budget passed through to ``function_with_timeout``.
            # NOTE(review): presumably seconds, and presumably an exception
            # is raised when it is exceeded - confirm in executor_utils.
        stop_early: When true, return on the first failing snippet.

    Returns:
        A ``(passed, test_log)`` tuple. ``passed`` is ``True`` only if no run
        raised. ``test_log`` holds one ``passed in test case`` /
        ``failed in test case`` line per snippet; with ``stop_early`` the log
        contains only the failing snippet's line.
    """
    # NOTE: exec runs arbitrary (usually model-generated) code with the full
    # privileges of this process; only call this inside a sandbox.
    passed = True
    log_lines = []
    for io in sample_io:
        try:
            typing_header = (
                "" if "from typing import *" in completion
                else "from typing import *\n"
            )
            code = typing_header + completion + "\n" + io + "\n"
            function_with_timeout(
                exec,
                # Shallow copy: the program can still read every name this
                # module exposes, but its own definitions no longer leak into
                # (or overwrite) the module namespace between runs.
                (code, dict(globals())),
                timeout,
            )
        except Exception:
            if stop_early:
                return False, f"failed in test case: {io}\n"
            passed = False
            log_lines.append(f"failed in test case: {io}\n")
        else:
            log_lines.append(f"passed in test case: {io}\n")

    return passed, "".join(log_lines)
|
|
|
|
def evaluate_io_et(
    sample_io: list[str],
    completion: str,
    timeout: int = 5,
    prompt: str = "",
):
    """Run ``prompt + completion`` against all sample I/O snippets in one go.

    The snippets are joined with newlines and appended to the program, so a
    single execution covers every test case. A ``from typing import *`` line
    is prepended unless the completion already contains it (the prompt is not
    inspected for it).

    Args:
        sample_io: Source snippets appended after the completion.
        completion: Source code under test.
        timeout: Budget passed through to ``function_with_timeout``.
            # NOTE(review): presumably seconds - confirm in executor_utils.
        prompt: Source text placed directly before the completion, with no
            separator added between the two.

    Returns:
        ``True`` if the execution finished without raising, ``False`` if any
        ``Exception`` was raised while building or running the program.
    """
    # NOTE: exec runs arbitrary (usually model-generated) code with the full
    # privileges of this process; only call this inside a sandbox.
    io = "\n".join(sample_io)
    try:
        typing_header = (
            "" if "from typing import *" in completion
            else "from typing import *\n"
        )
        code = typing_header + prompt + completion + "\n" + io + "\n"
        function_with_timeout(
            exec,
            # Shallow copy so definitions made by the program do not leak
            # into, or overwrite names of, this module between evaluations.
            (code, dict(globals())),
            timeout,
        )
    except Exception:
        return False
    return True
|
|
|
|
def evaluate_functional_correctness(
    problem: Dict,
    completion: str,
    timeout: int = 5,
    test_key: str = "test",
):
    """Run a problem's test suite against ``completion``.

    The executed program is: an optional ``from typing import *`` line (added
    only when the completion lacks it), the completion, the test source
    stored under ``problem[test_key]``, and a final
    ``check(<entry_point>)`` call.

    Args:
        problem: Mapping that provides the test source under ``test_key`` and
            the name of the function under test under ``"entry_point"``.
        completion: Source code under test.
        timeout: Budget passed through to ``function_with_timeout``.
            # NOTE(review): presumably seconds - confirm in executor_utils.
        test_key: Key of ``problem`` that holds the test source.

    Returns:
        ``"passed"`` when the program ran without raising, otherwise
        ``"failed: <exception message>"``. A missing key in ``problem`` is
        reported the same way rather than propagated.
    """
    # NOTE: exec runs arbitrary (usually model-generated) code with the full
    # privileges of this process; only call this inside a sandbox.
    try:
        typing_header = (
            "" if "from typing import *" in completion
            else "from typing import *\n"
        )
        code = (
            typing_header
            + completion + "\n"
            + problem[test_key] + "\n"
            + f"check({problem['entry_point']})"
        )
        function_with_timeout(
            exec,
            # Shallow copy so definitions made by the program do not leak
            # into, or overwrite names of, this module between evaluations.
            (code, dict(globals())),
            timeout,
        )
        return "passed"
    except Exception as e:
        return f"failed: {e}"
|
|
|
|
def evaluate_functional_correctness2(
    problem: Dict,
    completion: str,
    timeout: float = 10,
) -> str:
    """Run a problem's test suite against ``completion`` in this process.

    The executed program is ``from typing import *``, the completion, the
    test source from ``problem["test"]`` and a final ``check(<entry_point>)``
    call.

    Args:
        problem: Mapping with the test source under ``"test"`` and the name
            of the function under test under ``"entry_point"``.
        completion: Source code under test.
        timeout: Wall-clock limit in seconds. It is enforced with a
            ``SIGALRM`` interval timer, which is only possible on platforms
            that provide ``signal.setitimer`` and when called from the main
            thread; otherwise (or when ``timeout`` is ``None`` or not
            positive) the program runs without a limit.

    Returns:
        ``"passed"`` if the program ran without raising, ``"timed out"`` if
        the limit was hit, otherwise ``"failed: <exception message>"``.
    """
    import threading

    check_program = (
        "from typing import *\n"
        + completion + "\n"
        + problem["test"] + "\n"
        + f"check({problem['entry_point']})"
    )

    def _on_alarm(signum, frame):
        # Raised inside whatever code is running when the timer fires.
        raise TimeoutException("Timed out!")

    can_limit = (
        timeout is not None
        and timeout > 0
        and hasattr(signal, "setitimer")
        and threading.current_thread() is threading.main_thread()
    )

    # A single dict serves as both globals and locals of the executed program.
    # With the implicit split globals/locals of a bare exec() inside a
    # function, functions defined by the program cannot see each other (or
    # themselves), so recursive completions fail with NameError. The copy
    # keeps the module's names readable without letting the program modify
    # the module namespace.
    namespace = dict(globals())

    # NOTE: exec runs arbitrary (usually model-generated) code with the full
    # privileges of this process; only call this inside a sandbox.
    previous_handler = None
    try:
        if can_limit:
            previous_handler = signal.signal(signal.SIGALRM, _on_alarm)
            signal.setitimer(signal.ITIMER_REAL, timeout)
        exec(check_program, namespace)
        return "passed"
    except TimeoutException:
        return "timed out"
    except BaseException as e:
        return f"failed: {e}"
    finally:
        if can_limit:
            # Cancel the pending alarm before restoring the old handler so it
            # can never fire outside this function.
            signal.setitimer(signal.ITIMER_REAL, 0)
            signal.signal(
                signal.SIGALRM,
                # signal.signal() returns None for handlers not installed
                # from Python; fall back to the default action in that case.
                signal.SIG_DFL if previous_handler is None else previous_handler,
            )


class TimeoutException(Exception):
    """Raised inside the executed program when its time limit is exceeded."""
|
|