|
from gpt_engineer.benchmark.types import Benchmark, Task |
|
from gpt_engineer.core.files_dict import FilesDict |
|
|
|
|
|
def load_gptme(): |
|
return Benchmark( |
|
name="gptme", |
|
tasks=[ |
|
Task( |
|
name="hello", |
|
initial_code=FilesDict({"hello.py": "print('Hello, world!')"}), |
|
command="python hello.py", |
|
prompt="Change the code in hello.py to print 'Hello, human!'", |
|
assertions={ |
|
"correct output": lambda assertable: assertable.stdout |
|
== "Hello, human!\n", |
|
"correct file": lambda assertable: assertable.files[ |
|
"hello.py" |
|
].strip() |
|
== "print('Hello, human!')", |
|
}, |
|
), |
|
Task( |
|
name="hello-patch", |
|
initial_code=FilesDict({"hello.py": "print('Hello, world!')"}), |
|
command="python hello.py", |
|
prompt="Patch the code in hello.py to print 'Hello, human!'", |
|
assertions={ |
|
"correct output": lambda assertable: assertable.stdout |
|
== "Hello, human!\n", |
|
"correct file": lambda assertable: assertable.files[ |
|
"hello.py" |
|
].strip() |
|
== "print('Hello, human!')", |
|
}, |
|
), |
|
Task( |
|
name="hello-ask", |
|
initial_code=FilesDict({"hello.py": "print('Hello, world!')"}), |
|
command="echo 'Erik' | python hello.py", |
|
prompt="modify hello.py to ask the user for their name and print 'Hello, <name>!'. don't try to execute it", |
|
assertions={ |
|
"correct output": lambda assertable: "Hello, Erik!" |
|
in assertable.stdout, |
|
}, |
|
), |
|
Task( |
|
name="prime100", |
|
initial_code=FilesDict( |
|
{} |
|
), |
|
command="python prime.py", |
|
prompt="write a script prime.py that computes and prints the 100th prime number", |
|
assertions={ |
|
"correct output": lambda assertable: "541" |
|
in assertable.stdout.split(), |
|
}, |
|
), |
|
Task( |
|
name="init-git", |
|
initial_code=FilesDict( |
|
{} |
|
), |
|
command="git status", |
|
prompt="initialize a git repository, write a main.py file, and commit it", |
|
assertions={ |
|
"clean exit": lambda assertable: assertable.process.returncode == 0, |
|
"clean working tree": lambda assertable: "nothing to commit, working tree clean" |
|
in assertable.stdout, |
|
"main.py exists": lambda assertable: "main.py" in assertable.files, |
|
"we have a commit": lambda assertable: "No commits yet" |
|
not in assertable.stdout, |
|
}, |
|
), |
|
], |
|
) |
|
|