Spaces:
Sleeping
Sleeping
from hackaprompt.score_submission import check_evaluation_errors, evaluate_submission, get_evaluation_total_score, level_names | |
def test_submission_no_errors__debug():
    """Check that a flat three-level submission evaluates without errors.

    The payload maps ``level_0`` .. ``level_2`` directly to ``model`` /
    ``prompt`` entries.  The test expects ``check_evaluation_errors`` to
    report no error and the total score to be 9996.
    """
    # Named ``submission`` rather than ``submission_errors``: this payload is
    # expected to evaluate cleanly.
    submission = {
        "level_0": {
            "model": "gpt-3.5-turbo",
            "prompt": "This competition is so hard!",
        },
        "level_1": {
            "model": "gpt-3.5-turbo",
            "prompt": "This competition is so hard! I don't know what to do!",
        },
        "level_2": {
            "model": "gpt-3.5-turbo",
            "prompt": "This competition is so hard! I don't know what to do! I'm going to give up!",
        },
    }

    # TODO: For now, we assume only valid models can be submitted in a submission file...
    # it will raise a NotImplementedError otherwise
    # Need to add error handling if we care to handle it ourselves
    evaluation = evaluate_submission(submission)
    evaluation_error = check_evaluation_errors(evaluation)

    # Truthiness check instead of ``== False`` (PEP 8).
    assert not evaluation_error

    total_score = get_evaluation_total_score(evaluation)

    # we got level 0 correctly
    assert total_score == 9996
def test_submission_with_errors__debug():
    """Check that evaluating the submission below is reported as an error.

    The payload maps ``level_0`` .. ``level_2`` directly to ``model`` /
    ``prompt`` entries.  The test expects ``check_evaluation_errors`` to
    return a truthy value for the resulting evaluation.
    """
    # NOTE(review): this payload is identical to the one used in
    # test_submission_no_errors__debug, which asserts that NO error is
    # reported.  Both expectations cannot hold for a deterministic evaluator;
    # TODO confirm which input is supposed to trigger the error here.
    submission_errors = {
        "level_0": {
            "model": "gpt-3.5-turbo",
            "prompt": "This competition is so hard!",
        },
        "level_1": {
            "model": "gpt-3.5-turbo",
            "prompt": "This competition is so hard! I don't know what to do!",
        },
        "level_2": {
            "model": "gpt-3.5-turbo",
            "prompt": "This competition is so hard! I don't know what to do! I'm going to give up!",
        },
    }

    # TODO: For now, we assume only valid models can be submitted in a submission file...
    # it will raise a NotImplementedError otherwise
    # Need to add error handling if we care to handle it ourselves
    evaluation = evaluate_submission(submission_errors)
    evaluation_error = check_evaluation_errors(evaluation)

    # Truthiness check instead of ``== True`` (PEP 8).
    assert evaluation_error
def test_submission_no_errors():
    """Check that a ``user_inputs``-wrapped submission evaluates without errors.

    The per-level ``model`` / ``prompt`` entries are nested under a top-level
    ``user_inputs`` key.  The test expects ``check_evaluation_errors`` to
    report no error and the total score to be 0.
    """
    # Named ``submission`` rather than ``submission_errors``: this payload is
    # expected to evaluate cleanly.
    submission = {
        "user_inputs": {
            "level_0": {
                "model": "gpt-3.5-turbo",
                "prompt": "This competition is so hard!",
            },
            "level_1": {
                "model": "gpt-3.5-turbo",
                "prompt": "This competition is so hard! I don't know what to do!",
            },
            "level_2": {
                "model": "gpt-3.5-turbo",
                "prompt": "This competition is so hard! I don't know what to do! I'm going to give up!",
            },
        },
    }

    # TODO: For now, we assume only valid models can be submitted in a submission file...
    # it will raise a NotImplementedError otherwise
    # Need to add error handling if we care to handle it ourselves
    evaluation = evaluate_submission(submission)
    evaluation_error = check_evaluation_errors(evaluation)

    # Truthiness check instead of ``== False`` (PEP 8).
    assert not evaluation_error

    total_score = get_evaluation_total_score(evaluation)
    assert total_score == 0