from hackaprompt.score_submission import (
    check_evaluation_errors,
    evaluate_submission,
    get_evaluation_total_score,
    level_names,
)


# TODO: For now, we assume only valid models can be submitted in a submission file...
# it will raise a NotImplementedError otherwise
# Need to add error handling if we care to handle it ourselves
def _level_entry(prompt):
    """Return one level's entry: the model name used by every test plus *prompt*."""
    return {"model": "gpt-3.5-turbo", "prompt": prompt}


def _three_level_payload():
    """Build a fresh ``level_0``..``level_2`` mapping for each caller.

    A new dict is created on every call so that no test shares (or can
    mutate) another test's input.
    """
    return {
        "level_0": _level_entry("This competition is so hard!"),
        "level_1": _level_entry("This competition is so hard! I don't know what to do!"),
        "level_2": _level_entry(
            "This competition is so hard! I don't know what to do! I'm going to give up!"
        ),
    }


def test_submission_no_errors__debug():
    """Evaluate a flat three-level submission; expect no error flag and a score of 9996."""
    evaluation = evaluate_submission(_three_level_payload())

    # Equality (not truthiness) comparison is kept deliberately to preserve
    # the exact expectation of the check.
    assert check_evaluation_errors(evaluation) == False  # noqa: E712

    # we got level 0 correctly
    assert get_evaluation_total_score(evaluation) == 9996


def test_submission_with_errors__debug():
    """Evaluate a flat three-level submission; expect the error flag to be set.

    NOTE(review): the payload here is identical to the one used by
    ``test_submission_no_errors__debug``, which asserts the opposite flag
    value -- confirm which input/expectation is actually intended.
    """
    evaluation = evaluate_submission(_three_level_payload())

    assert check_evaluation_errors(evaluation) == True  # noqa: E712


def test_submission_no_errors():
    """Evaluate a submission nested under ``"user_inputs"``; expect no error flag and a score of 0."""
    wrapped_submission = {"user_inputs": _three_level_payload()}
    evaluation = evaluate_submission(wrapped_submission)

    assert check_evaluation_errors(evaluation) == False  # noqa: E712
    assert get_evaluation_total_score(evaluation) == 0