import re

# Used to check our model's performance on multiple-choice tasks. This can also be
# done in a more involved way, e.g. with LLM-as-a-judge.
def check_multiple_choice_with_regex(model_outputs, correct_answers):
    results = []
    for model_output, correct_answer in zip(model_outputs, correct_answers):
        correct_answer = correct_answer.upper()
        # Match the answer letter as a standalone word, followed by punctuation,
        # or inside parentheses
        patterns = [
            rf"\b{correct_answer}\b",     # Word boundary around the answer letter
            rf"\b{correct_answer}[.,)]",  # Answer followed by punctuation
            rf"\(.*{correct_answer}.*\)", # Answer within parentheses
        ]

        match_found = False
        for pattern in patterns:
            if re.search(pattern, model_output):
                match_found = True
                break  # Exit inner loop once a match is found
        results.append(match_found)
    return results
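
# A minimal sanity check of the helper above, using made-up model outputs and
# answer letters (hypothetical data, not from an actual evaluation run):
example_outputs = [
    "The correct answer is (B), because ...",
    "I would go with D.",
    "Answer: A",
]
example_answers = ["b", "c", "a"]
print(check_multiple_choice_with_regex(example_outputs, example_answers))
# Expected: [True, False, True]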