File size: 963 Bytes
f2c2a4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
import re
# Used to check our model's performance on multiple-choice tasks. This can also be done in a more involved way, e.g. with LLM-as-a-judge.
def check_multiple_choice_with_regex(model_outputs, correct_answers):
    """Check whether each model output mentions its expected answer label.

    Outputs and answers are paired positionally with ``zip``, so any surplus
    items in the longer sequence are ignored. Each expected answer is
    upper-cased before matching, while the model output is searched as-is;
    a lower-case letter in the output therefore does not count as a match.

    Args:
        model_outputs: Iterable of model response strings.
        correct_answers: Iterable of expected answer labels (e.g. ``"a"``
            or ``"B"``).

    Returns:
        A list of booleans, one per (output, answer) pair, that is ``True``
        when the output contains the answer as a standalone token, followed
        by ``.``, ``,`` or ``)``, or alone inside parentheses.
    """
    results = []
    for model_output, correct_answer in zip(model_outputs, correct_answers):
        # Escape the label so regex metacharacters in it are matched literally.
        answer = re.escape(correct_answer.upper())
        patterns = (
            rf"\b{answer}\b",  # Answer as a standalone token
            rf"\b{answer}[.,)]",  # Answer followed by punctuation
            # Answer alone inside parentheses. A looser ".*" on both sides
            # would accept any parenthesised text that merely contains the
            # letter, e.g. "(BAD)" for answer "A".
            rf"\(\s*{answer}\s*\)",
        )
        results.append(
            any(re.search(pattern, model_output) for pattern in patterns)
        )
    return results