Spaces:
Paused
Paused
def get_question_text(problem):
    """Return the question string of a problem record.

    Args:
        problem: Mapping that holds the question under the ``'question'`` key.

    Returns:
        The value stored at ``problem['question']``, unchanged.

    Raises:
        KeyError: If ``problem`` has no ``'question'`` entry.
    """
    return problem['question']
def get_context_text(problem, use_caption):
    """Build the context string shown alongside a question.

    The textual hint and, optionally, the image caption are joined with a
    single space and stripped.

    Args:
        problem: Mapping with a ``'hint'`` string and, when ``use_caption`` is
            truthy, a ``'caption'`` string.
        use_caption: Whether to append ``problem['caption']`` to the hint.

    Returns:
        The combined context, or ``"N/A"`` when the combined text is empty.

    Raises:
        KeyError: If ``'hint'`` is missing, or ``'caption'`` is missing while
            ``use_caption`` is truthy.
    """
    hint_text = problem['hint']
    # The caption is only looked up when requested, so records without a
    # 'caption' key still work with use_caption=False.
    caption_text = problem['caption'] if use_caption else ""
    combined = " ".join([hint_text, caption_text]).strip()
    return combined if combined != "" else "N/A"
def get_choice_text(probelm, options):
    """Render the answer choices of a problem as one labelled line.

    Args:
        probelm: Mapping with a ``'choices'`` sequence. (The misspelled
            parameter name is kept so existing keyword callers keep working.)
        options: Indexable sequence of labels; ``options[i]`` labels the
            i-th choice.

    Returns:
        A string such as ``"(A) red (B) blue"``; empty when there are no
        choices.

    Raises:
        KeyError: If ``probelm`` has no ``'choices'`` entry.
        IndexError: If there are more choices than labels in ``options``.
    """
    labelled = [
        "({}) {}".format(options[idx], choice)
        for idx, choice in enumerate(probelm['choices'])
    ]
    return " ".join(labelled)
def get_answer(problem, options):
    """Return the label of the correct choice.

    Args:
        problem: Mapping whose ``'answer'`` entry is an index into ``options``.
        options: Indexable sequence of choice labels.

    Returns:
        ``options[problem['answer']]``.

    Raises:
        KeyError: If ``problem`` has no ``'answer'`` entry.
        IndexError: If the answer index is outside ``options``.
    """
    answer_index = problem['answer']
    return options[answer_index]
def get_lecture_text(problem):
    """Return the lecture text with newlines escaped.

    Real newline characters are replaced by the two-character sequence
    backslash + ``n`` so the lecture stays on one physical line. Per the
    original author's note, this lets GPT-3 generate the lecture with more
    tokens.

    Args:
        problem: Mapping with a ``'lecture'`` string.

    Returns:
        ``problem['lecture']`` with every ``"\\n"`` character escaped.

    Raises:
        KeyError: If ``problem`` has no ``'lecture'`` entry.
    """
    return problem['lecture'].replace("\n", "\\n")
def get_solution_text(problem):
    """Return the solution text with newlines escaped.

    Real newline characters are replaced by the two-character sequence
    backslash + ``n`` so the solution stays on one physical line. Per the
    original author's note, this lets GPT-3 generate the solution with more
    tokens.

    Args:
        problem: Mapping with a ``'solution'`` string.

    Returns:
        ``problem['solution']`` with every ``"\\n"`` character escaped.

    Raises:
        KeyError: If ``problem`` has no ``'solution'`` entry.
    """
    return problem['solution'].replace("\n", "\\n")
# Label that introduces a rationale (lecture and/or solution) in a prompt.
_BECAUSE = "BECAUSE:"

# Input layouts, keyed by the code before the "-" in a prompt format.
# The letter order mirrors the field order: Q=question, C=context,
# M=options, L=lecture, E=solution.
_INPUT_TEMPLATES = {
    "CQM": "Context: {context}\nQuestion: {question}\nOptions: {choice}\n",
    "QCM": "Question: {question}\nContext: {context}\nOptions: {choice}\n",
    # Upper bound experiments: the rationale is given as part of the input.
    "QCML": "Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n",
    "QCME": "Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n",
    "QCMLE": "Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n",
    "QCLM": "Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n",
    "QCEM": "Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n",
    "QCLEM": "Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n",
}

# Output layouts, keyed by the code after the "-" in a prompt format.
# A=answer, L=lecture, E=solution; the letter order mirrors the field order.
# NOTE: "AL" and "AE" used to be swapped (AL rendered the solution and AE the
# lecture), contradicting every other code in both tables; they now follow
# the same letter convention.
_OUTPUT_TEMPLATES = {
    "A": "Answer: The answer is {answer}.",
    "AL": "Answer: The answer is {answer}. BECAUSE: {lecture}",
    "AE": "Answer: The answer is {answer}. BECAUSE: {solution}",
    "ALE": "Answer: The answer is {answer}. BECAUSE: {lecture} {solution}",
    "AEL": "Answer: The answer is {answer}. BECAUSE: {solution} {lecture}",
    "LA": "Answer: {lecture} The answer is {answer}.",
    "EA": "Answer: {solution} The answer is {answer}.",
    "LEA": "Answer: {lecture} {solution} The answer is {answer}.",
    "ELA": "Answer: {solution} {lecture} The answer is {answer}.",
}


def _split_prompt_format(format):
    """Split an ``"<input>-<output>"`` prompt format into its two codes."""
    parts = format.split("-")
    if len(parts) != 2:
        raise ValueError(
            f"prompt format must look like '<input>-<output>', got {format!r}"
        )
    return parts[0], parts[1]


def _render_input(input_format, question, context, choice, lecture, solution):
    """Fill the input template selected by ``input_format``."""
    template = _INPUT_TEMPLATES.get(input_format)
    if template is None:
        raise ValueError(f"unknown input format: {input_format!r}")
    return template.format(
        question=question,
        context=context,
        choice=choice,
        lecture=lecture,
        solution=solution,
    )


def _render_output(output_format, answer, lecture, solution, test_example):
    """Fill the output template, or return the bare cue for a test example."""
    if test_example:
        # A test example ends with the cue only; the output code is ignored.
        return "Answer:"
    template = _OUTPUT_TEMPLATES.get(output_format)
    if template is None:
        raise ValueError(f"unknown output format: {output_format!r}")
    return template.format(answer=answer, lecture=lecture, solution=solution)


def _render_lepa_output(answer, lecture, solution):
    """Render the sectioned LECTURE / SOLUTION / ### / ANSWER output."""
    sections = []
    # Blank sections are skipped entirely rather than emitted as empty labels.
    if lecture.strip():
        sections.append(f"LECTURE: {lecture}\n")
    if solution.strip():
        sections.append(f"SOLUTION: {solution}\n")
    sections.append('###\n')
    sections.append(f"ANSWER: {answer}.")
    return "".join(sections)


def _clean_prompt_text(text):
    """Collapse double spaces, strip, and drop a dangling rationale label."""
    # Empty lecture/solution fields leave two adjacent spaces in a template.
    text = text.replace("  ", " ").strip()
    if text.endswith(_BECAUSE):
        # Remove only the trailing label; "BECAUSE:" labels earlier in the
        # text (e.g. in the input part) still introduce real content.
        text = text[:-len(_BECAUSE)].strip()
    return text


def create_one_example_chatbot(format, question, context, choice, answer, lecture, solution, test_example=True):
    """Render one example as a separate ``(input, output)`` string pair.

    Args:
        format: Prompt format ``"<input>-<output>"``, e.g. ``"QCM-LEPA"``.
            Besides the shared output codes, this variant supports ``LEPA``.
        question: Question text.
        context: Context text.
        choice: Pre-rendered options line.
        answer: Label of the correct option.
        lecture: Lecture text (may be empty).
        solution: Solution text (may be empty).
        test_example: When true, the output is just ``"Answer:"`` and the
            output code is not consulted.

    Returns:
        Tuple ``(input_text, output_text)``, each cleaned of double spaces,
        surrounding whitespace and a dangling trailing ``BECAUSE:``.

    Raises:
        ValueError: If ``format`` is malformed or names an unknown code.
    """
    input_format, output_format = _split_prompt_format(format)
    input_text = _render_input(
        input_format, question, context, choice, lecture, solution)
    if output_format == 'LEPA' and not test_example:
        output_text = _render_lepa_output(answer, lecture, solution)
    else:
        output_text = _render_output(
            output_format, answer, lecture, solution, test_example)
    return _clean_prompt_text(input_text), _clean_prompt_text(output_text)


def create_one_example(format, question, context, choice, answer, lecture, solution, test_example=True):
    """Render one example as a single prompt string (input followed by output).

    Args:
        format: Prompt format ``"<input>-<output>"``, e.g. ``"QCM-ALE"``.
        question: Question text.
        context: Context text.
        choice: Pre-rendered options line.
        answer: Label of the correct option.
        lecture: Lecture text (may be empty).
        solution: Solution text (may be empty).
        test_example: When true, the text ends with the bare ``"Answer:"`` cue
            and the output code is not consulted.

    Returns:
        The concatenated input and output, cleaned of double spaces,
        surrounding whitespace and a dangling trailing ``BECAUSE:``.

    Raises:
        ValueError: If ``format`` is malformed or names an unknown code.
    """
    input_format, output_format = _split_prompt_format(format)
    input_text = _render_input(
        input_format, question, context, choice, lecture, solution)
    output_text = _render_output(
        output_format, answer, lecture, solution, test_example)
    return _clean_prompt_text(input_text + output_text)


def create_one_example_gpt4(format, question, context, choice, answer, lecture, solution, test_example=True):
    """Render one example as a pair of chat messages.

    Args:
        format: Prompt format ``"<input>-<output>"``, e.g. ``"QCM-A"``.
        question: Question text.
        context: Context text.
        choice: Pre-rendered options line.
        answer: Label of the correct option.
        lecture: Lecture text (may be empty).
        solution: Solution text (may be empty).
        test_example: When true, the assistant content is just ``"Answer:"``
            and the output code is not consulted.

    Returns:
        Tuple ``(user_prompt, assistant_prompt)`` of dicts with ``"role"`` and
        ``"content"`` keys. The user content wraps the cleaned input as
        ``"Can you explain <input>?"``.

    Raises:
        ValueError: If ``format`` is malformed or names an unknown code.
    """
    input_format, output_format = _split_prompt_format(format)
    # Both halves get the same cleanup as the chatbot variant, so an empty
    # rationale never leaves a dangling "BECAUSE:" in either message.
    input_text = _clean_prompt_text(_render_input(
        input_format, question, context, choice, lecture, solution))
    output_text = _clean_prompt_text(_render_output(
        output_format, answer, lecture, solution, test_example))
    user_prompt = {"role": "user", "content": f"Can you explain {input_text}?"}
    assistant_prompt = {"role": "assistant", "content": f"{output_text}"}
    return user_prompt, assistant_prompt
def build_prompt_chatbot(problems, shot_qids, prompt_format, use_caption=False, options=("A", "B", "C", "D", "E"), is_test=False):
    """Render chatbot-style examples for a set of question ids.

    Args:
        problems: Mapping from question id to problem record.
        shot_qids: Iterable of question ids to render.
        prompt_format: Prompt format string passed to
            ``create_one_example_chatbot``.
        use_caption: Whether the image caption is appended to the context.
        options: Indexable sequence of choice labels. The default is an
            immutable tuple so it cannot be altered between calls.
        is_test: Passed on as ``test_example`` for every rendered example.

    Returns:
        Dict mapping each question id to whatever
        ``create_one_example_chatbot`` returns for it.
    """
    examples = {}
    for qid in shot_qids:
        problem = problems[qid]
        # The getters escape newlines as a literal backslash-n; this variant
        # wants real newlines, so the escape is undone here.
        lecture = get_lecture_text(problem).replace('\\n', '\n')
        solution = get_solution_text(problem).replace('\\n', '\n')
        examples[qid] = create_one_example_chatbot(
            prompt_format,
            get_question_text(problem),
            get_context_text(problem, use_caption),
            get_choice_text(problem, options),
            get_answer(problem, options),
            lecture,
            solution,
            test_example=is_test,
        )
    return examples
def build_prompt(problems, shot_qids, test_qid, args):
    """Build a few-shot text prompt: the shot examples, then the test example.

    Args:
        problems: Mapping from question id to problem record.
        shot_qids: Iterable of question ids used as solved (training) shots.
        test_qid: Question id of the example to be answered.
        args: Object exposing ``prompt_format``, ``use_caption`` and
            ``options`` attributes.

    Returns:
        All rendered examples joined by a blank line, with the test example
        last.
    """

    def render(qid, is_test):
        # Shots and the test example go through the same pipeline; only the
        # test_example flag differs.
        problem = problems[qid]
        return create_one_example(
            args.prompt_format,
            get_question_text(problem),
            get_context_text(problem, args.use_caption),
            get_choice_text(problem, args.options),
            get_answer(problem, args.options),
            get_lecture_text(problem),
            get_solution_text(problem),
            test_example=is_test,
        )

    # n-shot training examples
    examples = [render(qid, False) for qid in shot_qids]
    # test example
    examples.append(render(test_qid, True))
    return '\n\n'.join(examples)
def build_prompt_gpt4(problems, shot_qids, test_qid, args):
    """Build a few-shot prompt as a list of chat messages.

    The list opens with a fixed system message, followed by a user/assistant
    message pair per shot and finally the user/assistant pair of the test
    example (rendered with ``test_example=True``).

    Args:
        problems: Mapping from question id to problem record.
        shot_qids: Iterable of question ids used as solved (training) shots.
        test_qid: Question id of the example to be answered.
        args: Object exposing ``prompt_format``, ``use_caption`` and
            ``options`` attributes.

    Returns:
        List of ``{"role": ..., "content": ...}`` message dicts.
    """

    def render(qid, is_test):
        # Returns the (user_prompt, assistant_prompt) pair for one problem.
        problem = problems[qid]
        return create_one_example_gpt4(
            args.prompt_format,
            get_question_text(problem),
            get_context_text(problem, args.use_caption),
            get_choice_text(problem, args.options),
            get_answer(problem, args.options),
            get_lecture_text(problem),
            get_solution_text(problem),
            test_example=is_test,
        )

    prompt_array = [{"role": "system", "content": "You are a helpful assistant."}]
    # n-shot training examples
    for qid in shot_qids:
        prompt_array.extend(render(qid, False))
    # test example
    prompt_array.extend(render(test_qid, True))
    return prompt_array