|
import re |
|
import json |
|
from typing import List, Union, Optional |
|
|
|
|
|
def extract_final_answer(output: str) -> str:
    """
    Extract the text that follows the last 'FINAL ANSWER:' marker in a model output.

    The marker is matched case-insensitively (ASCII case rules). The extracted
    text is stripped of surrounding whitespace, and every comma is normalized to
    be followed by exactly one space, so "a,b,  c" becomes "a, b, c".

    Parameters:
    - output: the raw model output; non-string values are converted with str().

    Returns:
    - the cleaned answer text, or the whole stringified output unchanged when
      the marker does not occur.
    """
    output = str(output)
    marker = "FINAL ANSWER:"

    # Search the original text directly instead of a lower-cased copy:
    # str.lower() can change a string's length (e.g. U+0130 lower-cases to two
    # code points), so an offset found in the copy may not line up with the
    # original and the slice would start in the wrong place.
    occurrences = list(
        re.finditer(re.escape(marker), output, flags=re.IGNORECASE | re.ASCII)
    )
    if not occurrences:
        return output

    # Only the text after the LAST marker counts as the answer.
    raw_answer = output[occurrences[-1].end():].strip()

    # Normalize comma-separated lists to "a, b, c".
    return re.sub(r",\s*", ", ", raw_answer)
|
|
|
|
|
def replace_tool_mentions(prompt: str) -> str:
    """
    Rewrite short tool names in a prompt to their full tool names.

    Two forms are rewritten, in this order:
    - backtick-quoted mentions: `search` -> `web_search`, `wiki` -> `wikipedia_search`
      (only when the backticks are not directly adjacent to a word character);
    - call syntax: search( -> web_search(, wiki( -> wikipedia_search(
      (only when the name is not preceded by a word character or underscore,
      so names such as web_search( or research( are left alone).

    Note that a name preceded by a dot, e.g. "re.search(", is still rewritten,
    because a dot is not a word character.

    Parameters:
    - prompt: the prompt text to rewrite.

    Returns:
    - the prompt with all matching tool mentions replaced.
    """
    # (pattern, replacement) pairs, applied sequentially in the listed order.
    rewrites = (
        (r"(?<!\w)`search`(?!\w)", "`web_search`"),
        (r"(?<!\w)`wiki`(?!\w)", "`wikipedia_search`"),
        (r"(?<!\w)(?<!_)search\(", "web_search("),
        (r"(?<!\w)(?<!_)wiki\(", "wikipedia_search("),
    )

    rewritten = prompt
    for pattern, replacement in rewrites:
        rewritten = re.sub(pattern, replacement, rewritten)
    return rewritten
|
|
|
def _question_matches(question: str, filters: Union[str, List[str]]) -> bool:
    """
    Helper: check if question contains any string in filters (case-insensitive).

    Parameters:
    - question: the question text to search in.
    - filters: a single substring or a list of substrings to look for.

    Returns:
    - True if at least one filter occurs in the question, otherwise False
      (an empty list of filters therefore yields False).
    """
    if isinstance(filters, str):
        filters = [filters]

    # Lower-case the question once rather than once per filter.
    haystack = question.lower()
    return any(needle.lower() in haystack for needle in filters)
|
|
|
def load_online_qas(
    qa_type: Union[str, List[str]] = "all",
    has_file: Optional[bool] = None,
    file_path: str = "Final_Assignment_Template/allqas.jsonl",
) -> List[dict]:
    """
    Load online QAs from a JSON Lines file (one JSON object per line).

    Parameters:
    - qa_type: str or List[str], used to match substrings in the Question.
      Use "all" for no filtering.
    - has_file: bool or None, filters QAs by presence of 'file_name':
        - True: only include QAs with a non-blank file_name
        - False: only include QAs without a file_name (missing, null or blank)
        - None: no file_name filtering
    - file_path: path of the JSON Lines file to read.

    Returns:
    - the list of QA dicts, in file order, that pass both filters.
    """

    def _has_attachment(qa: dict) -> bool:
        # "file_name" may be missing, null or whitespace-only; all of those
        # count as "no file".
        return bool((qa.get("file_name") or "").strip())

    data = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if line.strip():
                data.append(json.loads(line))

    if has_file is True:
        data = [qa for qa in data if _has_attachment(qa)]
    elif has_file is False:
        data = [qa for qa in data if not _has_attachment(qa)]

    if qa_type == "all":
        return data

    # A missing or null "Question" is treated as an empty question.
    return [
        qa for qa in data
        if _question_matches(qa.get("Question") or "", qa_type)
    ]
|
|
|
|
|
def load_test_qas(qa_type: Union[str, List[str]] = "all") -> List[dict]:
    """
    Loads test QAs with no attached files. Optionally filters by topic keywords in questions.

    Parameters:
    - qa_type: str or List[str] of substrings to look for in the Question
      (case-insensitive). Use "all" for no filtering.

    Returns:
    - a list of dicts with the keys "Question", "Final answer", "task_id",
      "tools" (the "Tools" field of "Annotator Metadata", or None) and
      "file_name", in file order.

    Raises:
    - KeyError if a selected entry lacks "Question" or "task_id".
    """
    # NOTE(review): this reads gaia_val.jsonl, the same file load_val_qas
    # reads — confirm whether a separate test split was intended.
    selected = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("Final_Assignment_Template/gaia_val.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            entry = json.loads(line)

            # Keep only entries without an attachment; "file_name" may be
            # missing, null or whitespace-only.
            if (entry.get("file_name") or "").strip():
                continue
            if qa_type != "all" and not _question_matches(entry["Question"], qa_type):
                continue

            selected.append(
                {
                    "Question": entry["Question"],
                    "Final answer": entry.get("Final answer"),
                    "task_id": entry["task_id"],
                    # "Annotator Metadata" may be absent or null.
                    "tools": (entry.get("Annotator Metadata") or {}).get("Tools"),
                    "file_name": entry.get("file_name", ""),
                }
            )
    return selected
|
|
|
|
|
def load_val_qas(qa_type: Union[str, List[str]] = "all") -> List[dict]:
    """
    Loads validation QAs with no attached files. Optionally filters by topic keywords in questions.

    Parameters:
    - qa_type: str or List[str] of substrings to look for in the Question
      (case-insensitive). Use "all" for no filtering.

    Returns:
    - a list of dicts with the keys "Question", "Final answer", "task_id",
      "tools" (the "Tools" field of "Annotator Metadata", or None) and
      "file_name", in file order.

    Raises:
    - KeyError if a selected entry lacks "Question" or "task_id".
    """
    keep_all = qa_type == "all"
    val_docs = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open("Final_Assignment_Template/gaia_val.jsonl", "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            entry = json.loads(line)

            # Entries with an attachment are excluded; "file_name" may be
            # missing, null or whitespace-only.
            has_attachment = bool((entry.get("file_name") or "").strip())
            if has_attachment:
                continue
            if not keep_all and not _question_matches(entry["Question"], qa_type):
                continue

            val_docs.append(
                {
                    "Question": entry["Question"],
                    "Final answer": entry.get("Final answer"),
                    "task_id": entry["task_id"],
                    # "Annotator Metadata" may be absent or null.
                    "tools": (entry.get("Annotator Metadata") or {}).get("Tools"),
                    "file_name": entry.get("file_name", ""),
                }
            )
    return val_docs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|