Final_Assignment_GAIAAgent

Sleeping

App Files Files Community

Final_Assignment_GAIAAgent / src /gaia /agent /answer_formatter.py

JoachimVC

Implement full GAIA agent solution with formatter and multimodal processing

460ec88 3 months ago

raw

history blame contribute delete

4.91 kB

	"""
	GAIA Answer Formatter

	This module handles reformatting of agent responses to meet the GAIA benchmark requirements.
	It removes prefixes, citations, and other metadata to provide direct, concise answers.
	"""

	import re
	import logging
	from typing import Optional, Union, Dict, Any, List

	# Set up logging
	logger = logging.getLogger("gaia_agent.answer_formatter")

	def format_answer(answer: str) -> str:
	    """
	    Format an answer according to GAIA benchmark requirements.

	    Removes, in this order:
	    - a leading "Based on my search" prefix
	    - a leading "Here's what I found about '<topic>':" prefix
	    - leading question repetition ("Regarding '<topic>':",
	      "You asked about '<topic>':", ...)
	    - trailing citation paragraphs ("This information comes from...",
	      "This information is compiled from multiple sources...", "Source:...")
	    - a trailing "Additionally:" paragraph
	    - inline numeric citation markers such as "[1]"

	    Args:
	        answer: The original answer text

	    Returns:
	        str: A clean, direct answer ("" when the input is empty or None)
	    """
	    if not answer:
	        return ""

	    # Log original answer for debugging (lazy %-args: no formatting cost
	    # when DEBUG logging is disabled).
	    logger.debug("Original answer: %s", answer)

	    # Leading boilerplate. Applied sequentially so that stacked prefixes
	    # ("Based on my search, regarding 'X': ...") are peeled off one by one.
	    # The quoted topic is matched lazily (.*?) up to the next quote character
	    # and does not span line breaks.
	    leading_patterns = (
	        r'^Based on my search[,:]?\s*',
	        r'^Here\'s what I found about [\'"].*?[\'"]:?\s*',
	        r'^(?:Regarding|About|Concerning|On) [\'"].*?[\'"]:?\s*',
	        r'^You asked about [\'"].*?[\'"]:?\s*',
	    )
	    for pattern in leading_patterns:
	        answer = re.sub(pattern, '', answer, flags=re.IGNORECASE)

	    # Trailing citation / metadata paragraphs. DOTALL lets ".*$" swallow
	    # everything from the marker to the end of the text, across newlines.
	    trailing_patterns = (
	        r'\n\nThis information comes from.*$',
	        r'\n\nThis information is compiled from multiple sources.*$',
	        r'\n\nSource:.*$',
	        r'\n\nAdditionally:.*$',
	    )
	    for pattern in trailing_patterns:
	        answer = re.sub(pattern, '', answer, flags=re.DOTALL)

	    # Clean up any remaining citation markers such as "[3]"
	    answer = re.sub(r'\[\d+\]', '', answer)

	    # Trim whitespace
	    answer = answer.strip()

	    # Log the formatted answer
	    logger.debug("Formatted answer: %s", answer)

	    return answer

	def format_numerical_answer(answer: str) -> str:
	    """
	    Reduce an answer to the first number it contains.

	    The text is first cleaned with format_answer(); the first run of digits
	    (optionally with comma-separated groups and a decimal part) is then
	    returned verbatim.

	    Args:
	        answer: The original answer text

	    Returns:
	        str: The first numerical value found, or the cleaned answer when the
	        text contains no number
	    """
	    # General clean-up comes first so prefixes/citations are already gone.
	    text = format_answer(answer)

	    # First number wins; commas and the decimal part are kept as written.
	    found = re.search(r'(\d+(?:,\d+)*(?:\.\d+)?)', text)
	    return found.group(1) if found else text

	def format_list_answer(answer: str) -> str:
	    """
	    Format a list-type answer to maintain the list structure but remove unnecessary text.

	    The text is first cleaned with format_answer(). If it contains numbered
	    ("1. "), starred ("* "), bulleted ("• ") or dashed ("- ") items, those
	    items are extracted and re-emitted one per line with a uniform "- "
	    marker; any preamble before the first item is dropped.

	    Args:
	        answer: The original answer text

	    Returns:
	        str: A cleaned list answer, or the cleaned text unchanged when no
	        list items are found
	    """
	    # First apply general formatting
	    cleaned_answer = format_answer(answer)

	    # One list marker followed by whitespace: "1. ", "* ", "• " or "- ".
	    # NOTE: the marker is not anchored to the start of a line, so it can
	    # also match inside running text (e.g. "a - b").
	    marker = r'(?:\d+\.\s+|\*\s+|•\s+|-\s+)'

	    # If the answer contains numbered or bulleted items, preserve the list structure
	    if re.search(marker, cleaned_answer):
	        # An item runs (lazily, across newlines) until a blank line, a newline
	        # followed by the next marker, or the end of the text.
	        item_pattern = marker + r'(.+?)(?=\n\n|\n' + marker + r'|$)'
	        list_items = re.findall(item_pattern, cleaned_answer, re.DOTALL)
	        if list_items:
	            return '\n'.join(f"- {item.strip()}" for item in list_items)

	    return cleaned_answer

	def detect_answer_type(question: str) -> str:
	    """
	    Detect the type of answer expected based on the question.

	    Classification is a case-insensitive substring check against two keyword
	    lists; numerical keywords take precedence over list keywords.

	    Args:
	        question: The question text

	    Returns:
	        str: The detected answer type ('numerical', 'list', or 'text').
	        An empty or None question yields 'text'.
	    """
	    if not question:
	        return 'text'

	    question_lower = question.lower()

	    # NOTE: keywords are matched as plain substrings, not whole words, so
	    # e.g. "age" also matches inside "percentage".
	    numerical_keywords = (
	        'how many', 'how much', 'count', 'number of', 'total of',
	        'population', 'percentage', 'age', 'height', 'weight', 'distance',
	        'length', 'width', 'depth', 'area', 'volume',
	    )
	    list_keywords = (
	        'list', 'name', 'enumerate', 'what are the', 'examples of',
	    )

	    # Check for numerical questions
	    if any(keyword in question_lower for keyword in numerical_keywords):
	        return 'numerical'

	    # Check for list questions
	    if any(keyword in question_lower for keyword in list_keywords):
	        return 'list'

	    # Default to text
	    return 'text'

	def format_answer_by_type(answer: str, question: Optional[str] = None) -> str:
	    """
	    Pick the formatter that matches the kind of answer the question expects.

	    Without a question, or when the question is classified as plain text,
	    the general format_answer() clean-up is used.

	    Args:
	        answer: The original answer text
	        question: The original question (optional)

	    Returns:
	        str: A formatted answer appropriate for the question type
	    """
	    if question:
	        expected = detect_answer_type(question)
	        if expected == 'numerical':
	            return format_numerical_answer(answer)
	        if expected == 'list':
	            return format_list_answer(answer)

	    # No question given, or a plain-text question: general formatting only.
	    return format_answer(answer)