Spaces:
Sleeping
Sleeping
Chris
commited on
Commit
·
a248c93
1
Parent(s):
d57fa7d
Final 5.0
Browse files- src/__pycache__/app.cpython-310.pyc +0 -0
- src/agents/__pycache__/reasoning_agent.cpython-310.pyc +0 -0
- src/agents/__pycache__/router.cpython-310.pyc +0 -0
- src/agents/__pycache__/synthesizer.cpython-310.pyc +0 -0
- src/agents/__pycache__/web_researcher.cpython-310.pyc +0 -0
- src/agents/reasoning_agent.py +101 -17
- src/agents/router.py +51 -16
- src/agents/synthesizer.py +77 -14
- src/agents/web_researcher.py +91 -16
- src/app.py +174 -18
- src/test_production_fixes.py +231 -0
- src/tools/__pycache__/web_search_tool.cpython-310.pyc +0 -0
- src/tools/web_search_tool.py +86 -46
src/__pycache__/app.cpython-310.pyc
CHANGED
|
Binary files a/src/__pycache__/app.cpython-310.pyc and b/src/__pycache__/app.cpython-310.pyc differ
|
|
|
src/agents/__pycache__/reasoning_agent.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/reasoning_agent.cpython-310.pyc and b/src/agents/__pycache__/reasoning_agent.cpython-310.pyc differ
|
|
|
src/agents/__pycache__/router.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/router.cpython-310.pyc and b/src/agents/__pycache__/router.cpython-310.pyc differ
|
|
|
src/agents/__pycache__/synthesizer.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/synthesizer.cpython-310.pyc and b/src/agents/__pycache__/synthesizer.cpython-310.pyc differ
|
|
|
src/agents/__pycache__/web_researcher.cpython-310.pyc
CHANGED
|
Binary files a/src/agents/__pycache__/web_researcher.cpython-310.pyc and b/src/agents/__pycache__/web_researcher.cpython-310.pyc differ
|
|
|
src/agents/reasoning_agent.py
CHANGED
|
@@ -36,21 +36,37 @@ class ReasoningAgent:
|
|
| 36 |
strategy = self._determine_reasoning_strategy(state.question)
|
| 37 |
state.add_processing_step(f"Reasoning Agent: Strategy = {strategy}")
|
| 38 |
|
| 39 |
-
# Execute reasoning
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Add result to state
|
| 56 |
state.add_agent_result(result)
|
|
@@ -63,8 +79,18 @@ class ReasoningAgent:
|
|
| 63 |
state.add_error(error_msg)
|
| 64 |
logger.error(error_msg)
|
| 65 |
|
| 66 |
-
# Create failure result
|
| 67 |
-
failure_result =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
state.add_agent_result(failure_result)
|
| 69 |
return state
|
| 70 |
|
|
@@ -630,4 +656,62 @@ class ReasoningAgent:
|
|
| 630 |
model_used="error",
|
| 631 |
processing_time=0.0,
|
| 632 |
cost_estimate=0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
)
|
|
|
|
| 36 |
strategy = self._determine_reasoning_strategy(state.question)
|
| 37 |
state.add_processing_step(f"Reasoning Agent: Strategy = {strategy}")
|
| 38 |
|
| 39 |
+
# Execute reasoning with enhanced error handling
|
| 40 |
+
result = None
|
| 41 |
+
try:
|
| 42 |
+
# Execute reasoning based on strategy
|
| 43 |
+
if strategy == "mathematical":
|
| 44 |
+
result = self._process_mathematical(state)
|
| 45 |
+
elif strategy == "statistical":
|
| 46 |
+
result = self._process_statistical(state)
|
| 47 |
+
elif strategy == "unit_conversion":
|
| 48 |
+
result = self._process_unit_conversion(state)
|
| 49 |
+
elif strategy == "logical_deduction":
|
| 50 |
+
result = self._process_logical_deduction(state)
|
| 51 |
+
elif strategy == "pattern_analysis":
|
| 52 |
+
result = self._process_pattern_analysis(state)
|
| 53 |
+
elif strategy == "step_by_step":
|
| 54 |
+
result = self._process_step_by_step(state)
|
| 55 |
+
else:
|
| 56 |
+
result = self._process_general_reasoning(state)
|
| 57 |
+
|
| 58 |
+
except Exception as strategy_error:
|
| 59 |
+
logger.warning(f"Strategy {strategy} failed: {strategy_error}, trying fallback")
|
| 60 |
+
# Try fallback reasoning
|
| 61 |
+
try:
|
| 62 |
+
result = self._process_fallback_reasoning(state, strategy, str(strategy_error))
|
| 63 |
+
except Exception as fallback_error:
|
| 64 |
+
logger.error(f"Fallback reasoning also failed: {fallback_error}")
|
| 65 |
+
result = self._create_graceful_failure_result(state, f"Reasoning failed: {fallback_error}")
|
| 66 |
+
|
| 67 |
+
# Ensure we always have a valid result
|
| 68 |
+
if not result or not isinstance(result, AgentResult):
|
| 69 |
+
result = self._create_graceful_failure_result(state, "No reasoning results available")
|
| 70 |
|
| 71 |
# Add result to state
|
| 72 |
state.add_agent_result(result)
|
|
|
|
| 79 |
state.add_error(error_msg)
|
| 80 |
logger.error(error_msg)
|
| 81 |
|
| 82 |
+
# Create failure result but ensure system continues
|
| 83 |
+
failure_result = AgentResult(
|
| 84 |
+
agent_role=AgentRole.REASONING_AGENT,
|
| 85 |
+
success=False,
|
| 86 |
+
result=f"Processing encountered difficulties: Reasoning failed",
|
| 87 |
+
confidence=0.1, # Very low but not zero to allow synthesis
|
| 88 |
+
reasoning=f"Exception during reasoning: {str(e)}",
|
| 89 |
+
tools_used=[],
|
| 90 |
+
model_used="error",
|
| 91 |
+
processing_time=0.0,
|
| 92 |
+
cost_estimate=0.0
|
| 93 |
+
)
|
| 94 |
state.add_agent_result(failure_result)
|
| 95 |
return state
|
| 96 |
|
|
|
|
| 656 |
model_used="error",
|
| 657 |
processing_time=0.0,
|
| 658 |
cost_estimate=0.0
|
| 659 |
+
)
|
| 660 |
+
|
| 661 |
+
def _process_fallback_reasoning(self, state: GAIAAgentState, original_strategy: str, error_msg: str) -> AgentResult:
|
| 662 |
+
"""Enhanced fallback reasoning when primary strategy fails"""
|
| 663 |
+
|
| 664 |
+
logger.info(f"Executing fallback reasoning after {original_strategy} failure")
|
| 665 |
+
|
| 666 |
+
# Try simple general reasoning as fallback
|
| 667 |
+
try:
|
| 668 |
+
fallback_prompt = f"""
|
| 669 |
+
Please answer this question using basic reasoning:
|
| 670 |
+
|
| 671 |
+
Question: {state.question}
|
| 672 |
+
|
| 673 |
+
Note: Original strategy '{original_strategy}' failed with: {error_msg}
|
| 674 |
+
|
| 675 |
+
Please provide the best answer you can using simple analysis and reasoning.
|
| 676 |
+
Focus on extracting key information from the question and providing a helpful response.
|
| 677 |
+
"""
|
| 678 |
+
|
| 679 |
+
# Use main model for fallback
|
| 680 |
+
llm_result = self.llm_client.generate(fallback_prompt, tier=ModelTier.MAIN, max_tokens=400)
|
| 681 |
+
|
| 682 |
+
if llm_result.success:
|
| 683 |
+
return AgentResult(
|
| 684 |
+
agent_role=AgentRole.REASONING_AGENT,
|
| 685 |
+
success=True,
|
| 686 |
+
result=llm_result.response,
|
| 687 |
+
confidence=0.3, # Lower confidence for fallback
|
| 688 |
+
reasoning=f"Fallback reasoning after {original_strategy} failed: {error_msg}",
|
| 689 |
+
tools_used=[],
|
| 690 |
+
model_used=llm_result.model_used,
|
| 691 |
+
processing_time=llm_result.response_time,
|
| 692 |
+
cost_estimate=llm_result.cost_estimate
|
| 693 |
+
)
|
| 694 |
+
else:
|
| 695 |
+
raise Exception(f"Fallback LLM reasoning failed: {llm_result.error}")
|
| 696 |
+
|
| 697 |
+
except Exception as fallback_error:
|
| 698 |
+
logger.error(f"Fallback reasoning failed: {fallback_error}")
|
| 699 |
+
return self._create_graceful_failure_result(state, f"All reasoning methods failed: {fallback_error}")
|
| 700 |
+
|
| 701 |
+
def _create_graceful_failure_result(self, state: GAIAAgentState, error_context: str) -> AgentResult:
|
| 702 |
+
"""Create a graceful failure result that allows the system to continue"""
|
| 703 |
+
|
| 704 |
+
# Try to extract any useful information from the question itself
|
| 705 |
+
question_analysis = f"Question analysis: {state.question[:200]}"
|
| 706 |
+
|
| 707 |
+
return AgentResult(
|
| 708 |
+
agent_role=AgentRole.REASONING_AGENT,
|
| 709 |
+
success=False,
|
| 710 |
+
result=f"Processing encountered difficulties: {error_context}",
|
| 711 |
+
confidence=0.1,
|
| 712 |
+
reasoning=f"Reasoning failed: {error_context}",
|
| 713 |
+
tools_used=[],
|
| 714 |
+
model_used="none",
|
| 715 |
+
processing_time=0.0,
|
| 716 |
+
cost_estimate=0.0
|
| 717 |
)
|
src/agents/router.py
CHANGED
|
@@ -102,40 +102,75 @@ class RouterAgent:
|
|
| 102 |
# Content-based classification
|
| 103 |
classification_patterns = {
|
| 104 |
QuestionType.MATHEMATICAL: [
|
| 105 |
-
r'
|
| 106 |
-
r'
|
| 107 |
-
r'
|
|
|
|
| 108 |
],
|
| 109 |
QuestionType.CODE_EXECUTION: [
|
| 110 |
-
r'
|
| 111 |
-
r'
|
| 112 |
],
|
| 113 |
QuestionType.TEXT_MANIPULATION: [
|
| 114 |
-
r'
|
| 115 |
-
r'
|
| 116 |
],
|
| 117 |
QuestionType.REASONING: [
|
| 118 |
-
r'
|
| 119 |
-
r'
|
| 120 |
],
|
| 121 |
QuestionType.WEB_RESEARCH: [
|
| 122 |
-
r'
|
| 123 |
-
r'
|
|
|
|
|
|
|
| 124 |
]
|
| 125 |
}
|
| 126 |
|
| 127 |
-
# Score each category
|
| 128 |
type_scores = {}
|
| 129 |
for question_type, patterns in classification_patterns.items():
|
| 130 |
-
score =
|
|
|
|
|
|
|
|
|
|
| 131 |
if score > 0:
|
| 132 |
type_scores[question_type] = score
|
| 133 |
|
| 134 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
if type_scores:
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
def _assess_complexity(self, question: str) -> str:
|
| 141 |
"""Assess question complexity"""
|
|
|
|
| 102 |
# Content-based classification
|
| 103 |
classification_patterns = {
|
| 104 |
QuestionType.MATHEMATICAL: [
|
| 105 |
+
r'\bcalculate\b', r'\bcompute\b', r'\bsolve\b', r'\bequation\b', r'\bformula\b',
|
| 106 |
+
r'\bsum\b', r'\btotal\b', r'\baverage\b', r'\bpercentage\b', r'\bratio\b',
|
| 107 |
+
r'\bhow many\b', r'\bhow much\b', r'\d+\s*[\+\-\*/]\s*\d+', r'\bmath\b',
|
| 108 |
+
r'\bsquare root\b', r'\bfactorial\b', r'\bdivided by\b', r'\bmultiply\b'
|
| 109 |
],
|
| 110 |
QuestionType.CODE_EXECUTION: [
|
| 111 |
+
r'\bcode\b', r'\bprogram\b', r'\bscript\b', r'\bfunction\b', r'\balgorithm\b',
|
| 112 |
+
r'\bexecute\b', r'\brun.*code\b', r'\bpython\b', r'\bjavascript\b'
|
| 113 |
],
|
| 114 |
QuestionType.TEXT_MANIPULATION: [
|
| 115 |
+
r'\breverse\b', r'\bencode\b', r'\bdecode\b', r'\btransform\b', r'\bconvert\b',
|
| 116 |
+
r'\buppercase\b', r'\blowercase\b', r'\breplace\b', r'\bextract\b'
|
| 117 |
],
|
| 118 |
QuestionType.REASONING: [
|
| 119 |
+
r'\bwhy\b', r'\bexplain\b', r'\banalyze\b', r'\breasoning\b', r'\blogic\b',
|
| 120 |
+
r'\brelationship\b', r'\bcompare\b', r'\bcontrast\b', r'\bconclusion\b'
|
| 121 |
],
|
| 122 |
QuestionType.WEB_RESEARCH: [
|
| 123 |
+
r'\bsearch\b', r'\bfind.*information\b', r'\bresearch\b', r'\blook up\b',
|
| 124 |
+
r'\bwebsite\b', r'\bonline\b', r'\binternet\b', r'\bwho\s+(?:is|was|are|were)\b',
|
| 125 |
+
r'\bwhat\s+(?:is|was|are|were)\b', r'\bwhen\s+(?:is|was|did|does)\b',
|
| 126 |
+
r'\bwhere\s+(?:is|was|are|were)\b'
|
| 127 |
]
|
| 128 |
}
|
| 129 |
|
| 130 |
+
# Score each category with refined scoring
|
| 131 |
type_scores = {}
|
| 132 |
for question_type, patterns in classification_patterns.items():
|
| 133 |
+
score = 0
|
| 134 |
+
for pattern in patterns:
|
| 135 |
+
matches = re.findall(pattern, question_lower)
|
| 136 |
+
score += len(matches)
|
| 137 |
if score > 0:
|
| 138 |
type_scores[question_type] = score
|
| 139 |
|
| 140 |
+
# Special handling for specific question patterns
|
| 141 |
+
|
| 142 |
+
# Check for fictional/non-existent content (should be WEB_RESEARCH)
|
| 143 |
+
if any(term in question_lower for term in ['fictional', 'imaginary', 'non-existent', 'nonexistent']):
|
| 144 |
+
type_scores[QuestionType.WEB_RESEARCH] = type_scores.get(QuestionType.WEB_RESEARCH, 0) + 2
|
| 145 |
+
|
| 146 |
+
# Check for research questions about people, places, things
|
| 147 |
+
if re.search(r'\bwho\s+(?:is|was|are|were|did|does)\b', question_lower):
|
| 148 |
+
type_scores[QuestionType.WEB_RESEARCH] = type_scores.get(QuestionType.WEB_RESEARCH, 0) + 2
|
| 149 |
+
|
| 150 |
+
# Check for historical or factual queries
|
| 151 |
+
if any(term in question_lower for term in ['history', 'historical', 'century', 'year', 'published', 'author']):
|
| 152 |
+
type_scores[QuestionType.WEB_RESEARCH] = type_scores.get(QuestionType.WEB_RESEARCH, 0) + 1
|
| 153 |
+
|
| 154 |
+
# Check for specific mathematical operations (boost mathematical score)
|
| 155 |
+
if re.search(r'\d+\s*[\+\-\*/]\s*\d+', question_lower):
|
| 156 |
+
type_scores[QuestionType.MATHEMATICAL] = type_scores.get(QuestionType.MATHEMATICAL, 0) + 3
|
| 157 |
+
|
| 158 |
+
# Return highest scoring type, or WEB_RESEARCH as default for informational questions
|
| 159 |
if type_scores:
|
| 160 |
+
best_type = max(type_scores.keys(), key=lambda t: type_scores[t])
|
| 161 |
+
|
| 162 |
+
# If it's a tie or low score, check for general informational patterns
|
| 163 |
+
max_score = type_scores[best_type]
|
| 164 |
+
if max_score <= 1:
|
| 165 |
+
# Check if it's a general informational question
|
| 166 |
+
info_patterns = [r'\bwhat\b', r'\bwho\b', r'\bwhen\b', r'\bwhere\b', r'\bhow\b']
|
| 167 |
+
if any(re.search(pattern, question_lower) for pattern in info_patterns):
|
| 168 |
+
return QuestionType.WEB_RESEARCH
|
| 169 |
+
|
| 170 |
+
return best_type
|
| 171 |
|
| 172 |
+
# Default to WEB_RESEARCH for unknown informational questions
|
| 173 |
+
return QuestionType.WEB_RESEARCH
|
| 174 |
|
| 175 |
def _assess_complexity(self, question: str) -> str:
|
| 176 |
"""Assess question complexity"""
|
src/agents/synthesizer.py
CHANGED
|
@@ -52,6 +52,8 @@ class SynthesizerAgent:
|
|
| 52 |
final_result = self._synthesize_confidence_weighted(state)
|
| 53 |
elif synthesis_strategy == "llm_synthesis":
|
| 54 |
final_result = self._synthesize_with_llm(state)
|
|
|
|
|
|
|
| 55 |
else:
|
| 56 |
final_result = self._synthesize_fallback(state)
|
| 57 |
|
|
@@ -96,9 +98,11 @@ class SynthesizerAgent:
|
|
| 96 |
"""Determine the best synthesis strategy based on available results"""
|
| 97 |
|
| 98 |
successful_results = [r for r in state.agent_results.values() if r.success]
|
|
|
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
|
|
|
| 102 |
elif len(successful_results) == 1:
|
| 103 |
return "single_agent"
|
| 104 |
elif len(successful_results) == 2:
|
|
@@ -245,32 +249,91 @@ Focus on accuracy and be direct in your response.
|
|
| 245 |
return self._synthesize_confidence_weighted(state)
|
| 246 |
|
| 247 |
def _synthesize_fallback(self, state: GAIAAgentState) -> Dict[str, Any]:
|
| 248 |
-
"""
|
| 249 |
|
| 250 |
# Try to get any result, even if not successful
|
| 251 |
all_results = list(state.agent_results.values())
|
| 252 |
|
| 253 |
if all_results:
|
| 254 |
-
#
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
return {
|
| 259 |
"answer": best_attempt.result,
|
| 260 |
"confidence": max(0.3, best_attempt.confidence * 0.8), # Reduce confidence for fallback
|
| 261 |
"reasoning": f"Fallback result from {best_attempt.agent_role.value}: {best_attempt.reasoning}",
|
| 262 |
"source": f"fallback_{best_attempt.agent_role.value}"
|
| 263 |
}
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
"confidence": 0.2,
|
| 268 |
-
"reasoning": f"Fallback from failed attempt by {best_attempt.agent_role.value}",
|
| 269 |
-
"source": "failed_fallback"
|
| 270 |
-
}
|
| 271 |
else:
|
| 272 |
return self._create_fallback_result("No agent results available")
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
def _create_fallback_result(self, reason: str) -> Dict[str, Any]:
|
| 275 |
"""Create a fallback result when synthesis is impossible"""
|
| 276 |
return {
|
|
|
|
| 52 |
final_result = self._synthesize_confidence_weighted(state)
|
| 53 |
elif synthesis_strategy == "llm_synthesis":
|
| 54 |
final_result = self._synthesize_with_llm(state)
|
| 55 |
+
elif synthesis_strategy == "failure_analysis":
|
| 56 |
+
final_result = self._synthesize_failure_analysis(state)
|
| 57 |
else:
|
| 58 |
final_result = self._synthesize_fallback(state)
|
| 59 |
|
|
|
|
| 98 |
"""Determine the best synthesis strategy based on available results"""
|
| 99 |
|
| 100 |
successful_results = [r for r in state.agent_results.values() if r.success]
|
| 101 |
+
failed_results = [r for r in state.agent_results.values() if not r.success]
|
| 102 |
|
| 103 |
+
# If we have some results but they're mostly failures, try to extract useful info
|
| 104 |
+
if len(successful_results) == 0 and len(failed_results) > 0:
|
| 105 |
+
return "failure_analysis"
|
| 106 |
elif len(successful_results) == 1:
|
| 107 |
return "single_agent"
|
| 108 |
elif len(successful_results) == 2:
|
|
|
|
| 249 |
return self._synthesize_confidence_weighted(state)
|
| 250 |
|
| 251 |
def _synthesize_fallback(self, state: GAIAAgentState) -> Dict[str, Any]:
|
| 252 |
+
"""Enhanced fallback synthesis when other strategies fail"""
|
| 253 |
|
| 254 |
# Try to get any result, even if not successful
|
| 255 |
all_results = list(state.agent_results.values())
|
| 256 |
|
| 257 |
if all_results:
|
| 258 |
+
# First try successful results
|
| 259 |
+
successful_results = [r for r in all_results if r.success]
|
| 260 |
+
if successful_results:
|
| 261 |
+
best_attempt = max(successful_results, key=lambda r: r.confidence)
|
| 262 |
return {
|
| 263 |
"answer": best_attempt.result,
|
| 264 |
"confidence": max(0.3, best_attempt.confidence * 0.8), # Reduce confidence for fallback
|
| 265 |
"reasoning": f"Fallback result from {best_attempt.agent_role.value}: {best_attempt.reasoning}",
|
| 266 |
"source": f"fallback_{best_attempt.agent_role.value}"
|
| 267 |
}
|
| 268 |
+
|
| 269 |
+
# If no successful results, try to extract useful info from failures
|
| 270 |
+
return self._synthesize_failure_analysis(state)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
else:
|
| 272 |
return self._create_fallback_result("No agent results available")
|
| 273 |
|
| 274 |
+
def _synthesize_failure_analysis(self, state: GAIAAgentState) -> Dict[str, Any]:
|
| 275 |
+
"""Analyze failed results to provide some useful response"""
|
| 276 |
+
|
| 277 |
+
failed_results = [r for r in state.agent_results.values() if not r.success]
|
| 278 |
+
|
| 279 |
+
if not failed_results:
|
| 280 |
+
return self._create_fallback_result("No results to analyze")
|
| 281 |
+
|
| 282 |
+
# Look for patterns in failures
|
| 283 |
+
error_patterns = []
|
| 284 |
+
attempted_agents = []
|
| 285 |
+
|
| 286 |
+
for result in failed_results:
|
| 287 |
+
attempted_agents.append(result.agent_role.value)
|
| 288 |
+
|
| 289 |
+
# Extract meaningful error information
|
| 290 |
+
result_text = result.result.lower()
|
| 291 |
+
if "research sources failed" in result_text:
|
| 292 |
+
error_patterns.append("external_research_unavailable")
|
| 293 |
+
elif "reasoning failed" in result_text:
|
| 294 |
+
error_patterns.append("complex_reasoning_required")
|
| 295 |
+
elif "conversion" in result_text:
|
| 296 |
+
error_patterns.append("conversion_difficulty")
|
| 297 |
+
elif "mathematical" in result_text:
|
| 298 |
+
error_patterns.append("mathematical_complexity")
|
| 299 |
+
|
| 300 |
+
# Try to provide a helpful response based on the question type and failures
|
| 301 |
+
try:
|
| 302 |
+
analysis_prompt = f"""
|
| 303 |
+
Question: {state.question}
|
| 304 |
+
|
| 305 |
+
Multiple specialized agents attempted to answer this question but encountered difficulties:
|
| 306 |
+
- Agents tried: {', '.join(attempted_agents)}
|
| 307 |
+
- Common issues: {', '.join(set(error_patterns)) if error_patterns else 'processing difficulties'}
|
| 308 |
+
|
| 309 |
+
Based on the question itself, please provide the best answer you can using basic reasoning and knowledge.
|
| 310 |
+
Even if external resources failed, try to answer based on general knowledge.
|
| 311 |
+
|
| 312 |
+
Be honest about limitations but try to be helpful.
|
| 313 |
+
"""
|
| 314 |
+
|
| 315 |
+
# Use main model for analysis
|
| 316 |
+
llm_result = self.llm_client.generate(analysis_prompt, tier=ModelTier.MAIN, max_tokens=300)
|
| 317 |
+
|
| 318 |
+
if llm_result.success:
|
| 319 |
+
return {
|
| 320 |
+
"answer": llm_result.response,
|
| 321 |
+
"confidence": 0.25, # Low confidence but still attempting
|
| 322 |
+
"reasoning": f"Generated from failure analysis. Agents tried: {', '.join(attempted_agents)}",
|
| 323 |
+
"source": "failure_analysis"
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
except Exception as analysis_error:
|
| 327 |
+
logger.warning(f"Failure analysis also failed: {analysis_error}")
|
| 328 |
+
|
| 329 |
+
# Final fallback - provide structured error message
|
| 330 |
+
return {
|
| 331 |
+
"answer": f"Processing encountered difficulties: All research sources failed",
|
| 332 |
+
"confidence": 0.1,
|
| 333 |
+
"reasoning": f"Multiple agents failed: {', '.join(attempted_agents)}. {', '.join(set(error_patterns)) if error_patterns else 'Various processing issues encountered'}",
|
| 334 |
+
"source": "structured_failure"
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
def _create_fallback_result(self, reason: str) -> Dict[str, Any]:
|
| 338 |
"""Create a fallback result when synthesis is impossible"""
|
| 339 |
return {
|
src/agents/web_researcher.py
CHANGED
|
@@ -39,19 +39,35 @@ class WebResearchAgent:
|
|
| 39 |
strategy = self._determine_research_strategy(state.question, state.file_name)
|
| 40 |
state.add_processing_step(f"Web Researcher: Strategy = {strategy}")
|
| 41 |
|
| 42 |
-
# Execute research
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
# Add result to state
|
| 57 |
state.add_agent_result(result)
|
|
@@ -64,13 +80,14 @@ class WebResearchAgent:
|
|
| 64 |
state.add_error(error_msg)
|
| 65 |
logger.error(error_msg)
|
| 66 |
|
| 67 |
-
# Create failure result
|
| 68 |
failure_result = AgentResult(
|
| 69 |
agent_role=AgentRole.WEB_RESEARCHER,
|
| 70 |
success=False,
|
| 71 |
-
result=f"Research
|
| 72 |
-
confidence=0.
|
| 73 |
reasoning=f"Exception during web research: {str(e)}",
|
|
|
|
| 74 |
model_used="error",
|
| 75 |
processing_time=0.0,
|
| 76 |
cost_estimate=0.0
|
|
@@ -309,6 +326,64 @@ class WebResearchAgent:
|
|
| 309 |
else:
|
| 310 |
return self._create_failure_result(f"Fallback failed: {reason}")
|
| 311 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
def _extract_wikipedia_topic(self, question: str) -> str:
|
| 313 |
"""Extract Wikipedia topic from question"""
|
| 314 |
|
|
|
|
| 39 |
strategy = self._determine_research_strategy(state.question, state.file_name)
|
| 40 |
state.add_processing_step(f"Web Researcher: Strategy = {strategy}")
|
| 41 |
|
| 42 |
+
# Execute research with enhanced error handling
|
| 43 |
+
result = None
|
| 44 |
+
try:
|
| 45 |
+
# Execute research based on strategy
|
| 46 |
+
if strategy == "wikipedia_direct":
|
| 47 |
+
result = self._research_wikipedia_direct(state)
|
| 48 |
+
elif strategy == "wikipedia_search":
|
| 49 |
+
result = self._research_wikipedia_search(state)
|
| 50 |
+
elif strategy == "youtube_analysis":
|
| 51 |
+
result = self._research_youtube(state)
|
| 52 |
+
elif strategy == "web_search":
|
| 53 |
+
result = self._research_web_general(state)
|
| 54 |
+
elif strategy == "url_extraction":
|
| 55 |
+
result = self._research_url_content(state)
|
| 56 |
+
else:
|
| 57 |
+
result = self._research_multi_source(state)
|
| 58 |
+
|
| 59 |
+
except Exception as strategy_error:
|
| 60 |
+
logger.warning(f"Strategy {strategy} failed: {strategy_error}, trying fallback")
|
| 61 |
+
# Try fallback strategy
|
| 62 |
+
try:
|
| 63 |
+
result = self._research_fallback_strategy(state, str(strategy_error))
|
| 64 |
+
except Exception as fallback_error:
|
| 65 |
+
logger.error(f"Fallback strategy also failed: {fallback_error}")
|
| 66 |
+
result = self._create_basic_response(state, f"Research failed: {fallback_error}")
|
| 67 |
+
|
| 68 |
+
# Ensure we always have a valid result
|
| 69 |
+
if not result or not isinstance(result, AgentResult):
|
| 70 |
+
result = self._create_basic_response(state, "No research results available")
|
| 71 |
|
| 72 |
# Add result to state
|
| 73 |
state.add_agent_result(result)
|
|
|
|
| 80 |
state.add_error(error_msg)
|
| 81 |
logger.error(error_msg)
|
| 82 |
|
| 83 |
+
# Create failure result but ensure system continues
|
| 84 |
failure_result = AgentResult(
|
| 85 |
agent_role=AgentRole.WEB_RESEARCHER,
|
| 86 |
success=False,
|
| 87 |
+
result=f"Research encountered difficulties: {str(e)}",
|
| 88 |
+
confidence=0.1, # Very low but not zero to allow synthesis
|
| 89 |
reasoning=f"Exception during web research: {str(e)}",
|
| 90 |
+
tools_used=[],
|
| 91 |
model_used="error",
|
| 92 |
processing_time=0.0,
|
| 93 |
cost_estimate=0.0
|
|
|
|
| 326 |
else:
|
| 327 |
return self._create_failure_result(f"Fallback failed: {reason}")
|
| 328 |
|
| 329 |
+
def _research_fallback_strategy(self, state: GAIAAgentState, original_error: str) -> AgentResult:
|
| 330 |
+
"""Enhanced fallback strategy when primary research fails"""
|
| 331 |
+
|
| 332 |
+
logger.info("Executing fallback research strategy")
|
| 333 |
+
|
| 334 |
+
# Try simple web search as universal fallback
|
| 335 |
+
try:
|
| 336 |
+
search_terms = self._extract_search_terms(state.question)
|
| 337 |
+
web_result = self.web_search_tool.execute(search_terms)
|
| 338 |
+
|
| 339 |
+
if web_result.success and web_result.result.get('found'):
|
| 340 |
+
# Analyze results with basic processing
|
| 341 |
+
search_results = web_result.result.get('results', [])
|
| 342 |
+
if search_results:
|
| 343 |
+
first_result = search_results[0]
|
| 344 |
+
fallback_answer = f"Based on web search: {first_result.get('snippet', 'Limited information available')}"
|
| 345 |
+
|
| 346 |
+
return AgentResult(
|
| 347 |
+
agent_role=AgentRole.WEB_RESEARCHER,
|
| 348 |
+
success=True,
|
| 349 |
+
result=fallback_answer,
|
| 350 |
+
confidence=0.4, # Lower confidence for fallback
|
| 351 |
+
reasoning=f"Fallback web search after: {original_error}",
|
| 352 |
+
tools_used=[ToolResult(
|
| 353 |
+
tool_name="web_search_fallback",
|
| 354 |
+
success=True,
|
| 355 |
+
result={"summary": "Fallback search completed"},
|
| 356 |
+
execution_time=web_result.execution_time
|
| 357 |
+
)],
|
| 358 |
+
model_used="fallback",
|
| 359 |
+
processing_time=web_result.execution_time,
|
| 360 |
+
cost_estimate=0.0
|
| 361 |
+
)
|
| 362 |
+
|
| 363 |
+
except Exception as fallback_error:
|
| 364 |
+
logger.warning(f"Web search fallback failed: {fallback_error}")
|
| 365 |
+
|
| 366 |
+
# If all else fails, try basic text processing
|
| 367 |
+
return self._create_basic_response(state, f"Fallback failed: {original_error}")
|
| 368 |
+
|
| 369 |
+
def _create_basic_response(self, state: GAIAAgentState, error_context: str) -> AgentResult:
|
| 370 |
+
"""Create a basic response when all research methods fail"""
|
| 371 |
+
|
| 372 |
+
# Try to extract any useful information from the question itself
|
| 373 |
+
basic_analysis = f"Unable to conduct external research. Question analysis: {state.question[:100]}"
|
| 374 |
+
|
| 375 |
+
return AgentResult(
|
| 376 |
+
agent_role=AgentRole.WEB_RESEARCHER,
|
| 377 |
+
success=False,
|
| 378 |
+
result=f"Processing encountered difficulties: {error_context}",
|
| 379 |
+
confidence=0.1,
|
| 380 |
+
reasoning=f"All research sources failed: {error_context}",
|
| 381 |
+
tools_used=[],
|
| 382 |
+
model_used="none",
|
| 383 |
+
processing_time=0.0,
|
| 384 |
+
cost_estimate=0.0
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
def _extract_wikipedia_topic(self, question: str) -> str:
|
| 388 |
"""Extract Wikipedia topic from question"""
|
| 389 |
|
src/app.py
CHANGED
|
@@ -345,26 +345,182 @@ def create_interface():
|
|
| 345 |
|
| 346 |
# Custom CSS for better styling
|
| 347 |
css = """
|
| 348 |
-
|
| 349 |
-
.
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
/* Fix
|
| 355 |
-
.gradio-container
|
| 356 |
-
.gradio-container
|
| 357 |
-
.gradio-container
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
/*
|
| 362 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
/* Fix any remaining text contrast issues */
|
| 365 |
-
.gradio-container
|
| 366 |
-
.gradio-container
|
| 367 |
-
.gradio-container
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
"""
|
| 369 |
|
| 370 |
with gr.Blocks(css=css, title="GAIA Agent System", theme=gr.themes.Soft()) as interface:
|
|
|
|
| 345 |
|
| 346 |
# Custom CSS for better styling
|
| 347 |
css = """
|
| 348 |
+
/* Base styling for proper contrast */
|
| 349 |
+
.gradio-container {
|
| 350 |
+
color: #333 !important;
|
| 351 |
+
background-color: #ffffff !important;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
/* Fix all text elements */
|
| 355 |
+
.gradio-container *,
|
| 356 |
+
.gradio-container *::before,
|
| 357 |
+
.gradio-container *::after {
|
| 358 |
+
color: #333 !important;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
/* Headers */
|
| 362 |
+
.gradio-container h1,
|
| 363 |
+
.gradio-container h2,
|
| 364 |
+
.gradio-container h3,
|
| 365 |
+
.gradio-container h4,
|
| 366 |
+
.gradio-container h5,
|
| 367 |
+
.gradio-container h6 {
|
| 368 |
+
color: #1a1a1a !important;
|
| 369 |
+
font-weight: 600 !important;
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
/* Paragraphs and text content */
|
| 373 |
+
.gradio-container p,
|
| 374 |
+
.gradio-container div,
|
| 375 |
+
.gradio-container span,
|
| 376 |
+
.gradio-container label {
|
| 377 |
+
color: #333 !important;
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
/* Input fields */
|
| 381 |
+
.gradio-container input,
|
| 382 |
+
.gradio-container textarea {
|
| 383 |
+
color: #333 !important;
|
| 384 |
+
background-color: #ffffff !important;
|
| 385 |
+
border: 1px solid #ccc !important;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
/* Buttons */
|
| 389 |
+
.gradio-container .gr-button-primary {
|
| 390 |
+
background: #007bff !important;
|
| 391 |
+
color: white !important;
|
| 392 |
+
border: none !important;
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
.gradio-container .gr-button-secondary {
|
| 396 |
+
background: #6c757d !important;
|
| 397 |
+
color: white !important;
|
| 398 |
+
border: none !important;
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
.gradio-container button {
|
| 402 |
+
color: white !important;
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
/* Markdown content */
|
| 406 |
+
.gradio-container .gr-markdown,
|
| 407 |
+
.gradio-container .markdown,
|
| 408 |
+
.gradio-container .prose {
|
| 409 |
+
color: #333 !important;
|
| 410 |
+
background-color: transparent !important;
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
/* Special content boxes */
|
| 414 |
+
.container {
|
| 415 |
+
max-width: 1200px;
|
| 416 |
+
margin: auto;
|
| 417 |
+
padding: 20px;
|
| 418 |
+
background-color: #ffffff !important;
|
| 419 |
+
color: #333 !important;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.output-markdown {
|
| 423 |
+
font-size: 16px;
|
| 424 |
+
line-height: 1.6;
|
| 425 |
+
color: #333 !important;
|
| 426 |
+
background-color: #ffffff !important;
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
+
.details-box {
|
| 430 |
+
background-color: #f8f9fa !important;
|
| 431 |
+
padding: 15px;
|
| 432 |
+
border-radius: 8px;
|
| 433 |
+
margin: 10px 0;
|
| 434 |
+
color: #333 !important;
|
| 435 |
+
border: 1px solid #dee2e6 !important;
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
.reasoning-box {
|
| 439 |
+
background-color: #fff !important;
|
| 440 |
+
padding: 20px;
|
| 441 |
+
border: 1px solid #dee2e6 !important;
|
| 442 |
+
border-radius: 8px;
|
| 443 |
+
color: #333 !important;
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
.unit4-section {
|
| 447 |
+
background-color: #e3f2fd !important;
|
| 448 |
+
padding: 20px;
|
| 449 |
+
border-radius: 8px;
|
| 450 |
+
margin: 20px 0;
|
| 451 |
+
color: #1565c0 !important;
|
| 452 |
+
border: 1px solid #90caf9 !important;
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
.unit4-section h1,
|
| 456 |
+
.unit4-section h2,
|
| 457 |
+
.unit4-section h3,
|
| 458 |
+
.unit4-section p,
|
| 459 |
+
.unit4-section div {
|
| 460 |
+
color: #1565c0 !important;
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
/* Login section */
|
| 464 |
+
.oauth-login {
|
| 465 |
+
background: #f8f9fa !important;
|
| 466 |
+
padding: 10px;
|
| 467 |
+
border-radius: 5px;
|
| 468 |
+
margin: 10px 0;
|
| 469 |
+
color: #333 !important;
|
| 470 |
+
border: 1px solid #dee2e6 !important;
|
| 471 |
+
}
|
| 472 |
+
|
| 473 |
+
/* Tables */
|
| 474 |
+
.gradio-container table,
|
| 475 |
+
.gradio-container th,
|
| 476 |
+
.gradio-container td {
|
| 477 |
+
color: #333 !important;
|
| 478 |
+
background-color: #ffffff !important;
|
| 479 |
+
border: 1px solid #dee2e6 !important;
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
.gradio-container th {
|
| 483 |
+
background-color: #f8f9fa !important;
|
| 484 |
+
font-weight: 600 !important;
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
/* Override any white text */
|
| 488 |
+
.gradio-container [style*="color: white"],
|
| 489 |
+
.gradio-container [style*="color: #fff"],
|
| 490 |
+
.gradio-container [style*="color: #ffffff"] {
|
| 491 |
+
color: #333 !important;
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
/* Ensure buttons keep white text */
|
| 495 |
+
.gradio-container button,
|
| 496 |
+
.gradio-container .gr-button-primary,
|
| 497 |
+
.gradio-container .gr-button-secondary {
|
| 498 |
+
color: white !important;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
/* Examples and other interactive elements */
|
| 502 |
+
.gradio-container .gr-examples,
|
| 503 |
+
.gradio-container .gr-file,
|
| 504 |
+
.gradio-container .gr-textbox,
|
| 505 |
+
.gradio-container .gr-checkbox {
|
| 506 |
+
color: #333 !important;
|
| 507 |
+
background-color: #ffffff !important;
|
| 508 |
+
}
|
| 509 |
|
| 510 |
/* Fix any remaining text contrast issues */
|
| 511 |
+
.gradio-container .gr-form,
|
| 512 |
+
.gradio-container .gr-panel,
|
| 513 |
+
.gradio-container .gr-block {
|
| 514 |
+
color: #333 !important;
|
| 515 |
+
background-color: transparent !important;
|
| 516 |
+
}
|
| 517 |
+
|
| 518 |
+
/* Ensure dark text on light backgrounds for all content */
|
| 519 |
+
.gradio-container .light,
|
| 520 |
+
.gradio-container [data-theme="light"] {
|
| 521 |
+
color: #333 !important;
|
| 522 |
+
background-color: #ffffff !important;
|
| 523 |
+
}
|
| 524 |
"""
|
| 525 |
|
| 526 |
with gr.Blocks(css=css, title="GAIA Agent System", theme=gr.themes.Soft()) as interface:
|
src/test_production_fixes.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test Production Fixes for GAIA Agent System
|
| 4 |
+
Quick validation that error handling improvements are working
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
from typing import List, Dict, Any
|
| 10 |
+
|
| 11 |
+
from models.qwen_client import QwenClient
|
| 12 |
+
from workflow.gaia_workflow import SimpleGAIAWorkflow
|
| 13 |
+
|
| 14 |
+
# Configure logging
|
| 15 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
class ProductionFixTester:
    """Test the production fixes for error handling and robustness.

    Runs a fixed battery of adversarial questions through the GAIA
    workflow and checks that the system degrades gracefully (fallback
    answers, non-zero confidence) instead of crashing.
    """

    def __init__(self) -> None:
        # Build the real LLM client and workflow; initialization itself is
        # part of what is under test, so failures are logged and re-raised
        # rather than swallowed.
        try:
            self.llm_client = QwenClient()
            self.workflow = SimpleGAIAWorkflow(self.llm_client)
            logger.info("✅ Test environment initialized")
        except Exception as e:
            logger.error(f"❌ Failed to initialize test environment: {e}")
            raise

    def test_error_handling_scenarios(self) -> Dict[str, Any]:
        """Test various error scenarios that were causing production failures.

        Returns:
            Dict with a "test_summary" (counts + error strings) and
            "detailed_results" (one analysis dict per scenario).
        """

        # Each scenario is deliberately unanswerable or malformed; the
        # "expected_behavior" field is human documentation only — pass/fail
        # is decided by _analyze_test_result, not by this string.
        test_scenarios = [
            {
                "name": "Wikipedia Research Failure Simulation",
                "question": "What is the most obscure fictional character from the imaginary book 'Zzzzz12345NonExistent'?",
                "expected_behavior": "Should fail gracefully and provide fallback response"
            },
            {
                "name": "Mathematical Reasoning with Complex Data",
                "question": "Calculate the square root of negative infinity divided by zero plus the factorial of pi",
                "expected_behavior": "Should handle impossible math gracefully"
            },
            {
                "name": "Conversion with Invalid Units",
                "question": "Convert 50 zorkples to flibbers using the international zorkple standard",
                "expected_behavior": "Should recognize invalid units and respond appropriately"
            },
            {
                "name": "Web Research with Rate Limiting Simulation",
                "question": "What are the current stock prices for all Fortune 500 companies as of this exact moment?",
                "expected_behavior": "Should handle external API limitations gracefully"
            },
            {
                "name": "Complex Multi-Agent Question",
                "question": "Analyze the correlation between quantum entanglement and the price of tea in 17th century Mongolia while also calculating the fibonacci sequence backwards from infinity",
                "expected_behavior": "Should route to multiple agents and synthesize results"
            }
        ]

        results = {
            "test_summary": {
                "total_tests": len(test_scenarios),
                "passed": 0,
                "failed": 0,
                "errors": []
            },
            "detailed_results": []
        }

        for i, scenario in enumerate(test_scenarios, 1):
            logger.info(f"\n🧪 Test {i}/{len(test_scenarios)}: {scenario['name']}")
            logger.info(f"Question: {scenario['question']}")

            start_time = time.time()

            try:
                # Process the question through the full multi-agent workflow.
                # NOTE(review): result_state is assumed to expose
                # final_answer, final_confidence, total_cost, agent_results
                # and error_messages — confirm against SimpleGAIAWorkflow.
                result_state = self.workflow.process_question(
                    question=scenario['question'],
                    task_id=f"fix_test_{i}"
                )

                processing_time = time.time() - start_time

                # Analyze the result against graceful-degradation criteria
                test_result = self._analyze_test_result(scenario, result_state, processing_time)
                results["detailed_results"].append(test_result)

                if test_result["passed"]:
                    results["test_summary"]["passed"] += 1
                    logger.info(f"✅ PASSED: {test_result['reason']}")
                else:
                    results["test_summary"]["failed"] += 1
                    logger.warning(f"❌ FAILED: {test_result['reason']}")

                # Log key metrics
                logger.info(f" 📊 Confidence: {result_state.final_confidence:.2f}")
                logger.info(f" ⏱️ Time: {processing_time:.2f}s")
                logger.info(f" 💰 Cost: ${result_state.total_cost:.4f}")
                logger.info(f" 🎯 Answer: {result_state.final_answer[:100]}...")

            except Exception as e:
                # A raised exception here means the workflow itself crashed —
                # the exact failure mode these tests exist to catch. Record it
                # as a failed scenario and keep running the remaining ones.
                error_msg = f"Exception in test {i}: {str(e)}"
                logger.error(f"❌ ERROR: {error_msg}")
                results["test_summary"]["errors"].append(error_msg)
                results["test_summary"]["failed"] += 1

                results["detailed_results"].append({
                    "test_name": scenario['name'],
                    "passed": False,
                    "reason": f"Test exception: {str(e)}",
                    "processing_time": time.time() - start_time,
                    "confidence": 0.0,
                    "answer": "Test failed with exception"
                })

        return results

    def _analyze_test_result(self, scenario: Dict[str, Any], result_state, processing_time: float) -> Dict[str, Any]:
        """Analyze if a test result meets expectations for error handling.

        Pass criteria (any one suffices):
          1. answer contains a graceful-degradation phrase AND confidence >= 0.1
          2. no such phrase but confidence >= 0.3 (a real answer was produced)
          3. confidence > 0 and at least one agent ran (system stayed stable)
        Hard failures: empty answer, or a crash-indicator phrase in the answer.
        """

        test_result = {
            "test_name": scenario['name'],
            "passed": False,
            "reason": "",
            "processing_time": processing_time,
            "confidence": result_state.final_confidence,
            "answer": result_state.final_answer,
            # agent_results is assumed keyed by an enum of agent roles —
            # TODO confirm against the workflow's state type.
            "agents_used": [role.value for role in result_state.agent_results.keys()],
            "error_count": len(result_state.error_messages)
        }

        # Check for catastrophic failures: no answer at all
        if result_state.final_answer is None or result_state.final_answer == "":
            test_result["reason"] = "Critical failure: No answer generated"
            return test_result

        # Check for system crash indicators — these phrases are the literal
        # fallback strings emitted when the pipeline breaks (lowercased match)
        crash_indicators = [
            "system not initialized",
            "workflow execution failed",
            "unable to process question - no agent results available"
        ]

        answer_lower = result_state.final_answer.lower()
        if any(indicator in answer_lower for indicator in crash_indicators):
            test_result["reason"] = "System crash detected in response"
            return test_result

        # Check for graceful error handling — phrases agents use when they
        # recover from a failed sub-step instead of crashing
        graceful_indicators = [
            "processing encountered difficulties",
            "research sources failed",
            "reasoning failed",
            "conversion failed",
            "mathematical complexity",
            "limited information available"
        ]

        has_graceful_handling = any(indicator in answer_lower for indicator in graceful_indicators)

        # Evaluate based on scenario expectations (see criteria in docstring)
        if has_graceful_handling and result_state.final_confidence >= 0.1:
            test_result["passed"] = True
            test_result["reason"] = "Graceful error handling with reasonable confidence"
        elif not has_graceful_handling and result_state.final_confidence >= 0.3:
            test_result["passed"] = True
            test_result["reason"] = "Provided meaningful answer with acceptable confidence"
        elif result_state.final_confidence > 0.0 and len(result_state.agent_results) > 0:
            test_result["passed"] = True
            test_result["reason"] = "System remained stable and attempted processing"
        else:
            test_result["reason"] = f"Insufficient error handling or system instability (confidence: {result_state.final_confidence:.2f})"

        return test_result

    def run_comprehensive_test(self) -> None:
        """Run comprehensive test and report results.

        Logs a summary and an overall verdict; passes require an 80%
        per-scenario success rate. Re-raises if the suite itself fails.
        """

        logger.info("🚀 Starting Production Fix Validation Tests")
        logger.info("=" * 60)

        start_time = time.time()

        try:
            results = self.test_error_handling_scenarios()
            total_time = time.time() - start_time

            # Print summary
            summary = results["test_summary"]
            logger.info("\n" + "=" * 60)
            logger.info("📋 TEST SUMMARY")
            logger.info("=" * 60)
            logger.info(f"Total Tests: {summary['total_tests']}")
            logger.info(f"✅ Passed: {summary['passed']}")
            logger.info(f"❌ Failed: {summary['failed']}")
            logger.info(f"⚠️ Errors: {len(summary['errors'])}")
            logger.info(f"📊 Success Rate: {summary['passed']/summary['total_tests']*100:.1f}%")
            logger.info(f"⏱️ Total Time: {total_time:.2f}s")

            # Success threshold
            success_rate = summary['passed'] / summary['total_tests']
            if success_rate >= 0.8:  # 80% success rate for error handling
                logger.info("🎉 PRODUCTION FIXES VALIDATION: PASSED")
                logger.info("System demonstrates robust error handling and graceful degradation")
            else:
                logger.warning("⚠️ PRODUCTION FIXES VALIDATION: NEEDS IMPROVEMENT")
                logger.warning(f"Success rate {success_rate*100:.1f}% below 80% threshold")

            # Print any errors
            if summary['errors']:
                logger.error("\n🔥 ERRORS ENCOUNTERED:")
                for error in summary['errors']:
                    logger.error(f" - {error}")

        except Exception as e:
            logger.error(f"❌ Comprehensive test failed: {str(e)}")
            raise
|
| 220 |
+
|
| 221 |
+
def main() -> None:
    """Main test execution.

    Builds the tester and runs the comprehensive suite. Any failure
    (including tester construction) is logged and turned into process
    exit status 1 so CI can detect it from the return code.
    """
    try:
        tester = ProductionFixTester()
        tester.run_comprehensive_test()
    except Exception as e:
        logger.error(f"Test execution failed: {e}")
        # `exit()` is a site-module convenience that is not guaranteed to
        # exist (e.g. under `python -S` or in frozen builds); raising
        # SystemExit is the reliable, equivalent way to set the exit code.
        raise SystemExit(1)

if __name__ == "__main__":
    main()
|
src/tools/__pycache__/web_search_tool.cpython-310.pyc
CHANGED
|
Binary files a/src/tools/__pycache__/web_search_tool.cpython-310.pyc and b/src/tools/__pycache__/web_search_tool.cpython-310.pyc differ
|
|
|
src/tools/web_search_tool.py
CHANGED
|
@@ -88,57 +88,97 @@ class WebSearchTool(BaseTool):
|
|
| 88 |
|
| 89 |
def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
|
| 90 |
"""
|
| 91 |
-
Search the web using DuckDuckGo
|
| 92 |
"""
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
return {
|
| 107 |
"query": query,
|
| 108 |
-
"found":
|
| 109 |
-
"
|
| 110 |
-
"
|
|
|
|
| 111 |
}
|
| 112 |
-
|
| 113 |
-
results = []
|
| 114 |
-
for result in search_results:
|
| 115 |
-
web_result = WebSearchResult(
|
| 116 |
-
title=result.get('title', 'No title'),
|
| 117 |
-
url=result.get('href', ''),
|
| 118 |
-
snippet=result.get('body', 'No description')
|
| 119 |
-
)
|
| 120 |
-
|
| 121 |
-
# Optionally extract full content from each URL
|
| 122 |
-
if extract_content and web_result.url:
|
| 123 |
-
try:
|
| 124 |
-
content_result = self._extract_content_from_url(web_result.url)
|
| 125 |
-
if content_result.get('found'):
|
| 126 |
-
web_result.content = content_result['content'][:1000] # Limit content size
|
| 127 |
-
except Exception as e:
|
| 128 |
-
logger.warning(f"Failed to extract content from {web_result.url}: {e}")
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
|
| 144 |
"""
|
|
|
|
| 88 |
|
| 89 |
    def _search_web(self, query: str, limit: int = 5, extract_content: bool = False) -> Dict[str, Any]:
        """
        Search the web using DuckDuckGo with retry mechanisms.

        Retries up to 3 times with exponential backoff (1s, 2s) on both
        exceptions and empty result sets. Never raises: every failure path
        returns a structured dict so callers can degrade gracefully.

        Args:
            query: Search terms to send to DuckDuckGo.
            limit: Maximum number of results to request.
            extract_content: When True, additionally fetch up to 1000 chars
                of page content for each result URL (best-effort).

        Returns:
            Dict with keys "query", "found", "results", "message" and, on
            success, "total_results"; failure paths may add "error_type".
        """
        max_retries = 3
        retry_delay = 1.0  # seconds; doubled after each failed attempt

        for attempt in range(max_retries):
            try:
                logger.info(f"Searching web for: {query} (attempt {attempt + 1}/{max_retries})")

                # Perform DuckDuckGo text search; the DDGS context manager
                # handles session setup/teardown. NOTE(review): no explicit
                # timeout is configured here — relies on DDGS defaults.
                with DDGS() as ddgs:
                    search_results = list(ddgs.text(
                        keywords=query,
                        max_results=limit,
                        region='us-en',
                        safesearch='moderate'
                    ))

                # Empty result sets are retried too — DuckDuckGo sometimes
                # returns nothing transiently (e.g. under rate limiting).
                if not search_results:
                    if attempt < max_retries - 1:
                        logger.warning(f"No results on attempt {attempt + 1}, retrying...")
                        time.sleep(retry_delay)
                        retry_delay *= 2  # Exponential backoff
                        continue
                    else:
                        return {
                            "query": query,
                            "found": False,
                            "message": "No web search results found after retries",
                            "results": []
                        }

                results = []
                for result in search_results:
                    try:
                        web_result = WebSearchResult(
                            title=result.get('title', 'No title'),
                            url=result.get('href', ''),
                            snippet=result.get('body', 'No description')
                        )

                        # Optionally extract full content from each URL
                        if extract_content and web_result.url:
                            try:
                                content_result = self._extract_content_from_url(web_result.url)
                                if content_result.get('found'):
                                    web_result.content = content_result['content'][:1000]  # Limit content size
                            except Exception as e:
                                logger.warning(f"Failed to extract content from {web_result.url}: {e}")
                                # Continue without content extraction rather than failing

                        results.append(web_result.to_dict())

                    except Exception as result_error:
                        logger.warning(f"Error processing search result: {result_error}")
                        # Continue with other results rather than failing entire search
                        continue

                # Return successful results even if some individual results failed
                # (found=False only if *every* result failed to process).
                return {
                    "query": query,
                    "found": len(results) > 0,
                    "results": results,
                    "total_results": len(results),
                    "message": f"Found {len(results)} web search results"
                }

            except Exception as e:
                logger.warning(f"Web search attempt {attempt + 1} failed: {str(e)}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                    retry_delay *= 2  # Exponential backoff
                    continue
                else:
                    # Final attempt failed, but don't raise exception —
                    # callers expect a structured failure dict instead.
                    logger.error(f"Web search failed after {max_retries} attempts: {str(e)}")
                    return {
                        "query": query,
                        "found": False,
                        "message": f"Web search failed after retries: {str(e)}",
                        "results": [],
                        "error_type": "search_failure"
                    }

        # Should not reach here (every loop iteration returns or continues,
        # and the last iteration always returns), but just in case
        return {
            "query": query,
            "found": False,
            "message": "Unexpected search failure",
            "results": []
        }
|
| 182 |
|
| 183 |
def _extract_content_from_url(self, url: str) -> Dict[str, Any]:
|
| 184 |
"""
|