Final_Assignment_Template

Configuration error

App Files Files Community

oremaz commited on Jun 16

Commit

4c20a23

1 Parent(s): 6e0b138

Update agent2.py

Browse files

Files changed (1) hide show

agent2.py +260 -51

agent2.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import requests
 from typing import Dict, Any, List
 from langchain.docstore.document import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -16,6 +17,16 @@ from io import BytesIO
 from time import sleep
 from smolagents import PythonInterpreterTool, SpeechToTextTool
 class BM25RetrieverTool(Tool):
     """
     BM25 retriever tool for document search when text documents are available
@@ -171,17 +182,20 @@ def save_screenshot_callback(memory_step: ActionStep, agent: CodeAgent) -> None:
 class GAIAAgent:
     """
-    Simplified GAIA agent using smolagents with Gemini 2.0 Flash
     """
-    def __init__(self):
-        """Initialize the agent with Gemini 2.0 Flash and tools"""
-        # Get Gemini API key
         gemini_api_key = os.environ.get("GOOGLE_API_KEY")
         if not gemini_api_key:
             raise ValueError("GOOGLE_API_KEY environment variable not found")
         # Initialize Gemini 2.0 Flash model
         self.model = OpenAIServerModel(
             model_id="gemini-2.0-flash",
@@ -189,6 +203,10 @@ class GAIAAgent:
             api_key=gemini_api_key,
         )
         # GAIA system prompt from the leaderboard
         self.system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts and reasoning process clearly. You should use the available tools to gather information and solve problems step by step.
@@ -214,6 +232,42 @@ Your final answer should be as few words as possible, a number, or a comma-separ
         self.agent = None
         self._create_agent()
     def _create_agent(self):
         """Create the CodeAgent with tools"""
         base_tools = [
@@ -228,8 +282,8 @@ Your final answer should be as few words as possible, a number, or a comma-separ
         self.agent = CodeAgent(
             tools=base_tools + [PythonInterpreterTool(), SpeechToTextTool()],
             model=self.model,
-            add_base_tools=False,  # Adds web search, python execution, etc.
-            planning_interval=2,  # Plan every 2 steps
             additional_authorized_imports=["helium", "requests", "BeautifulSoup", "json"],
             step_callbacks=[save_screenshot_callback] if self.driver else [],
             max_steps=10,
@@ -308,70 +362,225 @@ Your final answer should be as few words as possible, a number, or a comma-separ
             print(f"Failed to download file for task {task_id}: {e}")
             return None
-    def solve_gaia_question(self, question_data: Dict[str, Any]) -> str:
         """
-        Solve a GAIA question
         """
         question = question_data.get("Question", "")
         task_id = question_data.get("task_id", "")
-        # Download and load file if task_id provided
         if task_id:
-            file_path = self.download_gaia_file(task_id)
-            if file_path:
-                self.load_documents_from_file(file_path)
-                print(f"Loaded file for task {task_id}")
-        # Check if this requires web browsing
-        web_indicators = ["navigate", "browser", "website", "webpage", "url", "click", "search on"]
-        needs_browser = any(indicator in question.lower() for indicator in web_indicators)
-        if needs_browser and not self.driver:
-            print("Initializing browser for web automation...")
-            self.initialize_browser()
-        # Prepare the prompt
-        prompt = f"""
 Question: {question}
 {f'Task ID: {task_id}' if task_id else ''}
-{f'File loaded: Yes' if task_id else 'File loaded: No'}
 Solve this step by step. Use the available tools to gather information and provide a precise answer.
         """
-        if needs_browser:
-            prompt += "\n" + helium_instructions
-        try:
-            print("=== AGENT REASONING ===")
-            result = self.agent.run(prompt)
-            print("=== END REASONING ===")
-            return str(result)
         except Exception as e:
-            error_msg = f"Error processing question: {str(e)}"
-            print(error_msg)
-            return error_msg
-        finally:
-            # Clean up browser if initialized
-            if self.driver:
-                try:
-                    helium.kill_browser()
-                except:
-                    pass
-# Example usage
 if __name__ == "__main__":
-    # Test the agent
-    agent = GAIAAgent()
     # Example question
     question_data = {
-        "Question": "How many studio albums Mercedes Sosa has published between 2000-2009 ?",
         "task_id": ""
     }
-    answer = agent.solve_gaia_question(question_data)
-    print(f"Answer: {answer}")

 import os
 import requests
+import base64
 from typing import Dict, Any, List
 from langchain.docstore.document import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from time import sleep
 from smolagents import PythonInterpreterTool, SpeechToTextTool
+# Langfuse observability imports
+from opentelemetry.sdk.trace import TracerProvider
+from openinference.instrumentation.smolagents import SmolagentsInstrumentor
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry import trace
+from opentelemetry.trace import format_trace_id
+from langfuse import Langfuse
 class BM25RetrieverTool(Tool):
     """
     BM25 retriever tool for document search when text documents are available
 class GAIAAgent:
     """
+    GAIA agent using smolagents with Gemini 2.0 Flash and Langfuse observability
     """
+    def __init__(self, user_id: str = None, session_id: str = None):
+        """Initialize the agent with Gemini 2.0 Flash, tools, and Langfuse observability"""
+        # Get API keys
         gemini_api_key = os.environ.get("GOOGLE_API_KEY")
         if not gemini_api_key:
             raise ValueError("GOOGLE_API_KEY environment variable not found")
+        # Initialize Langfuse observability
+        self._setup_langfuse_observability()
         # Initialize Gemini 2.0 Flash model
         self.model = OpenAIServerModel(
             model_id="gemini-2.0-flash",
             api_key=gemini_api_key,
         )
+        # Store user and session IDs for tracking
+        self.user_id = user_id or "gaia-user"
+        self.session_id = session_id or "gaia-session"
         # GAIA system prompt from the leaderboard
         self.system_prompt = """You are a general AI assistant. I will ask you a question. Report your thoughts and reasoning process clearly. You should use the available tools to gather information and solve problems step by step.
         self.agent = None
         self._create_agent()
+        # Initialize Langfuse client
+        self.langfuse = Langfuse()
+    def _setup_langfuse_observability(self):
+        """Set up Langfuse observability with OpenTelemetry"""
+        # Get Langfuse keys from environment variables
+        langfuse_public_key = os.environ.get("LANGFUSE_PUBLIC_KEY")
+        langfuse_secret_key = os.environ.get("LANGFUSE_SECRET_KEY")
+        if not langfuse_public_key or not langfuse_secret_key:
+            print("Warning: LANGFUSE_PUBLIC_KEY or LANGFUSE_SECRET_KEY not found. Observability will be limited.")
+            return
+        # Set up Langfuse environment variables
+        os.environ["LANGFUSE_HOST"] = os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
+        langfuse_auth = base64.b64encode(
+            f"{langfuse_public_key}:{langfuse_secret_key}".encode()
+        ).decode()
+        os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = os.environ.get("LANGFUSE_HOST") + "/api/public/otel"
+        os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {langfuse_auth}"
+        # Create a TracerProvider for OpenTelemetry
+        trace_provider = TracerProvider()
+        # Add a SimpleSpanProcessor with the OTLPSpanExporter to send traces
+        trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
+        # Set the global default tracer provider
+        trace.set_tracer_provider(trace_provider)
+        self.tracer = trace.get_tracer(__name__)
+        # Instrument smolagents with the configured provider
+        SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
     def _create_agent(self):
         """Create the CodeAgent with tools"""
         base_tools = [
         self.agent = CodeAgent(
             tools=base_tools + [PythonInterpreterTool(), SpeechToTextTool()],
             model=self.model,
+            add_base_tools=False,
+            planning_interval=2,
             additional_authorized_imports=["helium", "requests", "BeautifulSoup", "json"],
             step_callbacks=[save_screenshot_callback] if self.driver else [],
             max_steps=10,
             print(f"Failed to download file for task {task_id}: {e}")
             return None
+    def solve_gaia_question(self, question_data: Dict[str, Any], tags: List[str] = None) -> str:
         """
+        Solve a GAIA question with full Langfuse observability
         """
         question = question_data.get("Question", "")
         task_id = question_data.get("task_id", "")
+        # Prepare tags for observability
+        trace_tags = ["gaia-agent", "question-solving"]
+        if tags:
+            trace_tags.extend(tags)
         if task_id:
+            trace_tags.append(f"task-{task_id}")
+        # Start Langfuse trace with OpenTelemetry
+        with self.tracer.start_as_current_span("GAIA-Question-Solving") as span:
+            try:
+                # Set span attributes for tracking
+                span.set_attribute("langfuse.user.id", self.user_id)
+                span.set_attribute("langfuse.session.id", self.session_id)
+                span.set_attribute("langfuse.tags", trace_tags)
+                span.set_attribute("gaia.task_id", task_id)
+                span.set_attribute("gaia.question_length", len(question))
+                # Get trace ID for Langfuse linking
+                current_span = trace.get_current_span()
+                span_context = current_span.get_span_context()
+                trace_id = span_context.trace_id
+                formatted_trace_id = format_trace_id(trace_id)
+                # Create Langfuse trace
+                langfuse_trace = self.langfuse.trace(
+                    id=formatted_trace_id,
+                    name="GAIA Question Solving",
+                    input={"question": question, "task_id": task_id},
+                    user_id=self.user_id,
+                    session_id=self.session_id,
+                    tags=trace_tags,
+                    metadata={
+                        "model": self.model.model_id,
+                        "question_length": len(question),
+                        "has_file": bool(task_id)
+                    }
+                )
+                # Download and load file if task_id provided
+                file_loaded = False
+                if task_id:
+                    file_path = self.download_gaia_file(task_id)
+                    if file_path:
+                        file_loaded = self.load_documents_from_file(file_path)
+                        span.set_attribute("gaia.file_loaded", file_loaded)
+                        print(f"Loaded file for task {task_id}")
+                # Check if this requires web browsing
+                web_indicators = ["navigate", "browser", "website", "webpage", "url", "click", "search on"]
+                needs_browser = any(indicator in question.lower() for indicator in web_indicators)
+                span.set_attribute("gaia.needs_browser", needs_browser)
+                if needs_browser and not self.driver:
+                    print("Initializing browser for web automation...")
+                    browser_initialized = self.initialize_browser()
+                    span.set_attribute("gaia.browser_initialized", browser_initialized)
+                # Prepare the prompt
+                prompt = f"""
 Question: {question}
 {f'Task ID: {task_id}' if task_id else ''}
+{f'File loaded: Yes' if file_loaded else 'File loaded: No'}
 Solve this step by step. Use the available tools to gather information and provide a precise answer.
+                """
+                if needs_browser:
+                    prompt += "\n" + helium_instructions
+                print("=== AGENT REASONING ===")
+                result = self.agent.run(prompt)
+                print("=== END REASONING ===")
+                # Update Langfuse trace with result
+                langfuse_trace.update(
+                    output={"answer": str(result)},
+                    end_time=None  # Will be set automatically
+                )
+                # Add success attributes
+                span.set_attribute("gaia.success", True)
+                span.set_attribute("gaia.answer_length", len(str(result)))
+                # Flush Langfuse data
+                self.langfuse.flush()
+                return str(result)
+            except Exception as e:
+                error_msg = f"Error processing question: {str(e)}"
+                print(error_msg)
+                # Log error to span and Langfuse
+                span.set_attribute("gaia.success", False)
+                span.set_attribute("gaia.error", str(e))
+                if 'langfuse_trace' in locals():
+                    langfuse_trace.update(
+                        output={"error": error_msg},
+                        level="ERROR"
+                    )
+                self.langfuse.flush()
+                return error_msg
+            finally:
+                # Clean up browser if initialized
+                if self.driver:
+                    try:
+                        helium.kill_browser()
+                    except:
+                        pass
+    def evaluate_answer(self, question: str, answer: str, expected_answer: str = None) -> Dict[str, Any]:
         """
+        Evaluate the agent's answer using LLM-as-a-Judge and optionally compare with expected answer
+        """
+        evaluation_prompt = f"""
+Please evaluate the following answer to a question on a scale of 1-5:
+Question: {question}
+Answer: {answer}
+{f'Expected Answer: {expected_answer}' if expected_answer else ''}
+Rate the answer on:
+1. Accuracy (1-5)
+2. Completeness (1-5)
+3. Clarity (1-5)
+Provide your rating as JSON: {{"accuracy": X, "completeness": Y, "clarity": Z, "overall": W, "reasoning": "explanation"}}
+        """
+        try:
+            # Use the same model to evaluate
+            evaluation_result = self.agent.run(evaluation_prompt)
+            # Try to parse JSON response
+            import json
+            try:
+                scores = json.loads(evaluation_result)
+                return scores
+            except:
+                # Fallback if JSON parsing fails
+                return {
+                    "accuracy": 3,
+                    "completeness": 3,
+                    "clarity": 3,
+                    "overall": 3,
+                    "reasoning": "Could not parse evaluation response",
+                    "raw_evaluation": evaluation_result
+                }
+        except Exception as e:
+            return {
+                "accuracy": 1,
+                "completeness": 1,
+                "clarity": 1,
+                "overall": 1,
+                "reasoning": f"Evaluation failed: {str(e)}"
+            }
+    def add_user_feedback(self, trace_id: str, feedback_score: int, comment: str = None):
+        """
+        Add user feedback to a specific trace
+        Args:
+            trace_id: The trace ID to add feedback to
+            feedback_score: Score from 0-5 (0=very bad, 5=excellent)
+            comment: Optional comment from user
+        """
+        try:
+            self.langfuse.score(
+                trace_id=trace_id,
+                name="user-feedback",
+                value=feedback_score,
+                comment=comment
+            )
+            self.langfuse.flush()
+            print(f"User feedback added: {feedback_score}/5")
         except Exception as e:
+            print(f"Error adding user feedback: {e}")
+# Example usage with observability
 if __name__ == "__main__":
+    # Set up environment variables (you need to set these)
+    # os.environ["GOOGLE_API_KEY"] = "your-gemini-api-key"
+    # os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
+    # os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
+    # Test the agent with observability
+    agent = GAIAAgent(
+        user_id="test-user-123",
+        session_id="test-session-456"
+    )
     # Example question
     question_data = {
+        "Question": "How many studio albums Mercedes Sosa has published between 2000-2009?",
         "task_id": ""
     }
+    # Solve with full observability
+    answer = agent.solve_gaia_question(
+        question_data,
+        tags=["music-question", "discography"]
+    )
+    print(f"Answer: {answer}")
+    # Evaluate the answer
+    evaluation = agent.evaluate_answer(
+        question_data["Question"],
+        answer
+    )
+    print(f"Evaluation: {evaluation}")