thenativefox committed
Commit 425ca9e · verified · 1 parent: 81917a3

Create agent.py (#1)

- Create agent.py (1e1bcb3ff90d28d0d3b958ae9980af1c58cbd525)

Files changed (1)
  1. agent.py +311 -0
agent.py ADDED
@@ -0,0 +1,311 @@
+ import os
+ import tempfile
+ import requests
+ import base64
+ from io import BytesIO
+ import time
+ from llama_index.core.tools import QueryEngineTool
+ from llama_index.core.tools import FunctionTool
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
+ from llama_index.llms.openai import OpenAI
+ from llama_index.core.agent.workflow import AgentStream
+ from openai import OpenAI as OpenAIClient
+
+
+ # Config
+ from dotenv import load_dotenv
+ load_dotenv()
+ USERNAME = os.environ["USERNAME"]
+ AGENT_CODE_URL = os.environ["AGENT_CODE_URL"]
+ GAIA_BASE_URL = "https://agents-course-unit4-scoring.hf.space"
+
+ open_ai_api_key = os.environ["OPENAI_API_KEY"]
+ os.environ['OPENAI_API_KEY'] = open_ai_api_key
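+ # Note (assumption): load_dotenv() above expects a local .env providing USERNAME,
+ # AGENT_CODE_URL and OPENAI_API_KEY; USERNAME and AGENT_CODE_URL are read here but
+ # not used elsewhere in this module.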
+
+
+ class Agent:
+     def __init__(self, task: dict):
+         self.task = task
+         self.task_id = task["task_id"]
+         self.question = task["question"]
+         self.file_name = task.get("file_name", "")
+         self.llm = OpenAI(model="gpt-4o", api_key=open_ai_api_key)
+         self.client = OpenAIClient()
+         self.file_bytes = None
+         self.query_tool = None
+         self.agent = None
+
+
+     def download_file(self, task_id: str) -> bytes:
+         """
+         Download the file associated with a GAIA task ID.
+
+         :param task_id: The task ID for which to download the file
+         :return: File content as bytes, or b"" if the download fails
+         """
+         try:
+             url = f"{GAIA_BASE_URL}/files/{task_id}"
+             resp = requests.get(url)
+             resp.raise_for_status()
+             return resp.content
+         except Exception as e:
+             print(f"❌ Error downloading file for task {task_id}: {e}")
+             return b""
+
+
+     def save_file_to_temp(self) -> str:
+         """Write the downloaded file into a fresh temp directory and return that directory."""
+         temp_dir = tempfile.mkdtemp()
+         file_path = os.path.join(temp_dir, self.file_name)
+         with open(file_path, "wb") as f:
+             f.write(self.file_bytes)
+         return temp_dir
+
+
+     def index_from_directory(self, directory_path: str):
+         """Load every document in the directory and build a vector index over it."""
+         documents = SimpleDirectoryReader(directory_path).load_data()
+         index = VectorStoreIndex.from_documents(documents)
+         return index
+
+
+     def encode_image_bytes(self, image_bytes: bytes) -> str:
+         """Encode raw image bytes as a base64 image/jpeg data URL."""
+         base64_bytes = base64.b64encode(image_bytes).decode("utf-8")
+         return f"data:image/jpeg;base64,{base64_bytes}"
+
+
+     def process_image(self, query: str) -> str:
+         """
+         Process image and reply to the question.
+         """
+         base64_image = self.encode_image_bytes(self.file_bytes)
+
+         try:
+             response = self.client.responses.create(
+                 model="gpt-4o",
+                 input=[{
+                     "role": "user",
+                     "content": [
+                         {"type": "input_text", "text": f"Answer the question based on the image: {query}."},
+                         {
+                             "type": "input_image",
+                             "image_url": base64_image,
+                         },
+                     ],
+                 }],
+             )
+             result = response.output_text
+             return result
+         except Exception as e:
+             print(f"❌ Error extracting the data from image: {e}")
+             return ""
+
+
+     def process_audio(self, query: str) -> str:
+         """
+         Process audio and reply to the question.
+         """
+         audio_stream = BytesIO(self.file_bytes)
+         audio_stream.name = "audio.mp3"
+
+         try:
+             transcription = self.client.audio.transcriptions.create(
+                 model="gpt-4o-mini-transcribe",
+                 file=audio_stream,
+                 response_format="text"
+             )
+
+             response = self.client.responses.create(
+                 model="gpt-4o",
+                 input=(
+                     "You're an AI assistant whose task is to answer the following question based on the provided text. "
+                     f"The question is: {query} "
+                     f"The text is: {transcription} "
+                     "Do not provide any additional information or explanation."
+                 )
+             )
+             result = response.output_text
+             return result
+         except Exception as e:
+             print(f"❌ Error extracting the data from audio: {e}")
+             return ""
+
+
+     def run_code(self, query: str) -> str:
+         """Upload the attached script and execute it via an Assistants API Code Interpreter run to answer the question."""
+         try:
+             # Upload the code file
+             uploaded_file = self.client.files.create(
+                 file=BytesIO(self.file_bytes),
+                 purpose="assistants"
+             )
+
+             # Create an assistant with Code Interpreter enabled
+             assistant = self.client.beta.assistants.create(
+                 instructions=(
+                     "You are a professional programmer. When asked a technical question, "
+                     "analyze and execute the uploaded code using the code interpreter tool."
+                 ),
+                 model="gpt-4o",
+                 tools=[{"type": "code_interpreter"}],
+                 tool_resources={"code_interpreter": {"file_ids": [uploaded_file.id]}}
+             )
+
+             # Create a thread and send a message with the user query
+             thread = self.client.beta.threads.create()
+             self.client.beta.threads.messages.create(
+                 thread_id=thread.id,
+                 role="user",
+                 content=query,
+             )
+
+             # Run the assistant and wait for it to complete
+             run = self.client.beta.threads.runs.create_and_poll(
+                 thread_id=thread.id,
+                 assistant_id=assistant.id
+             )
+
+             if run.status != "completed":
+                 print(f"⚠️ Run did not complete successfully. Status: {run.status}")
+                 return "Code execution failed or was incomplete."
+
+             # Retrieve and return the assistant's reply
+             messages = self.client.beta.threads.messages.list(thread_id=thread.id)
+             final_response = messages.data[0].content[0].text.value
+             return final_response
+
+         except Exception as e:
+             print(f"❌ Error running code via assistant: {e}")
+             return ""
+
+
+     def validate_query_tool_output(self, query: str, output: str) -> str:
+         """
+         Validate the output of the query against the expected format.
+         """
+         try:
+             response = self.client.responses.create(
+                 model="gpt-4o",
+                 input=(
+                     "You're an AI assistant that validates the output of a query against the expected format. "
+                     f"The query is: {query}. The output is: {output}. Validate the output and if the output is not correctly formatted as per the query, provide the correct output. "
+                     "The output should be concise. Examples: (1) if you need to provide a move in a chess game, then the output should contain only the move `Qd1+` without any additional details. "
+                     "(2) If the output should be a list of items, provide them without any additional details like `Salt, pepper, chilli`. "
+                     "If the output is already correct, then just return the output. "
+                     "Do not provide any additional information or explanation."
+                 )
+             )
+             result = response.output_text
+             return result
+         except Exception as e:
+             print(f"❌ Error validating query output: {e}")
+             print("Returning the original output ...")
+             return output
+
+
+
+     def build_tools(self, query_engine):
+         """Build the per-task tool set handed to the ReAct agent."""
+         query_engine_tool = QueryEngineTool.from_defaults(
+             query_engine=query_engine,
+             name="query_tool_task",
+             description="Query the indexed content from the GAIA file.",
+             return_direct=True,
+         )
+
+         image_question_tool = FunctionTool.from_defaults(
+             self.process_image,
+             name="image_question_tool",
+             description="Answer a question based on an image and its contents."
+         )
+
+         audio_question_tool = FunctionTool.from_defaults(
+             self.process_audio,
+             name="audio_question_tool",
+             description="Answer a question based on an audio file and its contents."
+         )
+
+         code_execution_tool = FunctionTool.from_defaults(
+             self.run_code,
+             name="code_execution_tool",  # keep the registered name consistent with the system prompt below
+             description="Loads the full content of a script and executes it to answer the question.",
+         )
+         return [
+             query_engine_tool,
+             image_question_tool,
+             audio_question_tool,
+             code_execution_tool
+         ]
+
+
+     async def run_task(self):
+         """Download and index the task file, run a one-off ReAct agent over it, and return the validated answer."""
+         task_id = self.task["task_id"]
+         question = self.task["question"]
+
+         self.file_bytes = self.download_file(task_id)
+         if not self.file_bytes:
+             print(f"⚠️ No file found for task {task_id}")
+             return
+
+         # Save file to temp dir and index it
+         directory_path = self.save_file_to_temp()
+
+         index = self.index_from_directory(directory_path)
+         if not index:
+             print(f"❌ Could not index task {task_id}")
+             return
+
+         query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
+
+         # Create the task-specific tools
+         tools = self.build_tools(query_engine)
+
+         # Create a one-off agent for this task
+         rag_agent = ReActAgent(
+             name=f"agent_task_{task_id}",
+             description="Parses and answers the question using indexed content.",
+             llm=self.llm,
+             tools=tools,
+             system_prompt=(
+                 "You are an agent designed to answer a GAIA benchmark question using the attached file.\n"
+                 "You must always start by choosing the correct tool:\n"
+                 "- Use `query_tool_task` for parsing and searching documents (text, tables, PDFs, etc.).\n"
+                 "- Use `image_question_tool` if the file is an image and cannot be parsed as text.\n"
+                 "- Use `audio_question_tool` if the file is an audio file and cannot be parsed as text.\n"
+                 "- Use `code_execution_tool` if the file is code and cannot be parsed as text.\n"
+                 "Do not explain or comment on your answer. The output should be formatted as per the query."
+             )
+         )
+
+         user_msg = (
+             f"GAIA Question:\n{question}\n\n"
+             "Choose the correct tool based on the file type (document, image, audio, or code).\n"
+             "Use `query_tool_task`, `image_question_tool`, `audio_question_tool` or `code_execution_tool` to extract the answer."
+         )
+         try:
+             handler = rag_agent.run(user_msg=user_msg)
+
+             # 🧠 Show live reasoning/thought process
+             print(f"\n🧠 ReAct Reasoning for question {question}:\n")
+             async for event in handler.stream_events():
+                 if isinstance(event, AgentStream):
+                     print(event.delta, end="", flush=True)
+
+             # Final response
+             response = await handler
+             print(f"\n✅ Final Answer:\n{response}\n")
+
+             # Optional: print tool call history
+             if response.tool_calls:
+                 print("🛠️ Tool Calls:")
+                 for call in response.tool_calls:
+                     tool_name = getattr(call, "tool_name", "unknown")
+                     kwargs = getattr(call, "tool_kwargs", {})
+                     print(f"- Tool: {tool_name} | Input: {kwargs}")
+
+             validated_result = self.validate_query_tool_output(question, response)
+             print("====================================")
+             print(f"✅ Validated Answer:\n{validated_result}\n")
+             print("====================================")
+             return validated_result
+
+         except Exception as e:
+             print(f"❌ Error for task {task_id}: {e}")