"""Multi-agent question answerer built on llama_index's AgentWorkflow.

A MainAgent delegates to specialist sub-agents (Wikipedia, web search,
audio transcription, image QA, spreadsheet/CSV analysis) and compiles a
final answer in the "FINAL ANSWER: ..." template.
"""
import os
import re
import asyncio
from tavily import AsyncTavilyClient
from llama_index.core.tools import FunctionTool
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.llms.ollama import Ollama
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
from llama_index.core.agent.workflow import (
AgentOutput,
ToolCall,
ToolCallResult,
)
from multimodality_tools import get_image_qa_tool, get_transcription_tool, \
get_excel_analysis_tool, get_excel_tool, get_csv_analysis_tool, get_csv_tool, _get_file, get_read_file_tool
class BasicAgent:
    """Multi-agent workflow that answers questions in a "FINAL ANSWER:" format.

    A MainAgent delegates to specialist sub-agents (Wikipedia, web search,
    audio transcription, image QA, and spreadsheet/CSV analysis) through a
    llama_index ``AgentWorkflow``. Call the instance with a question (and an
    optional file/task id) to run the workflow.
    """

    def __init__(self, ollama=False, langfuse=False):
        """Build the LLM, optional Langfuse tracing, and the agent workflow.

        Args:
            ollama: If True, use a local Ollama model instead of Gemini.
            langfuse: If True, start Langfuse instrumentation (flushed in
                ``__call__``).
        """
        if not ollama:
            llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.getenv("GEMINI_API_KEY"))
            # llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen3-32B") #"Qwen/Qwen2.5-Coder-32B-Instruct")
        else:
            llm = Ollama(model="mistral:latest", request_timeout=120.0)

        # Langfuse tracing is optional; the instrumentor is started here and
        # flushed at the end of __call__.
        self.langfuse = langfuse
        if self.langfuse:
            self.instrumentor = LlamaIndexInstrumentor()
            self.instrumentor.start()

        # Root agent: orchestrates the sub-agents and formats the final answer.
        main_agent = FunctionAgent(
            name="MainAgent",
            description=("Can organize and delegate work to different agents and can compile a final answer to a question from other agents' outputs."),
            system_prompt=(
                "You are a general AI assistant. I will ask you a question. "
                "Report your thoughts, delegate work to other agents if necessary, and"
                "finish your answer with the following template: "
                "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number "
                "OR as few words as possible OR a comma separated list of numbers and/or "
                "strings. If you are asked for a number, don't use comma to write your "
                "number neither use units such as $ or percent sign unless specified otherwise. "
                "If you are asked for a string, don't use articles, neither abbreviations (e.g. "
                "for cities), and write the digits in plain text unless specified otherwise. If "
                "you are asked for a comma separated list, apply the above rules depending of "
                "whether the element to be put in the list is a number or a string."
            ),
            llm=llm,
            tools=[get_read_file_tool()],
            can_handoff_to=["WikiAgent", "WebAgent", "StatsAgent", "AudioAgent", "ImageAgent"],
        )

        # TODO Wikipedia tool does not return the tables from the page...
        wiki_spec = WikipediaToolSpec()
        # Index 1 is the search tool of the Wikipedia tool spec.
        wiki_search_tool = wiki_spec.to_tool_list()[1]
        wiki_agent = FunctionAgent(
            name="WikiAgent",
            description="Agent that can access Wikipedia to answer a question. Try using this agent if the WebAgent does not find an answer to a question.",
            system_prompt=(
                "You are a Wikipedia agent that can search Wikipedia for information and extract the relevant information to answer a question. "
                "You only give concise answers and if you don't find an answer to the given query on Wikipedia, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[wiki_search_tool],
            can_handoff_to=["MainAgent"],
        )

        tool_spec = DuckDuckGoSearchToolSpec()
        search_tool = FunctionTool.from_defaults(tool_spec.duckduckgo_full_search)

        # In case DuckDuckGo is not good enough
        async def search_web(query: str) -> str:
            """Searches the web to answer questions."""
            client = AsyncTavilyClient(api_key=os.getenv("TAVILY"))
            return str(await client.search(query))

        web_search_agent = FunctionAgent(
            name="WebAgent",
            description="Uses the web to answer a question.",
            system_prompt=(
                "You are a Web agent that can search the Web and extract the relevant information to answer a question. "
                "You only give concise answers and if you don't find an answer to the given query with your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[search_web],
            can_handoff_to=["MainAgent"],
        )

        audio_agent = FunctionAgent(
            name="AudioAgent",
            description="Uses transcription tools to analyze audio files. This agent needs a file id and an optional question as input",
            system_prompt=(
                "You are an audio agent that can transcribe an audio file identified by its id and answer questions about the transcript. "
                "You only give concise answers and if you cannot answer the given query using your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_transcription_tool()],
            can_handoff_to=["MainAgent"],
        )

        image_agent = FunctionAgent(
            name="ImageAgent",
            description="Can respond to questions involving image understanding. This agent needs a file id and a question as an input.",
            system_prompt=(
                "You are an agent that can read images from a file identified by its id and answer questions about it. "
                "Give concise answers and only include the relevant information in you response."
                "If you cannot answer the given query using your tool, you communicate this clearly. "
                "Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_image_qa_tool()],
            can_handoff_to=["MainAgent"],
        )

        stats_agent = FunctionAgent(
            name="StatsAgent",
            description="Uses statistical tools to read and analyse excel and csv files. This agent needs a file id and an optional question as an input",
            system_prompt=(
                "You are an agent that can read excel and csv files and run simple statistical analysis on them. "
                "You can use this information or the loaded file to answer questions about it. "
                "You only give concise answers and if you cannot answer the given query using your tool, "
                "you communicate this clearly. Always hand off your answer to MainAgent."
            ),
            llm=llm,
            tools=[get_csv_analysis_tool(), get_csv_tool(),
                   get_excel_analysis_tool(), get_excel_tool()],
            can_handoff_to=["MainAgent"],
        )

        # Main AgentWorkflow rooted at MainAgent.
        self.agent = AgentWorkflow(
            agents=[main_agent, wiki_agent, web_search_agent,
                    audio_agent, image_agent, stats_agent],
            root_agent=main_agent.name,
        )

    async def __call__(self, question: str, task_id: str | None = None) -> str:
        """Run the workflow on a question, streaming progress to stdout.

        Args:
            question: The question to answer.
            task_id: Optional id of an attached file the agents may load.

        Returns:
            The text following the "FINAL ANSWER:" marker in the workflow's
            response, or an error message if no valid response was produced.
        """
        file_str = ""
        if file_exists(task_id):
            file_str = f'\nIf you need to load a file, do so by providing the id "{task_id}".'
        final_answer = (
            "Remember to always use the template 'FINAL ANSWER: [YOUR FINAL ANSWER]' for your final output. "
            "Always use as few words as possible for your final answer."
        )
        msg = f"{question}{file_str}\n{final_answer}"

        # Stream events so agent handoffs, tool calls, and outputs are visible.
        handler = self.agent.run(user_msg=msg)
        current_agent = None
        current_tool_calls = ""
        async for event in handler.stream_events():
            if (
                hasattr(event, "current_agent_name")
                and event.current_agent_name != current_agent
            ):
                # Print a banner whenever control moves to a different agent.
                current_agent = event.current_agent_name
                print(f"\n{'='*50}")
                print(f"🤖 Agent: {current_agent}")
                print(f"{'='*50}\n")
            elif isinstance(event, AgentOutput):
                if event.response.content:
                    print("📤 Output:", event.response.content)
                if event.tool_calls:
                    print(
                        "🛠️ Planning to use tools:",
                        [call.tool_name for call in event.tool_calls],
                    )
            elif isinstance(event, ToolCallResult):
                print(f"🔧 Tool Result ({event.tool_name}):")
                print(f"  Arguments: {event.tool_kwargs}")
                print(f"  Output: {event.tool_output}")
            elif isinstance(event, ToolCall):
                print(f"🔨 Calling Tool: {event.tool_name}")
                print(f"  With arguments: {event.tool_kwargs}")
            # Avoid ratelimits - 15 requests per minute
            await asyncio.sleep(4.1)

        if self.langfuse:
            self.instrumentor.flush()

        try:
            res = await handler
            res = res.response.content
            # Keep only the text after the last "FINAL ANSWER:" marker.
            res = re.sub(r'^.*?FINAL ANSWER:', '', res, flags=re.DOTALL).strip()
            return res
        except Exception:  # narrowed from a bare except: don't swallow SystemExit/KeyboardInterrupt
            return "Error occurred. No valid agent response could be determined."
def file_exists(task_id: str | None) -> bool:
    """Return True if a file for the given task id can be retrieved.

    Best-effort check: any retrieval failure (including a missing or None
    id) is reported as "no file" instead of raising.

    Args:
        task_id: Id of the task whose attached file should be probed.

    Returns:
        True if ``_get_file`` succeeds for the id, False otherwise.
    """
    try:
        _get_file(task_id)
    except Exception:  # narrowed from a bare except: keep best-effort semantics
        return False
    return True