File size: 3,614 Bytes
bbfc23a
 
99fc878
 
b11304e
 
 
 
 
 
 
 
 
af10fd2
b11304e
 
99fc878
b11304e
 
 
 
 
af10fd2
b11304e
 
 
 
 
 
 
 
 
 
 
60f7dea
b11304e
99fc878
b11304e
bbfc23a
 
60f7dea
bbfc23a
 
 
 
b11304e
 
 
 
 
7bd852d
b11304e
 
7bd852d
bbfc23a
af10fd2
b11304e
99fc878
bbfc23a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core.tools import FunctionTool
from llama_index.core.workflow import Context
from llama_index.core.llms import ChatMessage
from excel_file_reader import excel_file_reader
from image_analyzer import analyze_image
from audio_analyzer import analyze_audio
from video_analyzer import analyze_video
import os

def _build_tools() -> list[FunctionTool]:
    """Wrap the search, code-execution and file-analysis callables as agent tools."""
    # (callable, tool name, description passed to FunctionTool), in registration order.
    tool_specs = [
        (
            DuckDuckGoSearchToolSpec().duckduckgo_full_search,
            "duckduckgo_search",
            "Searches the web using DuckDuckGo.",
        ),
        (
            WikipediaToolSpec().search_data,
            "wikipedia_search",
            "Searches Wikipedia for information.",
        ),
        (
            CodeInterpreterToolSpec().code_interpreter,
            "code_interpreter",
            "Executes Python code and returns the output.",
        ),
        (
            analyze_image,
            "analyze_image",
            "Analyzes an image provided by URL using Google Gemini API. Creates summary of the image content.",
        ),
        (
            analyze_audio,
            "analyze_audio",
            "Transcribes audio files to text using Google Gemini Flash model. Supports various audio formats including MP3, WAV, M4A, etc.",
        ),
        (
            analyze_video,
            "analyze_video",
            "Analyzes a YouTube video using Google Gemini API. Creates summary of the video content.",
        ),
        (
            excel_file_reader,
            "excel_file_reader",
            "Loads Excel files and analyzes the content of the file.",
        ),
    ]
    return [
        FunctionTool.from_defaults(fn, name=name, description=description)
        for fn, name, description in tool_specs
    ]


async def main(query: str, file_url: str | None = None) -> str:
    """Run a tool-equipped agent workflow on a single question.

    Builds a Gemini-backed ``AgentWorkflow`` with web search, Wikipedia,
    code-interpreter, image, audio, video and Excel tools, sends it the
    question as a user message and awaits the run.

    Args:
        query: The question to answer.
        file_url: URL of the file attached to the question. ``None`` or an
            empty string means there is no attachment, in which case the
            question is sent to the agent unchanged.

    Returns:
        The value produced by awaiting ``agent.run``.
        NOTE(review): the signature declares ``str`` but the run result is
        returned unconverted; it is presumably an agent output object, so
        callers needing plain text may have to apply ``str()`` - confirm
        against the call sites before tightening this.
    """
    google_genai_llm = GoogleGenAI(model="gemini-2.0-flash", api_key=os.getenv("GOOGLE_API_KEY"))

    agent = AgentWorkflow.from_tools_or_functions(
        tools_or_functions=_build_tools(),
        llm=google_genai_llm,
        system_prompt="""
    You are a general AI assistant. I will ask you a question. 
    Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. 
    YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. 
    If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. 
    If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. 
    If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
"""
    )

    ctx = Context(agent)

    if not file_url:
        # No attachment (None or empty URL): do not tell the model that a
        # file is required, since there is nothing for it to fetch.
        prompt = query
    else:
        prompt = (
            f"{query}. Use the URL {file_url} as the Url path of attached files mentioned in the question. "
            "Attached file are required to answer the question. "
            "To run Python code use Code Interpreter tool."
        )

    message = ChatMessage(role="user", content=prompt)

    response = await agent.run(message, ctx=ctx)

    return response