Final_Assignment_Template

Sleeping

App Files Files Community

Luigi D'Addona committed on Jun 20

Commit

e0568e5

1 Parent(s): 93c3b2a

aggiunto tool analyze_png_image

Browse files

Files changed (2) hide show

agent.py +4 -2
tools.py +45 -0

agent.py CHANGED Viewed

@@ -14,7 +14,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 # Local imports
 from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
-                  execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool, arxiv_search
 # Nota: per i test in locale si usa il .env
 #       su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
@@ -59,8 +60,9 @@ chat = ChatGoogleGenerativeAI(
 search_tool = get_tavily_search_tool()
 #wikipedia_tool = get_wikipedia_tool()
 analyze_mp3_tool = get_analyze_mp3_tool(chat)
-tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool, arxiv_search]
 # Bind tools to the model
 chat_with_tools = chat.bind_tools(tools)

 # Local imports
 from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
+                  execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool,\
+                  get_analyze_image_tool, arxiv_search
 # Nota: per i test in locale si usa il .env
 #       su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
 search_tool = get_tavily_search_tool()
 #wikipedia_tool = get_wikipedia_tool()
 analyze_mp3_tool = get_analyze_mp3_tool(chat)
+analyze_png_tool = get_analyze_image_tool(chat)
+tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool, analyze_png_tool, arxiv_search]
 # Bind tools to the model
 chat_with_tools = chat.bind_tools(tools)

tools.py CHANGED Viewed

@@ -256,6 +256,51 @@ def get_analyze_mp3_tool(llm):
     return analyze_mp3_file
 @tool
 def arxiv_search(query: str) -> str:
     """Search Arxiv for a query and return maximum 3 result.

     return analyze_mp3_file
+def get_analyze_image_tool(llm):
+    # Factory: defines the `analyze_png_image` tool as a closure over `llm` and
+    # returns it, so the tool itself only takes the image path as an argument.
+    # NOTE(review): `tool`, `base64` and `HumanMessage` are not imported in this
+    # hunk - presumably imported at the top of tools.py; confirm.
+    @tool
+    def analyze_png_image(image_path: str) -> str:
+        """
+        Analyzes a PNG image and returns a detailed description of its content.
+        This tool requires an LLM capable of processing images, such as Gemini 1.5 Pro or Gemini 2.0 Flash.
+        """
+        # The whole body is wrapped in one try block: any failure (unreadable file,
+        # model error, unexpected response shape) is printed and reported to the
+        # caller as an empty string rather than raised.
+        try:
+            # Read image and encode as base64
+            with open(image_path, "rb") as image_file:
+                image_bytes = image_file.read()
+            # b64encode returns bytes; decode so it can be embedded in an f-string.
+            image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+            # Prepare the prompt including the base64 image data
+            # A single human message with two content parts: the text instruction
+            # and the image passed inline as a data URL.
+            message = [
+                HumanMessage(
+                    content=[
+                        {
+                            "type": "text",
+                            "text": (
+                                "Provide a very detailed description of the content of this image. "
+                                "Focus on objects, people, actions, text, and overall scene context. "
+                                "Be as comprehensive as possible."
+                            ),
+                        },
+                        {
+                            "type": "image_url",
+                            # The MIME type is hard-coded to image/png regardless of
+                            # the actual file contents or extension.
+                            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
+                        },
+                    ]
+                )
+            ]
+            # Call the vision-capable model
+            response = llm.invoke(message)
+            # NOTE(review): assumes `response.content` is a str; if it is not, the
+            # `.strip()` call raises and the except branch below returns "".
+            return response.content.strip()
+        except Exception as e:
+            print("Error analyzing image file:{} - {}".format(image_path, e))
+            # An empty string signals failure; the error detail only goes to stdout.
+            return ""
+    return analyze_png_image
 @tool
 def arxiv_search(query: str) -> str:
     """Search Arxiv for a query and return maximum 3 result.