Luigi D'Addona
committed on
Commit
·
e0568e5
1
Parent(s):
93c3b2a
aggiunto tool analyze_png_image
Browse files
agent.py
CHANGED
@@ -14,7 +14,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
|
|
14 |
|
15 |
# Local imports
|
16 |
from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
|
17 |
-
execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool
|
|
|
18 |
|
19 |
# Nota: per i test in locale si usa il .env
|
20 |
# su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
|
@@ -59,8 +60,9 @@ chat = ChatGoogleGenerativeAI(
|
|
59 |
search_tool = get_tavily_search_tool()
|
60 |
#wikipedia_tool = get_wikipedia_tool()
|
61 |
analyze_mp3_tool = get_analyze_mp3_tool(chat)
|
|
|
62 |
|
63 |
-
tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool, arxiv_search]
|
64 |
|
65 |
# Bind tools to the model
|
66 |
chat_with_tools = chat.bind_tools(tools)
|
|
|
14 |
|
15 |
# Local imports
|
16 |
from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
|
17 |
+
execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool,\
|
18 |
+
get_analyze_image_tool, arxiv_search
|
19 |
|
20 |
# Nota: per i test in locale si usa il .env
|
21 |
# su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
|
|
|
60 |
search_tool = get_tavily_search_tool()
|
61 |
#wikipedia_tool = get_wikipedia_tool()
|
62 |
analyze_mp3_tool = get_analyze_mp3_tool(chat)
|
63 |
+
analyze_png_tool = get_analyze_image_tool(chat)
|
64 |
|
65 |
+
tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool, analyze_png_tool, arxiv_search]
|
66 |
|
67 |
# Bind tools to the model
|
68 |
chat_with_tools = chat.bind_tools(tools)
|
tools.py
CHANGED
@@ -256,6 +256,51 @@ def get_analyze_mp3_tool(llm):
|
|
256 |
return analyze_mp3_file
|
257 |
|
258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
@tool
|
260 |
def arxiv_search(query: str) -> str:
|
261 |
"""Search Arxiv for a query and return maximum 3 result.
|
|
|
256 |
return analyze_mp3_file
|
257 |
|
258 |
|
259 |
+
def _image_response_text(content) -> str:
    """Flatten a chat-message ``content`` payload into stripped plain text.

    Chat models may return ``content`` either as a plain string or as a list
    of parts (strings and/or ``{"type": "text", "text": ...}`` dicts).
    Calling ``.strip()`` directly on the list form raises ``AttributeError``,
    so both shapes are normalised here.

    Args:
        content: The ``content`` attribute of a model response.

    Returns:
        The concatenated text parts with surrounding whitespace removed, or
        an empty string when no text is present.
    """
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, (list, tuple)):
        # Unknown payload shape: nothing textual to return.
        return ""

    pieces = []
    for part in content:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and part.get("type", "text") == "text":
            text = part.get("text")
            if isinstance(text, str):
                pieces.append(text)
    return "".join(pieces).strip()


def get_analyze_image_tool(llm):
    """Build the ``analyze_png_image`` tool bound to the given chat model.

    Args:
        llm: Chat model exposing ``invoke(messages)``. It must accept image
            input, since the tool sends the image inline as a base64 data URI.

    Returns:
        The decorated ``analyze_png_image`` tool, closed over ``llm``.
    """
    # Function-scope import so this block does not depend on the module's
    # top-level import list.
    import mimetypes

    # NOTE: the docstring below is kept verbatim; the ``@tool`` decorator
    # exposes it to the model as the tool description.
    @tool
    def analyze_png_image(image_path: str) -> str:
        """
        Analyzes a PNG image and returns a detailed description of its content.
        This tool requires an LLM capable of processing images, such as Gemini 1.5 Pro or Gemini 2.0 Flash.
        """
        try:
            # Read image and encode as base64
            with open(image_path, "rb") as image_file:
                image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

            # Declare the real image type when the extension reveals it;
            # fall back to PNG (the previous hard-coded value) otherwise.
            mime_type, _ = mimetypes.guess_type(image_path)
            if not mime_type or not mime_type.startswith("image/"):
                mime_type = "image/png"

            # Prepare the prompt including the base64 image data
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Provide a very detailed description of the content of this image. "
                                "Focus on objects, people, actions, text, and overall scene context. "
                                "Be as comprehensive as possible."
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                        },
                    ]
                )
            ]

            # Call the vision-capable model
            response = llm.invoke(message)

            # ``content`` may be a string or a list of parts; handle both.
            return _image_response_text(response.content)
        except Exception as e:
            # Best-effort tool: report the failure and return an empty
            # description instead of aborting the agent run.
            print("Error analyzing image file:{} - {}".format(image_path, e))
            return ""

    return analyze_png_image
|
302 |
+
|
303 |
+
|
304 |
@tool
|
305 |
def arxiv_search(query: str) -> str:
|
306 |
"""Search Arxiv for a query and return maximum 3 result.
|