SerotoninRonin committed on
Commit
6bae69e
·
1 Parent(s): 62ed5af

Refactor agent architecture: replace ReActAgent with LangGraphAgent, streamline tool integration, and enhance Excel parsing and image description functionalities.

Browse files
Files changed (3) hide show
  1. agents.py +85 -62
  2. app.py +25 -81
  3. tools.py +133 -98
agents.py CHANGED
@@ -1,75 +1,98 @@
1
  import os
2
- from llama_index.core.agent.workflow import ReActAgent, FunctionAgent
3
- from llama_index.core import PromptTemplate
4
- from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 
 
 
5
  from tools import (
6
- search_tool,
7
  describe_image_tool,
8
  parse_excel_tool,
9
- access_webpage_tool,
10
- string_functions_tool
 
 
11
  )
12
 
13
- thinking_agent = ReActAgent(
14
- name="Thinking Agent",
15
- description="An agent that can think and reason about tasks, and then handoff the task to Technician Agent for execution, or to Manager Agent for review.",
16
- llm=HuggingFaceInferenceAPI(
17
- model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
18
- provider="auto",
19
- token=os.environ.get("HF_TOKEN")
20
- ),
21
- system_prompt="You are a thinking agent that can reason about tasks and communicate the necessary steps to complete them to Technician Agent, if necessary. If you believe the task is completed and the question is answered, you must handoff the answer to Manager Agent for final review.",
22
- can_handoff_to=["Technician Agent", "Manager Agent"]
23
- )
24
 
25
- technician_agent = ReActAgent(
26
- name="Technician Agent",
27
- description="An agent that can perform various technical tasks such as searching the web, describing images, parsing Excel files, string operations, and accessing webpages.",
28
- tools=[search_tool, describe_image_tool, parse_excel_tool, access_webpage_tool, string_functions_tool],
29
- llm=HuggingFaceInferenceAPI(
30
- model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
31
- provider="auto",
32
- token=os.environ.get("HF_TOKEN")
33
- ),
34
- system_prompt="You are a helpful agent that answers questions based on the provided tools. Use the tools to gather information and provide accurate answers, and send those answers to Thinking Agent. If the task is too complex or requires further reasoning, handoff the task to Thinking Agent for analysis with the reasoning as why you cannot complete it. You must always handoff to Thinking Agent",
35
- can_handoff_to=["Thinking Agent"]
36
- )
37
 
38
- manager_agent = ReActAgent(
39
- name="Manager Agent",
40
- description="A high-level agent that can manage tasks and coordinate between other agents.",
41
- llm=HuggingFaceInferenceAPI(
42
- model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
43
- provider="auto",
44
- token=os.environ.get("HF_TOKEN")
45
- ),
46
- can_handoff_to=["Thinking Agent"],
47
- # system_prompt="You are a manager agent that oversees tasks and coordinates between other agents. Do not include thoughts in your response. Just the answer. " \
48
- # "You will be given a question and you need to respond with the correct answer provided by the other agents. " \
49
- # "Your response should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. " \
50
- # "For example, if the question is 'What color are the stars on the American flag?' Your response would be 'White'. " \
51
- # "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. " \
52
- # "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. " \
53
- # "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. " \
54
- # "Do not engage in conversation, just respond with the answer unless the question explicitly asks for a certain style of response. " \
55
- # "If the results are questionable, you can send the task back to Thinking Agent for further analysis. If the answer can not be concluded, respond with 'I don't know'"
56
  )
57
 
58
- minimal_system_prompt = """
59
- You are the Manager Agent. 💼
60
- Respond with **only the final answer**, in as few words as possible.
61
- Do **not** include any reasoning, thoughts, or tool calls.
62
- If it's a number: use plain digits (no commas, %, etc.).
63
- If it's a string: no articles, no abbreviations.
64
- If it's a list: comma-separated, each element following the rules above.
65
- If unsure, reply: I don't know.
66
- Below is the current conversation consisting of interleaving human and assistant messages.
67
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- manager_prompt = PromptTemplate(template=minimal_system_prompt)
70
 
71
- manager_agent.update_prompts({"react_header", manager_prompt.get_template()})
 
72
 
73
- prompt_dict = manager_agent.get_prompts()
74
- for k, v in prompt_dict.items():
75
- print(f"Prompt: {k}\n\nValue: {v.template}")
 
 
 
 
 
 
 
 
1
  import os
2
+ from typing import Any, List, TypedDict
3
+ from langgraph.graph import StateGraph
4
+ from langgraph.prebuilt import ToolNode, tools_condition
5
+ from langchain_core.messages import HumanMessage, SystemMessage
6
+ from langchain_core.rate_limiters import InMemoryRateLimiter
7
+ from langchain_openai import ChatOpenAI
8
  from tools import (
 
9
  describe_image_tool,
10
  parse_excel_tool,
11
+ webpage_extraction_tool,
12
+ brave_web_search,
13
+ python_code_interpreter_tool,
14
+ audio_file_transcriber
15
  )
16
 
17
class AgentState(TypedDict):
    """State dictionary that flows through the agent's LangGraph graph."""

    # Conversation so far; the agent node reads this list and returns an
    # updated one.
    messages: List[Any]
    # Text of the question being answered.
    question: str
    # NOTE(review): presumably the path of a file attached to the question;
    # no code in this module reads it -- confirm intended use.
    file_path: str
    # Slot for the final answer text.
    final_answer: str
 
 
 
 
 
 
22
 
23
# Every tool the agent may call. The same list is bound to the chat model
# and handed to the ToolNode, so the two always agree.
tools = [
    describe_image_tool,
    parse_excel_tool,
    webpage_extraction_tool,
    brave_web_search,
    python_code_interpreter_tool,
    audio_file_transcriber,
]
 
 
 
 
31
 
32
# Shared throttle for model requests.
#   requests_per_second=0.1   -> on average one request every 10 seconds
#   check_every_n_seconds=0.1 -> wake up every 100 ms to see if a request is allowed
#   max_bucket_size=10        -> upper bound on the burst size
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)
37
 
38
+ class LangGraphAgent:
39
+ def __init__(self, model_name: str = "gpt-4o",):
40
+ self.llm = ChatOpenAI(model=model_name, max_tokens=3000, temperature=0, streaming=True, rate_limiter=rate_limiter)
41
+ self.llm_with_tools = self.llm.bind_tools(tools)
42
+ self.tool_node = ToolNode(tools)
43
+ self.graph = self.create_graph().compile()
44
+
45
+ def create_graph(self) -> StateGraph:
46
+ """Creates a state graph for the agent's workflow."""
47
+ # Define the tools and their respective states
48
+ graph = StateGraph(AgentState)
49
+ graph.add_node("agent", self.agent_node)
50
+ graph.add_node("tools", self.tool_node)
51
+
52
+ graph.set_entry_point("agent")
53
+
54
+ graph.add_conditional_edges("agent", tools_condition)
55
+ graph.add_edge("tools", "agent")
56
+
57
+ return graph
58
+
59
+ def agent_node(self, state: AgentState):
60
+ """Creates a node for the agent that uses the model to respond to user queries."""
61
+ messages = state['messages']
62
+
63
+ if len(messages) == 1:
64
+ system_prompt = ("You are a helpful assistant that can answer questions using various tools. "
65
+ "You must answer the given question using as few words as possible, or the given format, if any."
66
+ " If the answer is a number, you must return the number only, do not include symbols or commas. If you don't know the answer, say 'I don't know'. "
67
+ "These questions are nontrivial and may require advanced critical thinking and multiple tool calls."
68
+ "If you need to search the web for information, always use a search tool before using a wepbage extraction tool so you always have a legit website."
69
+ "If given a Python file, execute it with the code interpreter tool (riza_exec_python)")
70
+ messages = [SystemMessage(system_prompt)] + messages
71
+
72
+ while True:
73
+ response = self.llm_with_tools.invoke(messages)
74
+ messages.append(response)
75
+
76
+ if not response.tool_calls:
77
+ break
78
+
79
+ tool_outputs = []
80
+ for call in response.tool_calls:
81
+ tool_output = self.tool_node.invoke({"messages": [response], "tool_call": call})
82
+ tool_outputs.extend(tool_output["messages"])
83
 
84
+ messages.extend(tool_outputs)
85
 
86
+ state["messages"] = messages
87
+ return {"messages": messages}
88
 
89
+ def run(self, question: str) -> str:
90
+ state = AgentState(messages=[HumanMessage(content=question)], question=question, final_answer=None)
91
+ result = self.graph.invoke(state)
92
+
93
+ final_message = result["messages"][-1]
94
+ if hasattr(final_message, 'content'):
95
+ result['final_answer'] = final_message.content
96
+ else:
97
+ result['final_answer'] = str(final_message)
98
+ return result['final_answer']
app.py CHANGED
@@ -6,62 +6,14 @@ import inspect
6
  import dotenv
7
  dotenv.load_dotenv() # Load environment variables from .env file
8
  import pandas as pd
9
- from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
10
- from llama_index.core.agent.workflow import AgentWorkflow, AgentOutput, ToolCall, ToolCallResult, AgentInput, AgentStream
11
- from agents import thinking_agent, technician_agent, manager_agent
12
- import asyncio
13
  # (Keep Constants as is)
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
- llm = HuggingFaceInferenceAPI(model_name="deepseek-ai/DeepSeek-R1", provider="auto", token=os.environ.get("HF_TOKEN"))
18
-
19
-
20
- #def run_and_submit_all( profile: gr.OAuthProfile | None):
21
-
22
- async def run_agent_query(agent: AgentWorkflow, question: str):
23
- """
24
- Runs the agent on a single question and returns the answer.
25
- This function is intended to be used with Gradio for interactive querying.
26
- """
27
-
28
- handler = agent.run(user_msg=question)
29
- current_agent = None
30
- final_response = None
31
- current_tool_calls = ""
32
- async for event in handler.stream_events():
33
- if (
34
- hasattr(event, "current_agent_name")
35
- and event.current_agent_name != current_agent
36
- ):
37
- current_agent = event.current_agent_name
38
- print(f"\n{'='*50}")
39
- print(f"🤖 Agent: {current_agent}")
40
- print(f"{'='*50}\n")
41
 
42
- if isinstance(event, AgentStream):
43
- if event.delta:
44
- print(event.delta, end="", flush=True)
45
- elif isinstance(event, AgentInput):
46
- print("📥 Input:", event.input)
47
- elif isinstance(event, AgentOutput):
48
- if event.response.content:
49
- print("📤 Output:", event.response.content)
50
- final_response = event.response.content
51
- if event.tool_calls:
52
- print(
53
- "🛠️ Planning to use tools:",
54
- [call.tool_name for call in event.tool_calls],
55
- )
56
- elif isinstance(event, ToolCallResult):
57
- print(f"🔧 Tool Result ({event.tool_name}):")
58
- print(f" Arguments: {event.tool_kwargs}")
59
- print(f" Output: {event.tool_output}")
60
- elif isinstance(event, ToolCall):
61
- print(f"🔨 Calling Tool: {event.tool_name}")
62
- print(f" With arguments: {event.tool_kwargs}")
63
- return final_response if final_response else "No response from agent."
64
- def run_and_submit_all():
65
  """
66
  Fetches all questions, runs the BasicAgent on them, submits all answers,
67
  and displays the results.
@@ -69,26 +21,19 @@ def run_and_submit_all():
69
  # --- Determine HF Space Runtime URL and Repo URL ---
70
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
71
 
72
- # if profile:
73
- # username= f"{profile.username}"
74
- # print(f"User logged in: {username}")
75
- # else:
76
- # print("User not logged in.")
77
- # return "Please Login to Hugging Face with the button.", None
 
78
 
79
  api_url = DEFAULT_API_URL
80
  questions_url = f"{api_url}/questions"
81
  submit_url = f"{api_url}/submit"
82
 
83
- try:
84
- agent = AgentWorkflow(
85
- agents=[thinking_agent, technician_agent, manager_agent],
86
- root_agent=manager_agent.name,
87
- handoff_output_prompt="handoff_result: Passed to {to_agent}. Reason: {reason}. Please continue processing using the original user question."
88
- )
89
- except Exception as e:
90
- print(f"Error instantiating agent: {e}")
91
- return f"Error initializing agent: {e}", None
92
  # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
93
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
94
  print(agent_code)
@@ -118,7 +63,7 @@ def run_and_submit_all():
118
  results_log = []
119
  answers_payload = []
120
  print(f"Running agent on {len(questions_data)} questions...")
121
- for item in questions_data[:4]:
122
  task_id = item.get("task_id")
123
  question_text = item.get("question")
124
  if not task_id or question_text is None:
@@ -145,7 +90,7 @@ def run_and_submit_all():
145
  return f"An unexpected error occurred fetching file {file_name}: {e}", None
146
  try:
147
  print(f"Running agent on task {task_id} with question: {question_text}")
148
- submitted_answer = asyncio.run(run_agent_query(agent, question_text))
149
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
150
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
151
  except Exception as e:
@@ -164,20 +109,19 @@ def run_and_submit_all():
164
  # 5. Submit
165
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
166
  try:
167
- # response = requests.post(submit_url, json=submission_data, timeout=60)
168
- # response.raise_for_status()
169
- # result_data = response.json()
170
- # final_status = (
171
- # f"Submission Successful!\n"
172
- # f"User: {result_data.get('username')}\n"
173
- # f"Overall Score: {result_data.get('score', 'N/A')}% "
174
- # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
175
- # f"Message: {result_data.get('message', 'No message received.')}"
176
- # )
177
- # print("Submission successful.")
178
  results_df = pd.DataFrame(results_log)
179
- return "Nothing submitted", results_df
180
- # return final_status, results_df
181
  except requests.exceptions.HTTPError as e:
182
  error_detail = f"Server responded with status {e.response.status_code}."
183
  try:
 
6
  import dotenv
7
  dotenv.load_dotenv() # Load environment variables from .env file
8
  import pandas as pd
9
+ from agents import LangGraphAgent
10
+
 
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
  Fetches all questions, runs the BasicAgent on them, submits all answers,
19
  and displays the results.
 
21
  # --- Determine HF Space Runtime URL and Repo URL ---
22
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
23
 
24
+ if profile:
25
+ username= f"{profile.username}"
26
+ print(f"User logged in: {username}")
27
+ else:
28
+ print("User not logged in.")
29
+ return "Please Login to Hugging Face with the button.", None
30
+
31
 
32
  api_url = DEFAULT_API_URL
33
  questions_url = f"{api_url}/questions"
34
  submit_url = f"{api_url}/submit"
35
 
36
+ agent = LangGraphAgent()
 
 
 
 
 
 
 
 
37
  # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
38
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
39
  print(agent_code)
 
63
  results_log = []
64
  answers_payload = []
65
  print(f"Running agent on {len(questions_data)} questions...")
66
+ for item in questions_data:
67
  task_id = item.get("task_id")
68
  question_text = item.get("question")
69
  if not task_id or question_text is None:
 
90
  return f"An unexpected error occurred fetching file {file_name}: {e}", None
91
  try:
92
  print(f"Running agent on task {task_id} with question: {question_text}")
93
+ submitted_answer = agent.run(question_text)
94
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
95
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
96
  except Exception as e:
 
109
  # 5. Submit
110
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
111
  try:
112
+ response = requests.post(submit_url, json=submission_data, timeout=60)
113
+ response.raise_for_status()
114
+ result_data = response.json()
115
+ final_status = (
116
+ f"Submission Successful!\n"
117
+ f"User: {result_data.get('username')}\n"
118
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
119
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
120
+ f"Message: {result_data.get('message', 'No message received.')}"
121
+ )
122
+ print("Submission successful.")
123
  results_df = pd.DataFrame(results_log)
124
+ return final_status, results_df
 
125
  except requests.exceptions.HTTPError as e:
126
  error_detail = f"Server responded with status {e.response.status_code}."
127
  try:
tools.py CHANGED
@@ -1,119 +1,154 @@
1
- from llama_index.core.tools import FunctionTool
2
- from llama_index.tools.tavily_research import TavilyToolSpec
3
- from llama_index.core.schema import ImageDocument
4
- from llama_index.readers.whisper import WhisperReader
 
 
 
 
 
 
 
 
5
 
6
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- tool_spec = TavilyToolSpec(
9
- api_key=os.environ.get("TAVILY_API_KEY"),
10
- )
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- search_tool = FunctionTool.from_defaults(tool_spec.search)
13
 
14
- from llama_index.multi_modal_llms.huggingface import HuggingFaceMultiModal
 
 
 
 
 
 
 
 
 
15
 
16
- def describe_image(image_path: str, prompt: str = "Describe the following image:") -> str:
 
17
  """
18
- Function to describe an image using a multi-modal LLM.
19
- :param image_path: Path to the image file or base64 encoded image data.
20
- :param prompt: Prompt to use for the description. Defaults to "Describe the following image:".
21
- :return: Description of the image.
 
22
  """
23
- image = ImageDocument(image_path=image_path)
24
  try:
25
- llm = HuggingFaceMultiModal.from_model_name("Qwen/Qwen2-VL-2B-Instruct", use_fast=True)
26
- return llm.complete(
27
- prompt=f"{prompt}",
28
- image_documents=[image]
29
- ).text
30
  except Exception as e:
31
- return f"Error describing image: {e}"
32
-
33
- describe_image_tool = FunctionTool.from_defaults(describe_image)
34
-
35
- # Tool to parse xls/xlsx files
36
- def parse_excel(file_path: str) -> str:
37
- """
38
- Function to parse an Excel file and return its content as a string.
39
- :param file_path: Path to the Excel file (xls or xlsx).
40
- :return: Content of the Excel file as a string.
41
- """
42
- import pandas as pd
43
-
44
- df = pd.read_excel(io=file_path)
45
-
46
- # Convert DataFrame to string
47
- return df.to_string() if not df.empty else "The Excel file is empty."
48
-
49
- parse_excel_tool = FunctionTool.from_defaults(parse_excel)
50
 
51
- def access_webpage(url: str) -> str:
 
52
  """
53
- Function to access a webpage and return its content.
54
- :param url: URL of the webpage to access.
55
- :return: Content of the webpage.
 
 
56
  """
57
- import requests
58
  try:
59
- print(f"Accessing webpage: {url}")
60
  response = requests.get(url)
61
- print(f"Response status code: {response.status_code}")
62
- response.raise_for_status() # Raise an error for bad responses
63
  return response.text
64
- except requests.RequestException as e:
65
- return f"Error accessing {url}: {e}"
66
  except Exception as e:
67
- return f"An unexpected error occurred: {e}"
68
-
69
- access_webpage_tool = FunctionTool.from_defaults(access_webpage)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- def string_functions(input_string: str, operation: str) -> str:
 
72
  """
73
- Function to perform string operations.
74
- :param input_string: The input string to operate on.
75
- :param operation: The operation to perform (e.g., "uppercase", "lowercase", "reverse", "length", "count_vowels", "count_consonants", "count_words", "count_sentences").
76
- :return: Result of the string operation.
 
77
  """
78
- if operation == "uppercase":
79
- return input_string.upper()
80
- elif operation == "lowercase":
81
- return input_string.lower()
82
- elif operation == "reverse":
83
- return input_string[::-1]
84
- elif operation == "length":
85
- return str(len(input_string))
86
- elif operation == "count_vowels":
87
- vowels = "aeiouAEIOU"
88
- return str(sum(1 for char in input_string if char in vowels))
89
- elif operation == "count_consonants":
90
- vowels = "aeiouAEIOU"
91
- return str(sum(1 for char in input_string if char.isalpha() and char not in vowels))
92
- elif operation == "count_words":
93
- return str(len(input_string.split()))
94
- elif operation == "count_sentences":
95
- import re
96
- sentences = re.split(r'[.!?]+', input_string)
97
- return str(len([s for s in sentences if s.strip()]))
98
- else:
99
- return "Invalid operation. Supported operations: uppercase, lowercase, reverse."
100
 
101
- string_functions_tool = FunctionTool.from_defaults(string_functions)
102
 
103
- def transcribe_audio(audio_path: str) -> str:
104
- """
105
- Function to transcribe audio using a multi-modal LLM.
106
- :param audio_path: Path to the audio file.
107
- :return: Transcription of the audio.
108
- """
109
- try:
110
- reader = WhisperReader(api_key=os.environ.get("OPENAI_API_KEY"))
111
- documents = reader.load_data(file=audio_path)
112
- if not documents:
113
- return "No audio content found."
114
- # Assuming the first document contains the transcription
115
- return documents[0].text if documents else "No transcription available."
116
- except Exception as e:
117
- return f"Error transcribing audio: {e}"
118
-
119
- transcribe_audio_tool = FunctionTool.from_defaults(transcribe_audio)
 
1
+ from langchain_core.tools import tool
2
+ import dotenv
3
+ dotenv.load_dotenv()
4
+ from langchain_tavily import TavilySearch, TavilyExtract
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_community.tools.riza.command import ExecPython
8
+ from langchain_community.tools import BraveSearch
9
+ from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
10
+ import requests
11
+ import base64
12
+ import pandas as pd
13
 
14
+ # def search_tool(query: str) -> str:
15
+ # """
16
+ # Search the web for information using Tavily API (or similar service).
17
+ # """
18
+ # api_key = os.environ.get("TAVILY_API_KEY")
19
+ # if not api_key:
20
+ # return "TAVILY_API_KEY not set."
21
+ # url = f"https://api.tavily.com/search?q={query}&key={api_key}"
22
+ # try:
23
+ # response = requests.get(url)
24
+ # response.raise_for_status()
25
+ # return response.text
26
+ # except Exception as e:
27
+ # return f"Error searching web: {e}"
28
 
29
@tool
def describe_image_tool(image_path: str, prompt: str = "Describe the following image:") -> str:
    """
    Describe an image using a vision language model.
    Args:
        image_path (str): Path to the image file.
        prompt (str): Prompt to guide the description.
    Returns:
        str: Description of the image, or an error message if it could not be produced.
    """
    # Function-scope import: mimetypes is standard library and only needed here.
    import mimetypes

    print(f"Describing image: {image_path} with prompt: {prompt}")

    try:
        with open(image_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")
    except OSError as e:
        # Report unreadable/missing files as text, like the other tools do.
        return f"Error reading image {image_path}: {e}"

    # Label the data URL with the file's real image type; fall back to JPEG
    # when the type cannot be guessed from the file name.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    message = [
        HumanMessage(
            content=[
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}},
            ]
        )
    ]

    try:
        vision_llm = ChatOpenAI(model="gpt-4o")
        response = vision_llm.invoke(message)
    except Exception as e:  # tool boundary: surface model/API failures as text
        return f"Error describing image: {e}"

    if response:
        return response.content
    return f"[Image description for {image_path} with prompt '{prompt}' not available.]"
56
 
57
@tool
def parse_excel_tool(file_path: str) -> str:
    """
    Parse an Excel file and return its content as a string.
    Args:
        file_path (str): Path to the Excel file.
    Returns:
        str: Content of the Excel file as a string.
    """
    try:
        frame = pd.read_excel(io=file_path)
        # An empty frame gets a fixed notice instead of an empty table dump.
        if frame.empty:
            return "The Excel file is empty."
        return frame.to_string()
    except Exception as e:
        # Failures are reported as text rather than raised.
        return f"Error parsing Excel file: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
@tool
def access_webpage_tool(url: str) -> str:
    """
    Access a webpage and return its content. This should not be used for YouTube
    Args:
        url (str): URL of the webpage to access.
    Returns:
        str: Content of the webpage or an error message.
    """
    print(f"Accessing webpage: {url}")
    try:
        # Without a timeout an unresponsive server would block the agent
        # indefinitely; a timeout error is reported like any other failure.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.text
    except Exception as e:  # tool boundary: report the failure as text
        return f"Error accessing {url}: {e}"
88
+ # @tool
89
+ # def string_functions_tool(input_string: str, operation: str) -> str:
90
+ # """
91
+ # Perform various string operations based on the specified operation.
92
+ # Args:
93
+ # input_string (str): The string to operate on.
94
+ # operation (str): The operation to perform. Supported operations:
95
+ # 'uppercase', 'lowercase', 'reverse', 'length',
96
+ # 'count_vowels', 'count_consonants', 'count_words',
97
+ # 'count_sentences'.
98
+ # Returns:
99
+ # str: Result of the operation or an error message if the operation is invalid.
100
+ # """
101
+ # if operation == "uppercase":
102
+ # return input_string.upper()
103
+ # elif operation == "lowercase":
104
+ # return input_string.lower()
105
+ # elif operation == "reverse":
106
+ # return input_string[::-1]
107
+ # elif operation == "length":
108
+ # return str(len(input_string))
109
+ # elif operation == "count_vowels":
110
+ # vowels = "aeiouAEIOU"
111
+ # return str(sum(1 for char in input_string if char in vowels))
112
+ # elif operation == "count_consonants":
113
+ # vowels = "aeiouAEIOU"
114
+ # return str(sum(1 for char in input_string if char.isalpha() and char not in vowels))
115
+ # elif operation == "count_words":
116
+ # return str(len(input_string.split()))
117
+ # elif operation == "count_sentences":
118
+ # import re
119
+ # sentences = re.split(r'[.!?]+', input_string)
120
+ # return str(len([s for s in sentences if s.strip()]))
121
+ # else:
122
+ # return "Invalid operation. Supported operations: uppercase, lowercase, reverse."
123
 
124
@tool
def audio_file_transcriber(file_path: str) -> str:
    """
    Transcribe an audio file
    Args:
        file_path (str): the file path of the audio file
    Returns:
        str: A transcription of the audio, or an error message if transcription fails
    """
    try:
        loader = AssemblyAIAudioTranscriptLoader(file_path=file_path)
        docs = loader.load()
    except Exception as e:  # tool boundary: report the failure as text
        return f"Error transcribing audio: {e}"

    # Guard against an empty result instead of raising IndexError on docs[0].
    if not docs:
        return "No transcription available."
    return docs[0].page_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
 
137
 
138
# Web search tool backed by Brave Search.
# NOTE(review): LangChain's Brave examples use `count` as the result-limit
# key -- confirm that `max_results` is honoured by the search API.
brave_web_search = BraveSearch.from_search_kwargs({"max_results": 5})

# Disabled Tavily-based alternative to the Brave search tool.
# search_tool = TavilySearch(
#     max_results=5,
#     include_answer="basic",
#     include_images=True,
#     include_image_descriptions=True,
#     search_depth="advanced"
# )

# Page-content extraction tool backed by Tavily.
webpage_extraction_tool = TavilyExtract(
    extract_depth="advanced",
    include_images=True,
)

# Python execution tool backed by Riza.
python_code_interpreter_tool = ExecPython()