jesusgj committed on
Commit 0a282bd · 1 Parent(s): 7ceabea

Modified files

Files changed (1)
  1. agent.py +3 -7
agent.py CHANGED
@@ -9,7 +9,6 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
 from dotenv import load_dotenv
 from requests.exceptions import RequestException
 import wikipedia
-from llama_index.core.schema import Document
 from llama_index.readers.web import BeautifulSoupWebReader
 
 from smolagents import (
@@ -73,7 +72,6 @@ def normalize_answer_format(answer: str) -> str:
     answer = answer.strip().rstrip('.')
     is_list = ',' in answer and len(answer.split(',')) > 1
     try:
-        # Check if it can be a number, ignoring commas for list check
         is_numeric = not is_list and float(answer.replace(',', '')) is not None
     except ValueError:
         is_numeric = False
@@ -113,9 +111,9 @@ def initialize_agent():
 
     # --- Tool Definitions for the Agent ---
 
-    @tool
     @retry
     @lru_cache(maxsize=128)
+    @tool
     def get_webpage_content(url: str) -> str:
         """
         Extracts the text content from a single webpage.
@@ -128,12 +126,11 @@ def initialize_agent():
         docs = loader.load_data(urls=[url])
         if not docs or not docs[0].text:
             raise ValueError(f"No content could be extracted from {url}")
-        # Return up to the first 15,000 characters to avoid overwhelming the context window.
         return docs[0].text[:15000]
 
-    @tool
     @retry
     @lru_cache(maxsize=128)
+    @tool
     def get_youtube_transcript(video_url: str) -> str:
         """
         Fetches the full transcript of a YouTube video as a single string.
@@ -150,15 +147,14 @@ def initialize_agent():
         try:
             transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
             transcript_text = ' '.join([t['text'] for t in transcript_list])
-            # Return up to the first 15,000 characters
             return transcript_text[:15000]
         except (TranscriptsDisabled, NoTranscriptFound) as e:
             logging.error(f"Could not retrieve transcript for {video_url}: {e}")
             raise YouTubeTranscriptApiError(f"Transcript not available for video {video_id}.") from e
 
-    @tool
     @retry
     @lru_cache(maxsize=32)
+    @tool
     def wikipedia_search(query: str) -> str:
         """
         Searches Wikipedia for a given query and returns a summary.
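
The functional change in this commit is the position of @tool in each decorator stack: it now sits directly above the def, so Python applies it first, before @lru_cache and @retry. A minimal sketch of that ordering rule, using hypothetical stand-ins for tool and retry (the real decorators come from smolagents and from elsewhere in agent.py; only the ordering behavior shown here is the point):

from functools import lru_cache

# Hypothetical stand-ins for smolagents' tool decorator and the repo's retry
# decorator, used only to make the application order visible.
def tool(fn):
    print("tool applied")   # runs first now that @tool sits closest to def
    return fn

def retry(fn):
    print("retry applied")  # runs last, wrapping the cached callable
    return fn

# Decorators apply bottom-up, so this stack is equivalent to:
#   wikipedia_search = retry(lru_cache(maxsize=32)(tool(raw_function)))
@retry
@lru_cache(maxsize=32)
@tool
def wikipedia_search(query: str) -> str:
    return f"summary for {query}"

print(wikipedia_search("Python (programming language)"))
print(wikipedia_search("Python (programming language)"))  # second call served from lru_cache

In the previous order, with @tool outermost, the tool wrapper was applied to the already cached and retried callable; with @tool innermost, lru_cache and retry now wrap whatever object the real tool decorator returns, which is the ordering this commit opts into.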