Spaces:
Sleeping
Sleeping
jesusgj
committed on
Commit
·
0a282bd
1
Parent(s):
7ceabea
Modified files
Browse files
agent.py
CHANGED
|
@@ -9,7 +9,6 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
|
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
from requests.exceptions import RequestException
|
| 11 |
import wikipedia
|
| 12 |
-
from llama_index.core.schema import Document
|
| 13 |
from llama_index.readers.web import BeautifulSoupWebReader
|
| 14 |
|
| 15 |
from smolagents import (
|
|
@@ -73,7 +72,6 @@ def normalize_answer_format(answer: str) -> str:
|
|
| 73 |
answer = answer.strip().rstrip('.')
|
| 74 |
is_list = ',' in answer and len(answer.split(',')) > 1
|
| 75 |
try:
|
| 76 |
-
# Check if it can be a number, ignoring commas for list check
|
| 77 |
is_numeric = not is_list and float(answer.replace(',', '')) is not None
|
| 78 |
except ValueError:
|
| 79 |
is_numeric = False
|
|
@@ -113,9 +111,9 @@ def initialize_agent():
|
|
| 113 |
|
| 114 |
# --- Tool Definitions for the Agent ---
|
| 115 |
|
| 116 |
-
@tool
|
| 117 |
@retry
|
| 118 |
@lru_cache(maxsize=128)
|
|
|
|
| 119 |
def get_webpage_content(url: str) -> str:
|
| 120 |
"""
|
| 121 |
Extracts the text content from a single webpage.
|
|
@@ -128,12 +126,11 @@ def initialize_agent():
|
|
| 128 |
docs = loader.load_data(urls=[url])
|
| 129 |
if not docs or not docs[0].text:
|
| 130 |
raise ValueError(f"No content could be extracted from {url}")
|
| 131 |
-
# Return up to the first 15,000 characters to avoid overwhelming the context window.
|
| 132 |
return docs[0].text[:15000]
|
| 133 |
|
| 134 |
-
@tool
|
| 135 |
@retry
|
| 136 |
@lru_cache(maxsize=128)
|
|
|
|
| 137 |
def get_youtube_transcript(video_url: str) -> str:
|
| 138 |
"""
|
| 139 |
Fetches the full transcript of a YouTube video as a single string.
|
|
@@ -150,15 +147,14 @@ def initialize_agent():
|
|
| 150 |
try:
|
| 151 |
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
| 152 |
transcript_text = ' '.join([t['text'] for t in transcript_list])
|
| 153 |
-
# Return up to the first 15,000 characters
|
| 154 |
return transcript_text[:15000]
|
| 155 |
except (TranscriptsDisabled, NoTranscriptFound) as e:
|
| 156 |
logging.error(f"Could not retrieve transcript for {video_url}: {e}")
|
| 157 |
raise YouTubeTranscriptApiError(f"Transcript not available for video {video_id}.") from e
|
| 158 |
|
| 159 |
-
@tool
|
| 160 |
@retry
|
| 161 |
@lru_cache(maxsize=32)
|
|
|
|
| 162 |
def wikipedia_search(query: str) -> str:
|
| 163 |
"""
|
| 164 |
Searches Wikipedia for a given query and returns a summary.
|
|
|
|
| 9 |
from dotenv import load_dotenv
|
| 10 |
from requests.exceptions import RequestException
|
| 11 |
import wikipedia
|
|
|
|
| 12 |
from llama_index.readers.web import BeautifulSoupWebReader
|
| 13 |
|
| 14 |
from smolagents import (
|
|
|
|
| 72 |
answer = answer.strip().rstrip('.')
|
| 73 |
is_list = ',' in answer and len(answer.split(',')) > 1
|
| 74 |
try:
|
|
|
|
| 75 |
is_numeric = not is_list and float(answer.replace(',', '')) is not None
|
| 76 |
except ValueError:
|
| 77 |
is_numeric = False
|
|
|
|
| 111 |
|
| 112 |
# --- Tool Definitions for the Agent ---
|
| 113 |
|
|
|
|
| 114 |
@retry
|
| 115 |
@lru_cache(maxsize=128)
|
| 116 |
+
@tool
|
| 117 |
def get_webpage_content(url: str) -> str:
|
| 118 |
"""
|
| 119 |
Extracts the text content from a single webpage.
|
|
|
|
| 126 |
docs = loader.load_data(urls=[url])
|
| 127 |
if not docs or not docs[0].text:
|
| 128 |
raise ValueError(f"No content could be extracted from {url}")
|
|
|
|
| 129 |
return docs[0].text[:15000]
|
| 130 |
|
|
|
|
| 131 |
@retry
|
| 132 |
@lru_cache(maxsize=128)
|
| 133 |
+
@tool
|
| 134 |
def get_youtube_transcript(video_url: str) -> str:
|
| 135 |
"""
|
| 136 |
Fetches the full transcript of a YouTube video as a single string.
|
|
|
|
| 147 |
try:
|
| 148 |
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
| 149 |
transcript_text = ' '.join([t['text'] for t in transcript_list])
|
|
|
|
| 150 |
return transcript_text[:15000]
|
| 151 |
except (TranscriptsDisabled, NoTranscriptFound) as e:
|
| 152 |
logging.error(f"Could not retrieve transcript for {video_url}: {e}")
|
| 153 |
raise YouTubeTranscriptApiError(f"Transcript not available for video {video_id}.") from e
|
| 154 |
|
|
|
|
| 155 |
@retry
|
| 156 |
@lru_cache(maxsize=32)
|
| 157 |
+
@tool
|
| 158 |
def wikipedia_search(query: str) -> str:
|
| 159 |
"""
|
| 160 |
Searches Wikipedia for a given query and returns a summary.
|