Attempt with Wikipedia parsing tools
Changed files:
- agents/agent.py +1 -5
- agents/video_agent.py +14 -4
- app.py +46 -10
- prompts/default_prompt.py +45 -0
- pyproject.toml +4 -0
- run_local_agent.py +42 -2
- tools/__init__.py +34 -0
- tools/open_files.py +95 -0
- tools/parse_wikipedia_table.py +105 -0
- tools/video_analyzer.py +73 -55
- tools/web_utils.py +6 -1
- tools/webpage_parser.py +28 -0
- uv.lock +93 -0
agents/agent.py CHANGED

@@ -1,15 +1,11 @@
 from smolagents import (
     CodeAgent,
-    DuckDuckGoSearchTool,
-    WikipediaSearchTool,
     LiteLLMModel,
     Tool,
 )
-from tools.text_search import TextSearch
-from tools.text_splitter import text_splitter
-from tools.video_analyzer import WebVideoAnalyzerTool
 from typing import Callable
 
+
 class MyAgent:
     def __init__(
         self,
agents/video_agent.py CHANGED

@@ -34,20 +34,30 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
     driver = helium.get_driver()
     current_step = memory_step.step_number
     if driver is not None:
-        for previous_memory_step in agent.memory.steps:  # Remove previous screenshots for lean processing
-            if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
+        for (
+            previous_memory_step
+        ) in agent.memory.steps:  # Remove previous screenshots for lean processing
+            if (
+                isinstance(previous_memory_step, ActionStep)
+                and previous_memory_step.step_number <= current_step - 2
+            ):
                 previous_memory_step.observations_images = None
         png_bytes = driver.get_screenshot_as_png()
         image = Image.open(BytesIO(png_bytes))
         print(f"Captured a browser screenshot: {image.size} pixels")
-        memory_step.observations_images = [image.copy()]  # Create a copy to ensure it persists
+        memory_step.observations_images = [
+            image.copy()
+        ]  # Create a copy to ensure it persists
 
     # Update observations with current URL
     url_info = f"Current url: {driver.current_url}"
     memory_step.observations = (
-        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
+        url_info
+        if memory_step.observations is None
+        else memory_step.observations + "\n" + url_info
     )
 
+
 video_agent = MyAgent(
     api_key=os.getenv("GEMINI_API_KEY"),
     temperature=0.0,
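`save_screenshot` above is a step callback: it strips screenshots from memory steps that are at least two steps old and attaches a fresh screenshot plus the current URL to the step being recorded, keeping the model's context lean. A minimal wiring sketch, assuming smolagents' `step_callbacks` hook is what `MyAgent` forwards its callbacks through (the argument name is an assumption, not shown in this diff):

import helium
from smolagents import CodeAgent, LiteLLMModel

# Sketch: run save_screenshot after every action step of a browsing agent.
model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite")
browser_agent = CodeAgent(
    tools=[],                          # navigation tools would go here
    model=model,
    step_callbacks=[save_screenshot],  # assumed hook; prunes old frames, adds the new one
)
helium.start_chrome(headless=False)    # save_screenshot reads helium.get_driver()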
app.py CHANGED

@@ -5,10 +5,51 @@ import pandas as pd
 from agents.agent import MyAgent
 import time
 from tqdm import tqdm
+from prompts.default_prompt import generate_prompt
+from smolagents import (
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+)
+from tools.text_search import TextSearch
+from tools.text_splitter import text_splitter
+from tools.webpage_parser import WebpageParser
+from tools.parse_wikipedia_table import WikipediaParser
+from tools.open_files import OpenFilesTool
 
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
+myagent_args = {
+    "provider": "litellm",
+    "model_id": "gemini/gemini-2.0-flash-lite",
+    # "api_base": OLLAMA_API_BASE,
+    "planning_interval": 3,
+    "tools": [
+        DuckDuckGoSearchTool(),
+        WikipediaParser(),
+        VisitWebpageTool(),
+        TextSearch(),
+        text_splitter,
+        WebpageParser(),
+        OpenFilesTool(),
+    ],
+    "additional_authorized_imports": [
+        "pandas",
+        "numpy",
+        "datetime",
+        "json",
+        "re",
+        "math",
+        "os",
+        "requests",
+        "csv",
+        "urllib",
+    ],
+    "num_ctx": 8192,
+    "temperature": 0.2,
+}
+
+
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
@@ -30,14 +71,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = MyAgent(
-            provider="litellm",
-            model_id="gemini/gemini-2.0-flash-lite",
-            api_key=os.getenv("GEMINI_API_KEY"),
-            planning_interval=3,
-            num_ctx=8192,
-            temperature=0.2,
-        )
+        agent = MyAgent(**myagent_args)
 
     except Exception as e:
         print(f"Error instantiating agent: {e}")
@@ -72,17 +106,19 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in tqdm(
-        questions_data
+        questions_data,
         desc="Agent is answering questions...",
         total=len(questions_data),
     ):
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
+        prompt = generate_prompt(question_text, file_name)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
+            submitted_answer = agent(prompt)
             time.sleep(30)  # to avoid rate limiting
             answers_payload.append(
                 {"task_id": task_id, "submitted_answer": submitted_answer}
prompts/default_prompt.py ADDED

@@ -0,0 +1,45 @@
+def generate_prompt(question_text, file_name):
+    """
+    Generates a prompt for the agent based on the provided question text and file name.
+
+    Args:
+        question_text (str): The question to be answered.
+        file_name (str): The name of the file to be used in the task.
+
+    Returns:
+        str: The generated prompt.
+    """
+    # Define the full prompt with instructions and guidelines
+
+    full_prompt = f"""You are a highly precise answering agent.
+When given a question:
+- If necessary, perform a web search using the tool `DuckDuckGoSearchTool` to find possible sources of information.
+- Use the `visit_webpage` tool to visit the webpage and extract the content in markdown format.
+- If the web search only returns titles and short snippets, you MUST visit the actual webpage to read the full content before answering.
+- Use the `WikipediaParser` tool to fetch and read the Wikipedia page when necessary.
+- You only have the ability to read Wikipedia pages.
+- If the task requires reading, listening, or analyzing a file, you must use the file specified in the `file_name` field of the task metadata, not the file name mentioned casually inside the question text.
+- Comma separated lists MUST contain a single space after each comma.
+- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+- Only answer after you have gathered enough information by reading the actual page contents.
+- Once you have the final answer, you must call `final_answer("your_answer")` immediately after printing it.
+- Do not retry or execute anything else after calling `final_answer`.
+- `final_answer` must wrap the exact printed value.
+Provide ONLY the precise answer requested.
+Do not include explanations, steps, reasoning, or additional text.
+Be direct and specific. GAIA benchmark requires exact matching answers.
+Example: if asked "What is the capital of France?", respond exactly:
+Thoughts: I need to retrieve the capital of France from Wikipedia and output it directly.
+Code:
+```py
+print("Paris")
+```<end_code>
+Based on the above guidelines, answer the following question:
+--begin of question--
+{question_text}
+--end of question--
+If the question mentions the need to use a file, use the following `file_name` value as the `file_name` parameter in any function calls:
+file_name: {file_name}"""
+    return full_prompt
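For reference, a sketch of how `generate_prompt` is consumed by the run loops added in `app.py` and `utils/__init__.py`; the task dict below is a hypothetical example of an item from the scoring API's questions payload:

# Hypothetical task item; real ids come from the /questions endpoint.
item = {
    "task_id": "3f1c9a2e",
    "question": "What is the capital of France?",
    "file_name": "",  # empty when the task has no attachment
}

prompt = generate_prompt(item["question"], item["file_name"])
submitted_answer = agent(prompt)  # agent is a MyAgent built from myagent_args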
pyproject.toml CHANGED

@@ -5,13 +5,17 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "beautifulsoup4>=4.13.4",
     "ffmpeg>=1.4",
     "gradio[oauth]>=5.27.0",
     "helium>=5.1.1",
     "litellm==1.67.1",
+    "markdownify>=1.1.0",
     "numpy>=2.2.5",
     "openai>=1.76.0",
+    "openai-whisper>=20240930",
     "opencv-python>=4.11.0.86",
+    "openpyxl>=3.1.5",
     "pandas>=2.2.3",
     "pillow>=11.2.1",
     "python-dotenv>=1.1.0",
run_local_agent.py CHANGED

@@ -1,5 +1,17 @@
 from agents.agent import MyAgent
 from utils import run_agent
+from smolagents import (
+    DuckDuckGoSearchTool,
+    # WikipediaSearchTool,
+    VisitWebpageTool,
+)
+from tools.text_search import TextSearch
+from tools.text_splitter import text_splitter
+from tools.webpage_parser import WebpageParser
+from tools.parse_wikipedia_table import WikipediaParser
+from tools.open_files import OpenFilesTool
+from prompts.default_prompt import generate_prompt
+
 
 import os
 import json
@@ -13,12 +25,35 @@ OLLAMA_API_BASE: str = os.getenv("OLLAMA_API_BASE", default="http://localhost:11434")
 OLLAMA_API_KEY: str | None = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
 OLLAMA_NUM_CTX: int = int(os.getenv("OLLAMA_NUM_CTX", default=8192))
 
+
 myagent_args = {
     "provider": "litellm",
     "model_id": "gemini/gemini-2.0-flash-lite",
     # "api_base": OLLAMA_API_BASE,
     "planning_interval": 3,
-    "
+    "tools": [
+        DuckDuckGoSearchTool(),
+        WikipediaParser(),
+        VisitWebpageTool(),
+        TextSearch(),
+        text_splitter,
+        WebpageParser(),
+        OpenFilesTool(),
+    ],
+    "additional_authorized_imports": [
+        "pandas",
+        "numpy",
+        "datetime",
+        "json",
+        "re",
+        "math",
+        "os",
+        "requests",
+        "csv",
+        "urllib",
+    ],
+    "num_ctx": 8192,
+    "temperature": 0.2,
 }
 
 print(f"Using args: {myagent_args}")
@@ -29,6 +64,11 @@ if __name__ == "__main__":
     with open(QUESTIONS_FILEPATH, "r") as f:
         questions = json.load(f)
 
-    answers = run_agent(agent, questions)
+    question = questions[0]
+    question_text = question.get("question")
+    file_name = question.get("file_name")
+    prompt = generate_prompt(question_text, file_name)
+
+    answers = run_agent(agent, [questions[0]])
     print("Answers:", answers)
     print("Finished running the agent.")
tools/__init__.py CHANGED

@@ -0,0 +1,34 @@
+import re
+import requests
+from markdownify import markdownify
+from requests.exceptions import RequestException
+from smolagents import tool
+
+
+@tool
+def visit_webpage(url: str) -> str:
+    """Visits a webpage at the given URL and returns its content as a markdown string.
+
+    Args:
+        url: The URL of the webpage to visit.
+
+    Returns:
+        The content of the webpage converted to Markdown, or an error message if the request fails.
+    """
+    try:
+        # Send a GET request to the URL
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception for bad status codes
+
+        # Convert the HTML content to Markdown
+        markdown_content = markdownify(response.text).strip()
+
+        # Remove multiple line breaks
+        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
+
+        return markdown_content
+
+    except RequestException as e:
+        return f"Error fetching the webpage: {str(e)}"
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}"
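Since `visit_webpage` is a plain `@tool`-decorated function, the resulting tool object should stay directly callable, which makes it easy to smoke-test the HTML-to-markdown conversion outside the agent; a quick sketch (the URL is only an example):

# Manual check of the markdownify pipeline; errors come back as strings.
md = visit_webpage("https://en.wikipedia.org/wiki/Mercedes_Sosa")
print(md[:500])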
tools/open_files.py ADDED

@@ -0,0 +1,95 @@
+from smolagents import Tool
+import os
+import json
+import csv
+import openpyxl
+import whisper
+import requests
+
+
+class OpenFilesTool(Tool):
+    name = "open_files_tool"
+    description = (
+        "This tool opens files and returns their content as a string. "
+        "It can handle txt, csv, json, xlsx, and mp3 file types."
+    )
+    inputs = {
+        "file_path": {
+            "type": "string",
+            "description": "The path to the file to be opened.",
+        },
+        "file_type": {
+            "type": "string",
+            "description": "The type of the file (txt, csv, json, xlsx, mp3). Default is 'txt'.",
+            "nullable": True,
+        },
+    }
+    output_type = "string"
+
+
+    def download_file(self, file_name: str) -> None:
+        if not os.path.exists(file_name):
+            url = f"https://agents-course-unit4-scoring.hf.space/files/{file_name.split('.')[0]}"
+            r = requests.get(url)
+            with open(file_name, "wb") as f:
+                f.write(r.content)
+
+
+    def open_file_as_text(self, file_name: str, filetype: str = "txt") -> str:
+        """
+        Opens a file and returns its content as readable text.
+        Supports 'txt', 'json', 'csv', 'xlsx', and 'mp3' (transcribes speech to text).
+        Args:
+            file_name (str): The path or name of the file.
+            filetype (Optional[str]): Type of file ('txt', 'json', 'csv', 'xlsx', 'mp3'). Defaults to 'txt'.
+        Returns:
+            str: The content of the file as text, or transcribed speech if 'mp3'.
+        """
+        self.download_file(file_name)
+        try:
+            if filetype == "txt":
+                with open(file_name, "r", encoding="utf-8") as f:
+                    return f.read()
+
+            elif filetype == "json":
+                with open(file_name, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+                return json.dumps(data, indent=2)
+
+            elif filetype == "csv":
+                with open(file_name, "r", encoding="utf-8") as f:
+                    reader = csv.reader(f)
+                    rows = list(reader)
+                return "\n".join([", ".join(row) for row in rows])
+
+            elif filetype == "xlsx":
+                wb = openpyxl.load_workbook(file_name, data_only=True)
+                sheet = wb.active
+                content = []
+                for row in sheet.iter_rows(values_only=True):
+                    content.append(", ".join(str(cell) if cell is not None else "" for cell in row))
+                return "\n".join(content)
+
+            elif filetype == "mp3":
+                w = whisper.load_model("base")
+                res = w.transcribe(file_name)
+                return res["text"]
+
+            else:
+                return f"Unsupported filetype '{filetype}'. Supported types are 'txt', 'json', 'csv', 'xlsx', and 'mp3'."
+
+        except FileNotFoundError:
+            return f"File '{file_name}' not found."
+        except Exception as e:
+            return f"Error opening file '{file_name}': {str(e)}"
+
+    def forward(self, file_path: str, file_type: str = "txt") -> str:
+        """
+        Opens a file and returns its content as a string.
+        Args:
+            file_path (str): The path to the file to be opened.
+            file_type (str): The type of the file (txt, csv, json, xlsx, mp3). Default is 'txt'.
+        Returns:
+            str: The content of the file as a string.
+        """
+        return self.open_file_as_text(file_path, file_type)
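A usage sketch for the tool (the file name is hypothetical). Note that `download_file` derives the task id from the part of the file name before the first dot, so attachments are expected to be named `<task_id>.<ext>`, and `file_type` must be one of the exact strings `open_file_as_text` checks:

# Sketch: fetch a task attachment by name and read it as a spreadsheet.
tool = OpenFilesTool()
content = tool.forward("7bd855d8.xlsx", file_type="xlsx")  # hypothetical file
print(content.splitlines()[0])  # header row, cells joined with ", "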
tools/parse_wikipedia_table.py ADDED

@@ -0,0 +1,105 @@
+from smolagents import Tool
+import requests
+from bs4 import BeautifulSoup, Tag
+
+
+class WikipediaParser(Tool):
+    name: str = "wikipedia_parser_tool"
+    description: str = (
+        "This tool parses a Wikipedia page into a clean, readable text format."
+    )
+    inputs: dict[str, dict[str, str]] = {
+        "url": {
+            "type": "string",
+            "description": "The Wikipedia page url.",
+        }
+    }
+    output_type: str = "string"
+
+    def get_wikipedia_page(self, url: str) -> str:
+        """
+        Fetches the content of a Wikipedia page given its URL.
+        Args:
+            url (str): The URL of the Wikipedia page.
+        Returns:
+            str: The HTML content of the page.
+        """
+
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"  # AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
+        }
+        resp = requests.get(url, headers=headers, timeout=30)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        content_div = soup.find("div", id="mw-content-text")
+        if not content_div:
+            return "Content not found."
+
+        elements: list[str] = []
+        h_tags: list[str] = [f"h{i}" for i in range(1, 6)]
+        extra_tags: list[str] = ["p", "ul", "ol"]
+        html_tags: list[str] = h_tags + extra_tags + ["table"]  # include tables so the table branch below is reachable
+
+        for elem in content_div.find_all(html_tags):
+            if elem.name in h_tags:
+                elements.append("\n\n" + elem.get_text(strip=True) + "\n")
+            elif elem.name in extra_tags:
+                elements.append(elem.get_text(strip=True))
+            elif elem.name == "table":
+                elements.append(self.parse_wikipedia_table(elem))
+
+        return "\n".join(elements)
+
+    def parse_wikipedia_table(self, table: Tag) -> str:
+        """
+        Parses a Wikipedia table into a clean, readable text format.
+        Args:
+            table (Tag): BeautifulSoup Tag for the table.
+        Returns:
+            str: Formatted table as readable text.
+        """
+        rows = []
+        headers = []
+
+        # Try to get headers
+        thead = table.find("thead")
+        if thead:
+            for th in thead.find_all("th"):
+                header_text = th.get_text(separator=" ", strip=True)
+                headers.append(header_text)
+            if headers:
+                rows.append(" | ".join(headers))
+
+        # Parse table body rows
+        tbody = table.find("tbody")
+        if not tbody:
+            tbody = table  # fallback: some tables have no tbody explicitly
+
+        for tr in tbody.find_all("tr"):
+            cells = tr.find_all(["th", "td"])
+            cell_texts = []
+            for cell in cells:
+                # Clean references like [7], [note 1], etc.
+                for sup in cell.find_all("sup", class_="reference"):
+                    sup.decompose()
+
+                text = cell.get_text(separator=" ", strip=True)
+                cell_texts.append(text)
+
+            if cell_texts:
+                row_text = " | ".join(cell_texts)
+                rows.append(row_text)
+
+        return "\n".join(rows)
+
+    def forward(self, url: str) -> str:
+        """
+        Parses the Wikipedia page and returns the content as a string.
+        Args:
+            url (str): The URL of the Wikipedia page.
+        Returns:
+            str: The parsed content of the page.
+        """
+        html_string = self.get_wikipedia_page(url)
+        return html_string
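A usage sketch (the URL is only an example); `forward` returns headings, paragraphs, lists, and pipe-delimited table rows flattened into one plain-text string:

# Tables come back as "cell | cell | cell" lines with reference markers stripped.
parser = WikipediaParser()
text = parser.forward("https://en.wikipedia.org/wiki/Mercedes_Sosa")
print(text[:400])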
tools/video_analyzer.py CHANGED

@@ -16,6 +16,7 @@ from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 import helium
 
+
 class WebVideoAnalyzerTool(Tool):
     name = "web_video_analyzer"
     description = "Analyzes a video on a webpage (YouTube, Vimeo, etc.) by taking screenshots at intervals and counting objects of a specified type in each frame."
@@ -31,11 +32,13 @@
         "duration": {
             "type": "integer",
             "description": "How many seconds of the video to analyze (default: 30)",
+            "nullable": True,
         },
         "interval": {
             "type": "integer",
             "description": "How often to take screenshots (in seconds, default: 1)",
-        }
+            "nullable": True,
+        },
     }
     output_type = "string"
 
@@ -43,9 +46,9 @@
         """Initialize the browser with appropriate settings."""
         if self.driver is not None:
             return self.driver
 
         print("Setting up browser...")
 
         # Configure Chrome options
         chrome_options = webdriver.ChromeOptions()
         chrome_options.add_argument("--force-device-scale-factor=1")
@@ -53,7 +56,7 @@
         chrome_options.add_argument("--disable-pdf-viewer")
         chrome_options.add_argument("--window-position=0,0")
         chrome_options.add_argument("--autoplay-policy=no-user-gesture-required")
 
         # Initialize the driver
         self.driver = helium.start_chrome(headless=False, options=chrome_options)
         return self.driver
@@ -63,10 +66,10 @@
         try:
             print(f"Navigating to {url}...")
             helium.go_to(url)
 
             # Wait for page to load
             time.sleep(3)
 
             # Handle YouTube-specific interactions
             if "youtube.com" in url:
                 try:
@@ -75,7 +78,7 @@
                         helium.click("Accept all")
                     elif helium.Button("I agree").exists():
                         helium.click("I agree")
 
                     # Click on the video to ensure it's playing
                     try:
                         # Find the video player element
@@ -83,21 +86,23 @@
                             EC.presence_of_element_located((By.TAG_NAME, "video"))
                         )
                         video_element.click()
 
                         # Ensure the video is playing by trying to click the play button if visible
                         try:
-                            play_button = self.driver.find_element(By.CLASS_NAME, "ytp-play-button")
+                            play_button = self.driver.find_element(
+                                By.CLASS_NAME, "ytp-play-button"
+                            )
                             if "Play" in play_button.get_attribute("aria-label"):
                                 play_button.click()
                         except:
                             pass
 
                     except:
                         print("Could not locate video element to click")
 
                 except Exception as e:
                     print(f"Error during YouTube setup: {str(e)}")
 
             # General approach - try to find and click on any video element
             else:
                 try:
@@ -107,11 +112,11 @@
                         video_elements[0].click()
                 except Exception as e:
                     print(f"Could not find or click video element: {str(e)}")
 
             # Allow video to start
             time.sleep(2)
             return True
 
         except Exception as e:
             print(f"Error navigating to {url}: {str(e)}")
             return False
@@ -121,13 +126,15 @@
         try:
             # Try pressing Escape key to close general popups
             webdriver.ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
 
             # YouTube-specific: try to close any visible dialog or popup
             if "youtube.com" in self.driver.current_url:
                 # Try to find and click close buttons on popups
                 try:
-                    close_buttons = self.driver.find_elements(
-                        By.CSS_SELECTOR, "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button")
+                    close_buttons = self.driver.find_elements(
+                        By.CSS_SELECTOR,
+                        "button.ytp-ad-overlay-close-button, button.ytp-ad-skip-button",
+                    )
                     for button in close_buttons:
                         button.click()
                 except:
@@ -143,96 +150,106 @@
     def _analyze_screenshot(self, image: Image.Image, label: str) -> int:
         """Count objects of the specified label in a screenshot."""
         detector = pipeline("object-detection", model="facebook/detr-resnet-50")
 
         try:
             # Run detection on the image
             results = detector(image)
 
             # Count objects matching the label
-            object_count = sum(
-                1 for result in results if label.lower() in result["label"].lower())
+            object_count = sum(
+                1 for result in results if label.lower() in result["label"].lower()
+            )
 
             # Debug: print detected classes
             detected_classes = [result["label"] for result in results]
             if detected_classes:
                 print(f"Detected classes: {', '.join(detected_classes)}")
 
             return object_count
 
         except Exception as e:
             print(f"Error detecting objects in screenshot: {str(e)}")
             return 0
 
-    def _capture_video_frames(self, duration: int = 30, interval: int = 1, label: str = "") -> List[Dict]:
+    def _capture_video_frames(
+        self, duration: int = 30, interval: int = 1, label: str = ""
+    ) -> List[Dict]:
         """Capture frames from the video at regular intervals."""
         results = []
 
-        print(f"Starting frame capture for {duration} seconds with {interval} second intervals...")
+        print(
+            f"Starting frame capture for {duration} seconds with {interval} second intervals..."
+        )
         temp_dir = tempfile.mkdtemp()
 
         for seconds_elapsed in range(0, duration, interval):
             # Take screenshot
             try:
                 print(f"Capturing frame at {seconds_elapsed} seconds...")
                 screenshot = self._take_screenshot()
 
                 # Save screenshot for debugging (optional)
                 screenshot_path = os.path.join(temp_dir, f"frame_{seconds_elapsed}.jpg")
                 screenshot.save(screenshot_path)
 
                 # Analyze screenshot
                 object_count = self._analyze_screenshot(screenshot, label)
 
                 # Store results
-                results.append(
-                    {
-                        "time": seconds_elapsed,
-                        "object_count": object_count,
-                        "screenshot_path": screenshot_path,
-                    })
+                results.append(
+                    {
+                        "time": seconds_elapsed,
+                        "object_count": object_count,
+                        "screenshot_path": screenshot_path,
+                    }
+                )
 
                 # Wait for next interval
                 if seconds_elapsed + interval < duration:
                     time.sleep(interval)
 
             except Exception as e:
                 print(f"Error capturing frame at {seconds_elapsed} seconds: {str(e)}")
 
         return results
 
-    def forward(self, url: str, label: str, duration: int = 30, interval: int = 1) -> str:
+    def forward(
+        self, url: str, label: str, duration: int = 30, interval: int = 1
+    ) -> str:
         """
         Analyzes a video on a webpage by taking screenshots and counting objects.
 
         Args:
             url (str): The URL of the webpage containing the video.
             label (str): The type of object to count (e.g., 'bird', 'person', 'car', 'dog').
             duration (int): How many seconds of the video to analyze.
             interval (int): How often to take screenshots (in seconds).
 
         Returns:
             str: A detailed report of object counts over time.
         """
         try:
             # Setup the browser
             self._setup_browser()
 
             # Navigate to the video
             if not self._navigate_to_video(url):
                 return f"Error: Could not navigate to or play the video at {url}"
 
             # Close any popups or overlays
             self._close_popups()
 
             # Capture and analyze frames
             frame_results = self._capture_video_frames(duration, interval, label)
 
             # Calculate summary statistics
             if not frame_results:
                 return f"Error: No frames were successfully captured and analyzed"
 
             total_objects = sum(result["object_count"] for result in frame_results)
             avg_objects = total_objects / len(frame_results)
             max_objects = max(frame_results, key=lambda x: x["object_count"])
 
             # Generate a report
             report = [
                 f"# {label.title()} Count Analysis for Video",
@@ -245,22 +262,24 @@
                 f"Average {label}s per screenshot: {avg_objects:.2f}",
                 f"Maximum {label}s in a single screenshot: {max_objects['object_count']} (at {max_objects['time']} seconds)",
                 "",
-                "## Time-based Analysis"
+                "## Time-based Analysis",
             ]
 
             # Add frame-by-frame details
             for result in frame_results:
-                report.append(
-                    f"Time {result['time']} seconds: {result['object_count']} {label}s")
+                report.append(
+                    f"Time {result['time']} seconds: {result['object_count']} {label}s"
+                )
 
             # Clean up
             try:
                 helium.kill_browser()
                 self.driver = None
             except:
                 print("Warning: Could not properly close the browser")
 
             return "\n".join(report)
 
         except Exception as e:
             # Ensure browser is closed on error
             try:
@@ -269,6 +288,5 @@
                 self.driver = None
             except:
                 pass
 
             return f"Error analyzing video: {str(e)}"
-
|
tools/web_utils.py
CHANGED
|
@@ -5,6 +5,7 @@ from selenium.webdriver.common.by import By
|
|
| 5 |
|
| 6 |
driver = None
|
| 7 |
|
|
|
|
| 8 |
@tool
|
| 9 |
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
|
| 10 |
"""
|
|
@@ -16,19 +17,23 @@ def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
|
|
| 16 |
if driver:
|
| 17 |
elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
|
| 18 |
if nth_result > len(elements):
|
| 19 |
-
raise Exception(
|
|
|
|
|
|
|
| 20 |
result = f"Found {len(elements)} matches for '{text}'."
|
| 21 |
elem = elements[nth_result - 1]
|
| 22 |
driver.execute_script("arguments[0].scrollIntoView(true);", elem)
|
| 23 |
result += f"Focused on element {nth_result} of {len(elements)}"
|
| 24 |
return result
|
| 25 |
|
|
|
|
| 26 |
@tool
|
| 27 |
def go_back() -> None:
|
| 28 |
"""Goes back to previous page."""
|
| 29 |
if driver:
|
| 30 |
driver.back()
|
| 31 |
|
|
|
|
| 32 |
@tool
|
| 33 |
def close_popups() -> str:
|
| 34 |
"""
|
|
|
|
| 5 |
|
| 6 |
driver = None
|
| 7 |
|
| 8 |
+
|
| 9 |
@tool
|
| 10 |
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
|
| 11 |
"""
|
|
|
|
| 17 |
if driver:
|
| 18 |
elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
|
| 19 |
if nth_result > len(elements):
|
| 20 |
+
raise Exception(
|
| 21 |
+
f"Match n°{nth_result} not found (only {len(elements)} matches found)"
|
| 22 |
+
)
|
| 23 |
result = f"Found {len(elements)} matches for '{text}'."
|
| 24 |
elem = elements[nth_result - 1]
|
| 25 |
driver.execute_script("arguments[0].scrollIntoView(true);", elem)
|
| 26 |
result += f"Focused on element {nth_result} of {len(elements)}"
|
| 27 |
return result
|
| 28 |
|
| 29 |
+
|
| 30 |
@tool
|
| 31 |
def go_back() -> None:
|
| 32 |
"""Goes back to previous page."""
|
| 33 |
if driver:
|
| 34 |
driver.back()
|
| 35 |
|
| 36 |
+
|
| 37 |
@tool
|
| 38 |
def close_popups() -> str:
|
| 39 |
"""
|
tools/webpage_parser.py ADDED

@@ -0,0 +1,28 @@
+from bs4 import BeautifulSoup
+from smolagents import Tool
+
+
+class WebpageParser(Tool):
+    name: str = "webpage_parser_tool"
+    description: str = (
+        "This tool parses elements from HTML to make them easily searchable."
+    )
+    inputs: dict[str, dict[str, str]] = {
+        "html_string": {
+            "type": "string",
+            "description": "The HTML content as a string.",
+        },
+    }
+    output_type: str = "array"
+
+    def forward(self, html_string: str) -> list[str]:
+        """
+        Parses the HTML string and returns all elements as an array.
+        """
+        # Create a BeautifulSoup object
+        soup = BeautifulSoup(html_string, "html.parser")
+
+        # Extract all elements as strings
+        elements = [str(element) for element in soup.find_all()]
+
+        return elements
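A usage sketch pairing the parser with a raw HTML fetch (the URL is only an example); because every element is serialized back to a string, the result can be filtered with ordinary string operations:

import requests

html = requests.get("https://example.com", timeout=30).text
elements = WebpageParser().forward(html)

# e.g. keep only anchor tags for link extraction
links = [el for el in elements if el.startswith("<a")]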
utils/__init__.py CHANGED

@@ -1,6 +1,7 @@
 import requests
 from smolagents import CodeAgent
 from tqdm import tqdm
+from prompts.default_prompt import generate_prompt
 
 DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
 
@@ -50,12 +51,15 @@ def run_agent(agent: CodeAgent, questions: list[dict]) -> list[str]:
     for question in tqdm(questions, desc="Running agent"):
         task_id = question.get("task_id")
         question_text = question.get("question")
+        file_name = question.get("file_name")
+        prompt = generate_prompt(question_text, file_name)
+
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {question}")
             continue
 
         try:
-            answer = agent(question_text)
+            answer = agent(prompt)
             answers_payload.append({"task_id": task_id, "submitted_answer": answer})
             results_log.append(
                 {
uv.lock
CHANGED
|
@@ -342,6 +342,15 @@ wheels = [
|
|
| 342 |
{ url = "https://files.pythonhosted.org/packages/83/a2/66adca41164860dee6d2d47b506fef3262c8879aab727b687c798d67313f/duckduckgo_search-8.0.1-py3-none-any.whl", hash = "sha256:87ea18d9abb1cd5dc8f63fc70ac867996acce2cb5e0129d191b9491c202420be", size = 18125 },
|
| 343 |
]
|
| 344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
[[package]]
|
| 346 |
name = "fastapi"
|
| 347 |
version = "0.115.12"
|
|
@@ -544,13 +553,17 @@ name = "hf-agents-gaia-agent"
|
|
| 544 |
version = "0.1.0"
|
| 545 |
source = { virtual = "." }
|
| 546 |
dependencies = [
|
|
|
|
| 547 |
{ name = "ffmpeg" },
|
| 548 |
{ name = "gradio", extra = ["oauth"] },
|
| 549 |
{ name = "helium" },
|
| 550 |
{ name = "litellm" },
|
|
|
|
| 551 |
{ name = "numpy" },
|
| 552 |
{ name = "openai" },
|
|
|
|
| 553 |
{ name = "opencv-python" },
|
|
|
|
| 554 |
{ name = "pandas" },
|
| 555 |
{ name = "pillow" },
|
| 556 |
{ name = "python-dotenv" },
|
|
@@ -567,13 +580,17 @@ dependencies = [
|
|
| 567 |
|
| 568 |
[package.metadata]
|
| 569 |
requires-dist = [
|
|
|
|
| 570 |
{ name = "ffmpeg", specifier = ">=1.4" },
|
| 571 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
|
| 572 |
{ name = "helium", specifier = ">=5.1.1" },
|
| 573 |
{ name = "litellm", specifier = "==1.67.1" },
|
|
|
|
| 574 |
{ name = "numpy", specifier = ">=2.2.5" },
|
| 575 |
{ name = "openai", specifier = ">=1.76.0" },
|
|
|
|
| 576 |
{ name = "opencv-python", specifier = ">=4.11.0.86" },
|
|
|
|
| 577 |
{ name = "pandas", specifier = ">=2.2.3" },
|
| 578 |
{ name = "pillow", specifier = ">=11.2.1" },
|
| 579 |
{ name = "python-dotenv", specifier = ">=1.1.0" },
|
|
@@ -760,6 +777,24 @@ wheels = [
|
|
| 760 |
{ url = "https://files.pythonhosted.org/packages/88/86/c14d3c24ae13c08296d068e6f79fd4bd17a0a07bddbda94990b87c35d20e/litellm-1.67.1-py3-none-any.whl", hash = "sha256:8fff5b2a16b63bb594b94d6c071ad0f27d3d8cd4348bd5acea2fd40c8e0c11e8", size = 7607266 },
|
| 761 |
]
|
| 762 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
[[package]]
|
| 764 |
name = "lxml"
|
| 765 |
version = "5.4.0"
|
|
@@ -874,6 +909,15 @@ wheels = [
|
|
| 874 |
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
|
| 875 |
]
|
| 876 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 877 |
[[package]]
|
| 878 |
name = "mpmath"
|
| 879 |
version = "1.3.0"
|
|
@@ -952,6 +996,28 @@ wheels = [
|
|
| 952 |
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
|
| 953 |
]
|
| 954 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 955 |
[[package]]
|
| 956 |
name = "numpy"
|
| 957 |
version = "2.2.5"
|
|
@@ -1142,6 +1208,21 @@ wheels = [
|
|
| 1142 |
{ url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
|
| 1143 |
]
|
| 1144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1145 |
[[package]]
|
| 1146 |
name = "opencv-python"
|
| 1147 |
version = "4.11.0.86"
|
|
@@ -1159,6 +1240,18 @@ wheels = [
|
|
| 1159 |
{ url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
|
| 1160 |
]
|
| 1161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1162 |
[[package]]
|
| 1163 |
name = "orjson"
|
| 1164 |
version = "3.10.16"
|
|
|
|
| 342 |
{ url = "https://files.pythonhosted.org/packages/83/a2/66adca41164860dee6d2d47b506fef3262c8879aab727b687c798d67313f/duckduckgo_search-8.0.1-py3-none-any.whl", hash = "sha256:87ea18d9abb1cd5dc8f63fc70ac867996acce2cb5e0129d191b9491c202420be", size = 18125 },
|
| 343 |
]
|
| 344 |
|
| 345 |
+
[[package]]
|
| 346 |
+
name = "et-xmlfile"
|
| 347 |
+
version = "2.0.0"
|
| 348 |
+
source = { registry = "https://pypi.org/simple" }
|
| 349 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
|
| 350 |
+
wheels = [
|
| 351 |
+
{ url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
|
| 352 |
+
]
|
| 353 |
+
|
| 354 |
[[package]]
|
| 355 |
name = "fastapi"
|
| 356 |
version = "0.115.12"
|
|
|
|
| 553 |
version = "0.1.0"
|
| 554 |
source = { virtual = "." }
|
| 555 |
dependencies = [
|
| 556 |
+
{ name = "beautifulsoup4" },
|
| 557 |
{ name = "ffmpeg" },
|
| 558 |
{ name = "gradio", extra = ["oauth"] },
|
| 559 |
{ name = "helium" },
|
| 560 |
{ name = "litellm" },
|
| 561 |
+
{ name = "markdownify" },
|
| 562 |
{ name = "numpy" },
|
| 563 |
{ name = "openai" },
|
| 564 |
+
{ name = "openai-whisper" },
|
| 565 |
{ name = "opencv-python" },
|
| 566 |
+
{ name = "openpyxl" },
|
| 567 |
{ name = "pandas" },
|
| 568 |
{ name = "pillow" },
|
| 569 |
{ name = "python-dotenv" },
|
|
|
|
| 580 |
|
| 581 |
[package.metadata]
|
| 582 |
requires-dist = [
|
| 583 |
+
{ name = "beautifulsoup4", specifier = ">=4.13.4" },
|
| 584 |
{ name = "ffmpeg", specifier = ">=1.4" },
|
| 585 |
{ name = "gradio", extras = ["oauth"], specifier = ">=5.27.0" },
|
| 586 |
{ name = "helium", specifier = ">=5.1.1" },
|
| 587 |
{ name = "litellm", specifier = "==1.67.1" },
|
| 588 |
+
{ name = "markdownify", specifier = ">=1.1.0" },
|
| 589 |
{ name = "numpy", specifier = ">=2.2.5" },
|
| 590 |
{ name = "openai", specifier = ">=1.76.0" },
|
| 591 |
+
{ name = "openai-whisper", specifier = ">=20240930" },
|
| 592 |
{ name = "opencv-python", specifier = ">=4.11.0.86" },
|
| 593 |
+
{ name = "openpyxl", specifier = ">=3.1.5" },
|
| 594 |
{ name = "pandas", specifier = ">=2.2.3" },
|
| 595 |
{ name = "pillow", specifier = ">=11.2.1" },
|
| 596 |
{ name = "python-dotenv", specifier = ">=1.1.0" },
|
|
|
|
     { url = "https://files.pythonhosted.org/packages/88/86/c14d3c24ae13c08296d068e6f79fd4bd17a0a07bddbda94990b87c35d20e/litellm-1.67.1-py3-none-any.whl", hash = "sha256:8fff5b2a16b63bb594b94d6c071ad0f27d3d8cd4348bd5acea2fd40c8e0c11e8", size = 7607266 },
 ]
 
+[[package]]
+name = "llvmlite"
+version = "0.44.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297 },
+    { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105 },
+    { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901 },
+    { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247 },
+    { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380 },
+    { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306 },
+    { url = "https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090 },
+    { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904 },
+    { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245 },
+    { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193 },
+]
+
 [[package]]
 name = "lxml"
 version = "5.4.0"
@@ ... @@
     { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
 ]
 
+[[package]]
+name = "more-itertools"
+version = "10.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ce/a0/834b0cebabbfc7e311f30b46c8188790a37f89fc8d756660346fe5abfd09/more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3", size = 127671 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2b/9f/7ba6f94fc1e9ac3d2b853fdff3035fb2fa5afbed898c4a72b8a020610594/more_itertools-10.7.0-py3-none-any.whl", hash = "sha256:d43980384673cb07d2f7d2d918c616b30c659c089ee23953f601d6609c67510e", size = 65278 },
+]
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ ... @@
     { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
 ]
 
+[[package]]
+name = "numba"
+version = "0.61.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "llvmlite" },
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626 },
+    { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287 },
+    { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928 },
+    { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115 },
+    { url = "https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929 },
+    { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785 },
+    { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289 },
+    { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918 },
+    { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056 },
+    { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846 },
+]
+
 [[package]]
 name = "numpy"
 version = "2.2.5"
@@ ... @@
     { url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
 ]
 
+[[package]]
+name = "openai-whisper"
+version = "20240930"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools" },
+    { name = "numba" },
+    { name = "numpy" },
+    { name = "tiktoken" },
+    { name = "torch" },
+    { name = "tqdm" },
+    { name = "triton", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or sys_platform == 'linux2'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/77/952ca71515f81919bd8a6a4a3f89a27b09e73880cebf90957eda8f2f8545/openai-whisper-20240930.tar.gz", hash = "sha256:b7178e9c1615576807a300024f4daa6353f7e1a815dac5e38c33f1ef055dd2d2", size = 800544 }
+
 [[package]]
 name = "opencv-python"
 version = "4.11.0.86"
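This entry also explains the llvmlite, more-itertools, and numba additions above: more-itertools and numba are listed here as openai-whisper's dependencies, and numba in turn pulls in llvmlite. A minimal transcription sketch — the checkpoint name and file path are placeholders, not taken from this repo's code:

import whisper

# Sketch only: "base" trades accuracy for speed; other checkpoints include "tiny", "small", "medium".
model = whisper.load_model("base")
result = model.transcribe("task_attachment.mp3")  # hypothetical audio file
print(result["text"])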
@@ ... @@
     { url = "https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
 ]
 
+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "et-xmlfile" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
+]
+
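openpyxl — whose sole dependency, et-xmlfile, is the entry that opens this section of the diff — is presumably what tools/open_files.py uses to read .xlsx attachments. A sketch under that assumption (the function name is hypothetical):

from openpyxl import load_workbook

def read_xlsx_rows(path: str) -> list[tuple]:
    # Sketch only: open the workbook read-only and return each row of the active sheet as a tuple.
    workbook = load_workbook(path, read_only=True, data_only=True)
    sheet = workbook.active
    return list(sheet.iter_rows(values_only=True))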
 [[package]]
 name = "orjson"
 version = "3.10.16"