Spaces:
Sleeping
Sleeping
handle images and youtube
Browse files- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/tools.cpython-311.pyc +0 -0
- test_youtube_question.py +71 -0
- tools.py +62 -16
__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
__pycache__/tools.cpython-311.pyc
CHANGED
Binary files a/__pycache__/tools.cpython-311.pyc and b/__pycache__/tools.cpython-311.pyc differ
|
|
test_youtube_question.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Test script to verify video analysis for a GAIA YouTube question.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import requests
|
7 |
+
from tools import video_analysis_tool
|
8 |
+
|
9 |
+
|
10 |
+
def _extract_youtube_url(question_text, file_name):
    """Return the first YouTube URL found for a question, or None.

    The question text is searched first; if it holds no link, ``file_name``
    is returned as-is when it mentions a YouTube host.

    Args:
        question_text: Free-form question text; may be None or empty.
        file_name: Attached file name / link; may be None or empty.

    Returns:
        The URL string, or None when neither input contains a YouTube link.
    """
    import re  # local import keeps this block self-contained

    yt_pattern = r'(https?://(?:www\.)?(?:youtube\.com|youtu\.be)[^\s]*)'
    # Host names are case-insensitive, and the caller detects candidates on
    # lower-cased text, so the extraction must be case-insensitive as well.
    match = re.search(yt_pattern, question_text or '', re.IGNORECASE)
    if match:
        # `[^\s]*` also swallows punctuation that merely follows the link in
        # a sentence ("...watch?v=abc, what is ..."); trim it off so the tool
        # receives a clean URL.
        return match.group(1).rstrip('.,;:!?)]}>"\'')

    if file_name:
        lowered = file_name.lower()
        if 'youtube.com' in lowered or 'youtu.be' in lowered:
            return file_name
    return None


def test_youtube_video_question():
    """Fetch the GAIA questions and run the video tool on the first YouTube one.

    Steps: download the question list from the scoring API, pick the first
    entry whose question text or file name mentions a YouTube host, extract
    the URL, and print (a prefix of) what ``video_analysis_tool`` returns.

    This is a manual smoke test: every failure path prints a message and
    returns early instead of raising, and nothing is asserted.
    """
    api_url = "https://agents-course-unit4-scoring.hf.space"
    questions_url = f"{api_url}/questions"
    print("=== Testing YouTube Video Question ===")

    # 1. Fetch questions
    print("1. Fetching questions...")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        print(f"✅ Fetched {len(questions_data)} questions")
    except Exception as e:
        # Deliberately broad: any network/HTTP/JSON problem just ends the run.
        print(f"❌ Failed to fetch questions: {e}")
        return

    # 2. Find a question with a YouTube link in the question text or file_name
    youtube_hosts = ('youtube.com', 'youtu.be')
    youtube_question = None
    for i, question in enumerate(questions_data):
        # `or ''` covers keys that are present but null in the JSON payload.
        qtext = (question.get('question') or '').lower()
        fname = (question.get('file_name') or '').lower()
        if any(host in text for text in (qtext, fname) for host in youtube_hosts):
            youtube_question = (i, question)
            break

    if not youtube_question:
        print("❌ No YouTube video questions found.")
        return

    idx, question = youtube_question
    question_text = question.get('question') or ''
    file_name = question.get('file_name') or ''
    print(f"\n2. Found YouTube video question {idx+1}:")
    print(f" Question: {question_text[:120]}...")
    print(f" File name: {file_name}")

    # 3. Extract YouTube URL from the question text or file_name
    yt_url = _extract_youtube_url(question_text, file_name)
    if not yt_url:
        print("❌ Could not extract YouTube URL from question.")
        return
    print(f"3. YouTube URL: {yt_url}")

    # 4. Analyze the video
    print("4. Analyzing video with video_analysis_tool...")
    result = video_analysis_tool.invoke(yt_url)
    print("5. Tool result:")
    # str() so a non-string tool result cannot break the preview slice.
    print(f" {str(result)[:500]}...")
    print("\n✅ YouTube video analysis test complete!")
|
68 |
+
|
69 |
+
|
70 |
+
# Run the smoke test only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    test_youtube_video_question()
|
tools.py
CHANGED
@@ -290,23 +290,64 @@ def image_recognition(img_path: str) -> str:
|
|
290 |
try:
|
291 |
if not os.path.exists(img_path):
|
292 |
return f"Error: Image file not found at {img_path}"
|
|
|
293 |
if not os.getenv("OPENAI_API_KEY"):
|
294 |
return "OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables."
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
)
|
307 |
-
|
308 |
-
|
309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
except Exception as e:
|
311 |
return f"Error analyzing image: {str(e)}"
|
312 |
|
@@ -608,9 +649,14 @@ def analyze_excel_file(file_path: str, query: str) -> str:
|
|
608 |
return f"Error analyzing Excel file: {str(e)}"
|
609 |
|
610 |
|
|
|
|
|
|
|
|
|
|
|
611 |
analyze_excel_file_tool = Tool(
|
612 |
name="analyze_excel_file_tool",
|
613 |
-
func=
|
614 |
description="Analyze an Excel file using pandas and answer a question about it."
|
615 |
)
|
616 |
|
|
|
290 |
try:
|
291 |
if not os.path.exists(img_path):
|
292 |
return f"Error: Image file not found at {img_path}"
|
293 |
+
|
294 |
if not os.getenv("OPENAI_API_KEY"):
|
295 |
return "OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables."
|
296 |
+
|
297 |
+
# Get image info first
|
298 |
+
try:
|
299 |
+
img = Image.open(img_path)
|
300 |
+
image_info = f"Image: {img.size[0]}x{img.size[1]} pixels, mode: {img.mode}"
|
301 |
+
except Exception as e:
|
302 |
+
image_info = f"Image info error: {str(e)}"
|
303 |
+
|
304 |
+
# Try vision model
|
305 |
+
try:
|
306 |
+
vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)
|
307 |
+
with open(img_path, "rb") as image_file:
|
308 |
+
image_bytes = image_file.read()
|
309 |
+
image_base64 = base64.b64encode(image_bytes).decode("utf-8")
|
310 |
+
|
311 |
+
message = [
|
312 |
+
HumanMessage(
|
313 |
+
content=[
|
314 |
+
{"type": "text", "text": "Describe what you see in this image in detail. If there's text, extract it. If it's a chess position, describe the board state and pieces."},
|
315 |
+
{"type": "image_url", "image_url": {
|
316 |
+
"url": f"data:image/png;base64,{image_base64}"}},
|
317 |
+
]
|
318 |
+
)
|
319 |
+
]
|
320 |
+
|
321 |
+
response = vision_llm.invoke(message)
|
322 |
+
vision_result = response.content.strip()
|
323 |
+
|
324 |
+
# Check if we got a content policy response
|
325 |
+
if "sorry" in vision_result.lower() and "can't assist" in vision_result.lower():
|
326 |
+
# Fallback to OCR
|
327 |
+
try:
|
328 |
+
import pytesseract
|
329 |
+
text = pytesseract.image_to_string(img).strip()
|
330 |
+
if text:
|
331 |
+
return f"{image_info}\n\nOCR extracted text:\n{text}"
|
332 |
+
else:
|
333 |
+
return f"{image_info}\n\nVision model blocked. OCR found no text."
|
334 |
+
except ImportError:
|
335 |
+
return f"{image_info}\n\nVision model blocked. OCR not available."
|
336 |
+
else:
|
337 |
+
return f"{image_info}\n\nVision analysis:\n{vision_result}"
|
338 |
+
|
339 |
+
except Exception as vision_error:
|
340 |
+
# Fallback to OCR if vision fails
|
341 |
+
try:
|
342 |
+
import pytesseract
|
343 |
+
text = pytesseract.image_to_string(img).strip()
|
344 |
+
if text:
|
345 |
+
return f"{image_info}\n\nVision failed, OCR extracted text:\n{text}"
|
346 |
+
else:
|
347 |
+
return f"{image_info}\n\nVision failed: {str(vision_error)}. OCR found no text."
|
348 |
+
except ImportError:
|
349 |
+
return f"{image_info}\n\nVision failed: {str(vision_error)}. OCR not available."
|
350 |
+
|
351 |
except Exception as e:
|
352 |
return f"Error analyzing image: {str(e)}"
|
353 |
|
|
|
649 |
return f"Error analyzing Excel file: {str(e)}"
|
650 |
|
651 |
|
652 |
+
def analyze_excel_file_simple(file_path: str) -> str:
    """Run ``analyze_excel_file`` on ``file_path`` with a fixed, generic query.

    Exists so the analysis can be exposed through a single-argument callable:
    only the path is supplied by the caller, the query is always the same.
    """
    default_query = "Analyze this spreadsheet"
    return analyze_excel_file(file_path, default_query)
|
655 |
+
|
656 |
+
|
657 |
# Tool wrapper around the single-argument Excel helper: the tool input is the
# file path, and the helper supplies its own default query.
analyze_excel_file_tool = Tool(
    name="analyze_excel_file_tool",
    func=analyze_excel_file_simple,
    description="Analyze an Excel file using pandas and answer a question about it.",
)
|
662 |
|