santimber committed on
Commit
cf93357
·
1 Parent(s): f206914

handle images and youtube

Browse files
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
__pycache__/tools.cpython-311.pyc CHANGED
Binary files a/__pycache__/tools.cpython-311.pyc and b/__pycache__/tools.cpython-311.pyc differ
 
test_youtube_question.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify video analysis for a GAIA YouTube question.
4
+ """
5
+
6
+ import requests
7
+ from tools import video_analysis_tool
8
+
9
+
10
def _extract_youtube_url(question_text, file_name):
    """Return a YouTube URL from the question text or file name, or None.

    The question text is searched first. The file name is used as the URL
    only when the text contains no URL and the file name itself mentions a
    YouTube host.
    """
    # Function-scope import: the script's import header does not import re.
    import re

    match = re.search(
        r'https?://(?:www\.)?(?:youtube\.com|youtu\.be)[^\s]*',
        question_text,
        flags=re.IGNORECASE,  # question selection is case-insensitive too
    )
    if match:
        # A URL written inside a sentence ("... watch?v=abc, what is ...")
        # is followed by punctuation that [^\s]* also captures; strip it so
        # the tool receives a clean link.
        return match.group(0).rstrip('.,;:!?)]}>"\'')

    lowered_name = file_name.lower()
    if 'youtube.com' in lowered_name or 'youtu.be' in lowered_name:
        return file_name
    return None


def test_youtube_video_question():
    """Run video_analysis_tool on the first YouTube question from the API.

    Steps: fetch the question list from the scoring API, pick the first
    question whose text or file name mentions a YouTube host, extract the
    video URL, invoke ``video_analysis_tool`` on it and print the first 500
    characters of the result. Progress is reported with ``print``; the
    function returns None, and returns early (after printing a message) when
    fetching fails, no YouTube question exists, or no URL can be extracted.
    """
    api_url = "https://agents-course-unit4-scoring.hf.space"
    questions_url = f"{api_url}/questions"
    print("=== Testing YouTube Video Question ===")

    # 1. Fetch questions
    print("1. Fetching questions...")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        print(f"✅ Fetched {len(questions_data)} questions")
    except Exception as e:
        # Top-level boundary of a manual test script: report and stop.
        print(f"❌ Failed to fetch questions: {e}")
        return

    # 2. Find a question with a YouTube link in the question text or file_name
    youtube_question = None
    for i, question in enumerate(questions_data):
        # "or ''" also covers keys that are present but null, which
        # .get(key, '') would return as None.
        qtext = (question.get('question') or '').lower()
        fname = (question.get('file_name') or '').lower()
        if any(host in text
               for text in (qtext, fname)
               for host in ('youtube.com', 'youtu.be')):
            youtube_question = (i, question)
            break

    if not youtube_question:
        print("❌ No YouTube video questions found.")
        return

    idx, question = youtube_question
    question_text = question.get('question') or ''
    file_name = question.get('file_name') or ''
    print(f"\n2. Found YouTube video question {idx+1}:")
    print(f" Question: {question_text[:120]}...")
    print(f" File name: {file_name}")

    # 3. Extract YouTube URL from the question text or file_name
    yt_url = _extract_youtube_url(question_text, file_name)
    if not yt_url:
        print("❌ Could not extract YouTube URL from question.")
        return
    print(f"3. YouTube URL: {yt_url}")

    # 4. Analyze the video
    print("4. Analyzing video with video_analysis_tool...")
    result = video_analysis_tool.invoke(yt_url)
    print("5. Tool result:")
    # str() guards the slice in case the tool returns a non-string object.
    print(f" {str(result)[:500]}...")
    print("\n✅ YouTube video analysis test complete!")


if __name__ == "__main__":
    test_youtube_video_question()
tools.py CHANGED
@@ -290,23 +290,64 @@ def image_recognition(img_path: str) -> str:
290
  try:
291
  if not os.path.exists(img_path):
292
  return f"Error: Image file not found at {img_path}"
 
293
  if not os.getenv("OPENAI_API_KEY"):
294
  return "OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables."
295
- vision_llm = ChatOpenAI(model="gpt-4o")
296
- with open(img_path, "rb") as image_file:
297
- image_bytes = image_file.read()
298
- image_base64 = base64.b64encode(image_bytes).decode("utf-8")
299
- message = [
300
- HumanMessage(
301
- content=[
302
- {"type": "text", "text": "Describe the image or extract all the text from this image. Return only the description or extracted text, no explanations."},
303
- {"type": "image_url", "image_url": {
304
- "url": f"data:image/png;base64,{image_base64}"}},
305
- ]
306
- )
307
- ]
308
- response = vision_llm.invoke(message)
309
- return response.content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  except Exception as e:
311
  return f"Error analyzing image: {str(e)}"
312
 
@@ -608,9 +649,14 @@ def analyze_excel_file(file_path: str, query: str) -> str:
608
  return f"Error analyzing Excel file: {str(e)}"
609
 
610
 
 
 
 
 
 
611
  analyze_excel_file_tool = Tool(
612
  name="analyze_excel_file_tool",
613
- func=analyze_excel_file,
614
  description="Analyze an Excel file using pandas and answer a question about it."
615
  )
616
 
 
290
  try:
291
  if not os.path.exists(img_path):
292
  return f"Error: Image file not found at {img_path}"
293
+
294
  if not os.getenv("OPENAI_API_KEY"):
295
  return "OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables."
296
+
297
+ # Get image info first
298
+ try:
299
+ img = Image.open(img_path)
300
+ image_info = f"Image: {img.size[0]}x{img.size[1]} pixels, mode: {img.mode}"
301
+ except Exception as e:
302
+ image_info = f"Image info error: {str(e)}"
303
+
304
+ # Try vision model
305
+ try:
306
+ vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)
307
+ with open(img_path, "rb") as image_file:
308
+ image_bytes = image_file.read()
309
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
310
+
311
+ message = [
312
+ HumanMessage(
313
+ content=[
314
+ {"type": "text", "text": "Describe what you see in this image in detail. If there's text, extract it. If it's a chess position, describe the board state and pieces."},
315
+ {"type": "image_url", "image_url": {
316
+ "url": f"data:image/png;base64,{image_base64}"}},
317
+ ]
318
+ )
319
+ ]
320
+
321
+ response = vision_llm.invoke(message)
322
+ vision_result = response.content.strip()
323
+
324
+ # Check if we got a content policy response
325
+ if "sorry" in vision_result.lower() and "can't assist" in vision_result.lower():
326
+ # Fallback to OCR
327
+ try:
328
+ import pytesseract
329
+ text = pytesseract.image_to_string(img).strip()
330
+ if text:
331
+ return f"{image_info}\n\nOCR extracted text:\n{text}"
332
+ else:
333
+ return f"{image_info}\n\nVision model blocked. OCR found no text."
334
+ except ImportError:
335
+ return f"{image_info}\n\nVision model blocked. OCR not available."
336
+ else:
337
+ return f"{image_info}\n\nVision analysis:\n{vision_result}"
338
+
339
+ except Exception as vision_error:
340
+ # Fallback to OCR if vision fails
341
+ try:
342
+ import pytesseract
343
+ text = pytesseract.image_to_string(img).strip()
344
+ if text:
345
+ return f"{image_info}\n\nVision failed, OCR extracted text:\n{text}"
346
+ else:
347
+ return f"{image_info}\n\nVision failed: {str(vision_error)}. OCR found no text."
348
+ except ImportError:
349
+ return f"{image_info}\n\nVision failed: {str(vision_error)}. OCR not available."
350
+
351
  except Exception as e:
352
  return f"Error analyzing image: {str(e)}"
353
 
 
649
  return f"Error analyzing Excel file: {str(e)}"
650
 
651
 
652
def analyze_excel_file_simple(file_path: str) -> str:
    """Call ``analyze_excel_file`` on *file_path* with a fixed generic query.

    Single-argument adapter: the query passed to ``analyze_excel_file`` is
    always "Analyze this spreadsheet", and its return value is passed through
    unchanged.
    """
    default_query = "Analyze this spreadsheet"
    return analyze_excel_file(file_path, default_query)
655
+
656
+
657
# Tool wrapper exposing the spreadsheet analysis (presumably a LangChain
# ``Tool`` -- confirm against this file's imports).
# The wrapped function takes only a file path and always runs a fixed default
# query, so the description states that contract instead of promising to
# answer an arbitrary question the function never receives.
analyze_excel_file_tool = Tool(
    name="analyze_excel_file_tool",
    func=analyze_excel_file_simple,
    description=(
        "Analyze an Excel file using pandas and return a general analysis of it. "
        "Input must be only the path to the Excel file."
    ),
)
662