Final_Assignment_Template

Sleeping

App Files Files Community

ABVM committed on Jun 7, 2025

Commit

a4dd17a

verified ·

1 Parent(s): fcddfb2

Delete vision_tool.py

Browse files

Files changed (1) hide show

vision_tool.py +0 -70

vision_tool.py DELETED Viewed

@@ -1,70 +0,0 @@
-# Vision tool using Groq's Meta-Llama Scout model
-from smolagents import tool
-from groq import Groq
-import os
-def _llama_analyze(image_b64: str, prompt: str) -> str:
-    """Internal helper to query the Llama vision model."""
-    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
-            ],
-        }
-    ]
-    response = client.chat.completions.create(
-        model="meta-llama/llama-4-scout-17b-16e-instruct",
-        messages=messages,
-        stream=False,
-        max_tokens=512,
-    )
-    return response.choices[0].message.content
-@tool
-def image_reasoning_tool(image_file: str, prompt: str | None = None) -> dict:
-    """Perform OCR and optional vision analysis on an image.
-    This single entry point unifies OCR extraction and Llama vision reasoning so
-    the planner only sees one image tool.
-    Args:
-        image_file: Path to the image file to analyze.
-        prompt: Optional instruction for the vision model. If omitted, only OCR
-            is performed.
-    Returns:
-        Dictionary with OCR text, base64 image data and optional vision model
-        response.
-    """
-    try:
-        from PIL import Image
-        from smolagents.utils import encode_image_base64
-        import pytesseract
-        image = Image.open(image_file)
-        b64 = encode_image_base64(image)
-        ocr_text = pytesseract.image_to_string(image)
-        vision_text = ""
-        if prompt:
-            try:
-                vision_text = _llama_analyze(b64, prompt)
-            except Exception as e:  # vision errors shouldn't break OCR result
-                vision_text = f"Error processing image with vision model: {e}"
-        return {"ocr_text": ocr_text, "vision_text": vision_text, "base64_image": b64}
-    except Exception as e:
-        return {
-            "ocr_text": "",
-            "vision_text": "",
-            "base64_image": "",
-            "error": str(e),
-        }