# app.py — "Tree Guard Detector" Gradio Space (commit b221614).
import gradio as gr
import os
import base64
from groq import Groq
from PIL import Image
import io
# 1. Groq API client — reads GROQ_API_KEY from the environment (Space Secrets).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# 2. Serialize a PIL image to a base64-encoded JPEG string.
def encode_image(image):
    """Return *image* serialized as a base64-encoded JPEG string.

    The JPEG encoder cannot store an alpha channel, so images in modes
    such as RGBA, LA, or P (common for Gradio PNG uploads) are converted
    to RGB first; the original code raised OSError for those modes.

    Args:
        image: A PIL.Image.Image instance.

    Returns:
        str: Base64 text of the JPEG-encoded pixels.
    """
    # Normalize to RGB — Pillow's JPEG writer rejects alpha/palette modes.
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')
# 3. Pre-encode the bundled reference images shipped with the repo;
# missing files are skipped so the app still starts without them.
REF_PATHS = ["references/reference_1.jpeg", "references/reference_2.jpeg"]
FIXED_BASE64 = [
    encode_image(Image.open(path).convert("RGB"))
    for path in REF_PATHS
    if os.path.exists(path)
]
def detect_covering(query_image):
    """Ask the Groq vision model whether *query_image* shows the same
    green tree-guard pole as the bundled reference images.

    Args:
        query_image: PIL image uploaded through Gradio, or None.

    Returns:
        str: The model's answer (upper-cased, expected "YES"/"NO"), or a
        human-readable error message when input/configuration is missing
        or the API call fails.
    """
    if query_image is None:
        return "Please upload an image."
    # Fail visibly in the UI rather than sending a prompt that promises
    # reference images we do not actually have on disk.
    if not FIXED_BASE64:
        return "Reference images are missing from the repository."
    # JPEG cannot encode alpha — normalize the upload before encoding.
    query_b64 = encode_image(query_image.convert("RGB"))
    # Multi-image user message: the instructions, then the reference
    # images, then the query image LAST (the prompt refers to it as such).
    content = [{"type": "text", "text": "First two reference images show a green pole around the tree. Determine if the LAST image contains the SAME type of green pole. Answer ONLY YES or NO."}]
    for b64 in FIXED_BASE64:
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
        })
    content.append({
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{query_b64}"},
    })
    # Llama 4 Scout on Groq supports up to 5 images per request.
    try:
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=[{"role": "user", "content": content}],
            temperature=0.0,  # deterministic output for a yes/no classifier
            max_tokens=10,
        )
    except Exception as exc:
        # Surface API/network failures as UI text instead of a traceback.
        return f"Error contacting Groq API: {exc}"
    return completion.choices[0].message.content.strip().upper()
# Gradio UI: one image-upload input mapped to a plain-text verdict.
demo = gr.Interface(
    detect_covering,
    inputs=gr.Image(type="pil", label="Upload Query Image"),
    outputs="text",
    title="Tree Guard Detector",
    description="Uses pre-set reference images from the repo to detect tree guards via Groq.",
)
demo.launch()