capradeepgujaran's picture
Update app.py
85d2f78 verified
raw
history blame
3.08 kB
import os
import base64
import gradio as gr
from PIL import Image
import io
import json
from groq import Groq
# API key for Groq, taken from the process environment (None when unset).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Single module-level Groq client; analyze_construction_image sends its
# chat-completion requests through it.
client = Groq(api_key=GROQ_API_KEY)
def encode_image(image):
    """Return *image* rendered as PNG and encoded as a base64 text string.

    Args:
        image: Any object exposing ``save(fp, format=...)``; the Gradio
            input in this file is configured with ``type="pil"``.

    Returns:
        The base64 encoding of the PNG bytes, as a ``str``.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    raw_png = png_buffer.getvalue()
    encoded = base64.b64encode(raw_png)
    return encoded.decode('utf-8')
def _field_to_text(value, default="N/A"):
    """Render one field of the model's JSON reply as display text.

    Strings pass through unchanged, lists/tuples become one item per line,
    any other value is stringified, and missing or empty values fall back
    to *default*.
    """
    if value is None:
        return default
    if isinstance(value, str):
        return value or default
    if isinstance(value, (list, tuple)):
        # Items are not guaranteed to be strings (the model may emit numbers
        # or nested objects), so stringify each one before joining.
        return '\n'.join(str(item) for item in value) or default
    return str(value)


def analyze_construction_image(image, follow_up_question=""):
    """Ask the vision model to find snags in a construction-site image.

    Args:
        image: The uploaded image (must be accepted by ``encode_image``), or
            ``None`` when nothing was uploaded.
        follow_up_question: Optional extra question, sent as a second user
            message. Blank or whitespace-only values are ignored.

    Returns:
        A 3-tuple ``(snag_category, snag_description, desnag_steps)`` of
        display strings. On any failure the first element is an
        ``"Error: ..."`` message and the other two are empty strings; this
        function never raises, because it is the UI callback boundary.
    """
    if image is None:
        return "Error: No image uploaded", "", ""

    try:
        image_data_url = f"data:image/png;base64,{encode_image(image)}"

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        # The key names are spelled out because the parsing
                        # below reads exactly these keys; without them the
                        # model is free to pick its own schema.
                        "text": (
                            "Analyze this construction site image. Identify any issues or snags, "
                            "categorize them, provide a detailed description, and suggest steps to "
                            "resolve them. Output the result in JSON format. Respond with a single "
                            "JSON object containing exactly these keys: \"snag_category\" (string), "
                            "\"snag_description\" (string), and \"desnag_steps\" (array of strings)."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data_url
                        }
                    }
                ]
            }
        ]

        # Skip blank questions so the model is not sent an empty user turn.
        question = (follow_up_question or "").strip()
        if question:
            messages.append({
                "role": "user",
                "content": question
            })

        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=1000,
            top_p=1,
            stream=False,
            response_format={"type": "json_object"},
            stop=None
        )

        result = json.loads(completion.choices[0].message.content)
        if not isinstance(result, dict):
            return "Error: Model response was not a JSON object", "", ""

        # Normalise every field: the model may return a plain string, a list
        # of non-strings, or null where a list of strings was requested.
        snag_category = _field_to_text(result.get('snag_category'))
        snag_description = _field_to_text(result.get('snag_description'))
        desnag_steps = _field_to_text(result.get('desnag_steps'))

        return snag_category, snag_description, desnag_steps
    except Exception as e:
        # Deliberately broad: report any failure (encoding, network, API,
        # JSON parsing) in the UI instead of crashing the callback.
        return f"Error: {str(e)}", "", ""
# Input widgets, in the positional order of analyze_construction_image's
# parameters: the image first, then the optional follow-up question.
_input_components = [
    gr.Image(type="pil", label="Upload Construction Image"),
    gr.Textbox(label="Follow-up Question (Optional)"),
]

# One textbox per element of the 3-tuple the handler returns.
_output_components = [
    gr.Textbox(label=caption)
    for caption in ("Snag Category", "Snag Description", "Steps to Desnag")
]

# Each example row supplies one value per input: [image file, question].
_example_rows = [
    ["example_image1.jpg", "What safety concerns do you see?"],
    ["example_image2.jpg", "Is there any visible structural damage?"],
]

# Assemble the Gradio interface around the analysis handler.
iface = gr.Interface(
    fn=analyze_construction_image,
    inputs=_input_components,
    outputs=_output_components,
    title="Construction Image Analyzer (Llama 3.2 90B Vision via Groq)",
    description="Upload a construction site image to identify issues and get desnag steps using Llama 3.2 90B Vision technology through Groq API. You can also ask follow-up questions about the image.",
    examples=_example_rows,
    cache_examples=True,
    theme="default",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()