Spaces:

AffordableAI
/

Construction_Site_Safety_Analyzer_Llama_3.2_Vision

Running

App Files Community

capradeepgujaran committed on Oct 18, 2024

Commit

85d2f78

verified ·

1 Parent(s): 2811ee2

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -44

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import base64
 import gradio as gr
 from PIL import Image
 import io
 from groq import Groq
 # Load environment variables
@@ -11,58 +12,58 @@ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 # Initialize Groq client
 client = Groq(api_key=GROQ_API_KEY)
-def analyze_construction_image(image):
     if image is None:
         return "Error: No image uploaded", "", ""
     try:
-        # Convert PIL Image to base64
-        buffered = io.BytesIO()
-        image.save(buffered, format="PNG")
-        img_str = base64.b64encode(buffered.getvalue()).decode()
-        image_data_url = f"data:image/png;base64,{img_str}"
-        # Prepare the message for Groq API
-        completion = client.chat.completions.create(
-            model="llama-3.2-11b-vision-preview",
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": "You are an AI assistant specialized in analyzing construction site images. Analyze this construction image. Identify the snag category, provide a detailed snag description, and list steps to desnag. Format your response as follows:\nSnag Category: [category]\nSnag Description: [detailed description]\nSteps to Desnag:\n1. [step 1]\n2. [step 2]\n3. [step 3]"
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": image_data_url
-                            }
                         }
-                    ]
-                }
-            ],
             temperature=0.7,
-            max_tokens=500,
             top_p=1,
             stream=False,
             stop=None
         )
-        result = completion.choices[0].message.content
-        # Parse the result
-        snag_category = "N/A"
-        snag_description = "N/A"
-        desnag_steps = "N/A"
-        for line in result.split('\n'):
-            if line.startswith("Snag Category:"):
-                snag_category = line.split(":", 1)[1].strip()
-            elif line.startswith("Snag Description:"):
-                snag_description = line.split(":", 1)[1].strip()
-            elif line.startswith("Steps to Desnag:"):
-                desnag_steps = "\n".join(result.split("Steps to Desnag:")[1].strip().split("\n"))
         return snag_category, snag_description, desnag_steps
     except Exception as e:
@@ -71,17 +72,20 @@ def analyze_construction_image(image):
 # Create the Gradio interface
 iface = gr.Interface(
     fn=analyze_construction_image,
-    inputs=gr.Image(type="pil", label="Upload Construction Image"),
     outputs=[
         gr.Textbox(label="Snag Category"),
         gr.Textbox(label="Snag Description"),
         gr.Textbox(label="Steps to Desnag")
     ],
-    title="Construction Image Analyzer (Llama 3.2 Vision via Groq)",
-    description="Upload a construction site image to identify issues and get desnag steps using Llama 3.2 Vision technology through Groq API.",
     examples=[
-        ["example_image1.jpg"],
-        ["example_image2.jpg"]
     ],
     cache_examples=True,
     theme="default"

 import gradio as gr
 from PIL import Image
 import io
+import json
 from groq import Groq
 # Load environment variables
 # Initialize Groq client
 client = Groq(api_key=GROQ_API_KEY)
+def encode_image(image):
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode('utf-8')
+def analyze_construction_image(image, follow_up_question=""):
     if image is None:
         return "Error: No image uploaded", "", ""
     try:
+        image_data_url = f"data:image/png;base64,{encode_image(image)}"
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Analyze this construction site image. Identify any issues or snags, categorize them, provide a detailed description, and suggest steps to resolve them. Output the result in JSON format."
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": image_data_url
                         }
+                    }
+                ]
+            }
+        ]
+        if follow_up_question:
+            messages.append({
+                "role": "user",
+                "content": follow_up_question
+            })
+        completion = client.chat.completions.create(
+            model="llama-3.2-90b-vision-preview",
+            messages=messages,
             temperature=0.7,
+            max_tokens=1000,
             top_p=1,
             stream=False,
+            response_format={"type": "json_object"},
             stop=None
         )
+        result = json.loads(completion.choices[0].message.content)
+        snag_category = result.get('snag_category', 'N/A')
+        snag_description = result.get('snag_description', 'N/A')
+        desnag_steps = '\n'.join(result.get('desnag_steps', ['N/A']))
         return snag_category, snag_description, desnag_steps
     except Exception as e:
 # Create the Gradio interface
 iface = gr.Interface(
     fn=analyze_construction_image,
+    inputs=[
+        gr.Image(type="pil", label="Upload Construction Image"),
+        gr.Textbox(label="Follow-up Question (Optional)")
+    ],
     outputs=[
         gr.Textbox(label="Snag Category"),
         gr.Textbox(label="Snag Description"),
         gr.Textbox(label="Steps to Desnag")
     ],
+    title="Construction Image Analyzer (Llama 3.2 90B Vision via Groq)",
+    description="Upload a construction site image to identify issues and get desnag steps using Llama 3.2 90B Vision technology through Groq API. You can also ask follow-up questions about the image.",
     examples=[
+        ["example_image1.jpg", "What safety concerns do you see?"],
+        ["example_image2.jpg", "Is there any visible structural damage?"]
     ],
     cache_examples=True,
     theme="default"