Spaces:

AffordableAI
/

Construction_Site_Safety_Analyzer_Llama_3.2_Vision

Sleeping

Construction_Site_Safety_Analyzer_Llama_3.2_Vision

File size: 10,359 Bytes

import os
import base64
import gradio as gr
from PIL import Image
import io
import json
from groq import Groq
import logging
import cv2
import numpy as np

# Configure logging once at import time; DEBUG level keeps full diagnostics.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# The Groq API key is read from the environment; fail fast when it is absent
# or empty so the app never starts without credentials.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY is not set in environment variables")
    raise ValueError("GROQ_API_KEY is not set")

# Shared Groq client used by the analysis and chat functions below.
client = Groq(api_key=GROQ_API_KEY)

def encode_image(image):
    """Return *image* as a base64-encoded UTF-8 string.

    Args:
        image: One of
            - a filesystem path (``str`` or ``os.PathLike``); the file's bytes
              are read and encoded unchanged;
            - an object exposing its path as a string ``name`` attribute
              (e.g. a temporary-file wrapper handed over by an upload
              widget); handled exactly like a path;
            - a ``PIL.Image.Image``; re-encoded as PNG;
            - a ``numpy.ndarray`` (e.g. a video frame); encoded as PNG with
              OpenCV.

    Returns:
        The base64 text of the image bytes.

    Raises:
        ValueError: If the input type is unsupported or OpenCV fails to
            encode the array.
        OSError: If a path cannot be opened or read.
    """
    try:
        # Upload wrappers carry the on-disk location in ``.name``; unwrap
        # them so they take the plain-path branch below.
        if not isinstance(image, (str, os.PathLike)):
            wrapped_path = getattr(image, "name", None)
            if isinstance(wrapped_path, str):
                image = wrapped_path

        if isinstance(image, (str, os.PathLike)):  # File path
            with open(image, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')

        if isinstance(image, Image.Image):  # PIL Image
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode('utf-8')

        if isinstance(image, np.ndarray):  # Numpy array (from video)
            is_success, buffer = cv2.imencode(".png", image)
            if not is_success:
                # Previously this fell through and returned None, producing a
                # bogus "data:image/png;base64,None" URL downstream.
                raise ValueError("OpenCV failed to encode the array as PNG")
            return base64.b64encode(buffer.tobytes()).decode('utf-8')

        raise ValueError(f"Unsupported image type: {type(image)}")
    except Exception as e:
        logger.error(f"Error encoding image: {str(e)}")
        raise

# Relative positions (fraction of the video length) at which frames are sampled.
_VIDEO_SAMPLE_POINTS = (0, 0.25, 0.5, 0.75, 1)


def _unwrap_upload(media):
    """Return the path of an upload wrapper exposing ``.name``; otherwise *media* unchanged."""
    if isinstance(media, (str, os.PathLike)):
        return media
    wrapped_path = getattr(media, "name", None)
    return wrapped_path if isinstance(wrapped_path, str) else media


def _request_safety_analysis(prompt, image):
    """Send *prompt* plus one image to the vision model and return the reply text."""
    image_data_url = f"data:image/png;base64,{encode_image(image)}"
    completion = client.chat.completions.create(
        model="llama-3.2-90b-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_data_url}},
                ],
            }
        ],
        temperature=0.7,
        max_tokens=1000,
        top_p=1,
        stream=False,
        stop=None,
    )
    return completion.choices[0].message.content


def _analyze_video(video_path):
    """Analyze frames sampled across the video and return ``(title, text)`` tuples."""
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        # Seek by frame index rather than by time: this needs no FPS value
        # (which may be reported as 0) and the 100% sample maps to the last
        # frame instead of a position past the end of the stream.
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        last_index = max(frame_count - 1, 0)

        results = []
        for i, time_point in enumerate(_VIDEO_SAMPLE_POINTS):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(time_point * last_index))
            ret, frame = cap.read()
            if not ret:
                logger.warning("Could not read video frame %d", i + 1)
                continue
            prompt = f"Analyze this frame from a construction site video (Frame {i+1}/{len(_VIDEO_SAMPLE_POINTS)} at {time_point*100}% of video duration). Identify any safety issues or hazards, categorize them, provide a detailed description, and suggest steps to resolve them."
            results.append((f"Video frame {i+1} analysis", _request_safety_analysis(prompt, frame)))
        return results
    finally:
        # Always free the capture handle, even when a request fails mid-way.
        cap.release()


def analyze_construction_image(images, video=None):
    """Run a safety analysis on uploaded images and/or a video.

    Args:
        images: Iterable of images to analyze (or ``None``/empty). Each item
            may be anything ``encode_image`` accepts, or an upload wrapper
            exposing its path as ``.name``.
        video: Optional video, given as a path or as an object exposing its
            path as ``.name``. Five frames spread over the video are analyzed.

    Returns:
        A list of ``(title, text)`` tuples, one per analyzed image/frame.
        When nothing was provided, or when any step fails, the list holds a
        single tuple describing the problem; exceptions are not propagated.
    """
    if not images and not video:
        logger.warning("No images or video provided")
        return [("No input", "Error: Please upload images or a video for analysis.")]

    try:
        logger.info("Starting analysis")
        results = []

        if images:
            for i, image in enumerate(images):
                prompt = f"Analyze this construction site image (Image {i+1}/{len(images)}). Identify any safety issues or hazards, categorize them, provide a detailed description, and suggest steps to resolve them."
                result = _request_safety_analysis(prompt, _unwrap_upload(image))
                results.append((f"Image {i+1} analysis", result))

        if video:
            results.extend(_analyze_video(os.fspath(_unwrap_upload(video))))

        logger.info("Analysis completed successfully")
        return results
    except Exception as e:
        # logger.exception records the message and the traceback; the former
        # traceback.format_exc() call referenced a module that was never
        # imported and raised NameError inside this handler.
        logger.exception("Error during analysis: %s", e)
        error_message = f"Error during analysis: {str(e)}. Please try again or contact support if the issue persists."
        return [("Analysis error", error_message)]
        
def chat_about_image(message, chat_history):
    """Answer a follow-up question using the conversation so far.

    Args:
        message: The new user message.
        chat_history: Sequence of ``(user_text, assistant_text)`` pairs, or
            ``None`` when the conversation is empty. It is not modified.

    Returns:
        A ``("", updated_history)`` tuple: the empty string clears the input
        box and ``updated_history`` is a new list with the exchange appended.
        A blank message returns the history unchanged without calling the
        API. On failure the error text is appended as the assistant reply.
    """
    # Work on a copy: tolerates None and leaves the caller's list untouched.
    history = list(chat_history or [])

    # Nothing to ask: do not spend an API call on an empty prompt.
    if not message or not message.strip():
        return "", history

    try:
        # Prepare the conversation history for the API
        messages = [
            {"role": "system", "content": "You are an AI assistant specialized in analyzing construction site images and answering questions about them. Use the information from the initial analysis to answer user queries."},
        ]

        # Replay earlier turns, skipping empty halves of a pair
        for human, ai in history:
            if human:
                messages.append({"role": "user", "content": human})
            if ai:
                messages.append({"role": "assistant", "content": ai})

        # Add the new user message
        messages.append({"role": "user", "content": message})

        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
            top_p=1,
            stream=False,
            stop=None
        )

        response = completion.choices[0].message.content
        return "", history + [(message, response)]
    except Exception as e:
        logger.error(f"Error during chat: {str(e)}")
        return "", history + [(message, f"Error: {str(e)}")]


# Custom CSS passed to gr.Blocks(css=...). The class names here are attached
# to components through their elem_classes arguments or used directly in the
# gr.HTML snippets. Note that .subheader is forced onto a single line
# (nowrap + hidden overflow + ellipsis), so long taglines are truncated.
custom_css = """
.container { max-width: 1200px; margin: auto; padding-top: 1.5rem; }
.header { text-align: center; margin-bottom: 1rem; }
.header h1 { color: #2c3e50; font-size: 2.5rem; }
.subheader { 
    color: #34495e; 
    font-size: 1rem; 
    line-height: 1.2; 
    margin-bottom: 1.5rem; 
    text-align: center; 
    padding: 0 15px;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
.image-container { border: 2px dashed #3498db; border-radius: 10px; padding: 1rem; text-align: center; }
.analyze-button { background-color: #2ecc71 !important; color: white !important; }
.clear-button { background-color: #e74c3c !important; color: white !important; width: 100px !important; }
.chatbot { border: 1px solid #bdc3c7; border-radius: 10px; padding: 1rem; height: 400px; overflow-y: auto; }
.chat-input { border: 1px solid #bdc3c7; border-radius: 5px; padding: 0.5rem; }
.groq-badge { position: fixed; bottom: 10px; right: 10px; background-color: #f39c12; color: white; padding: 5px 10px; border-radius: 5px; font-weight: bold; }
.chat-container { display: flex; flex-direction: column; }
.input-row { display: flex; align-items: center; margin-top: 10px; }
.input-row > div:first-child { flex-grow: 1; margin-right: 10px; }
"""

# Create the Gradio interface: uploads and the analyze button on the left,
# the results/chat panel on the right.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    gr.HTML(
        """
        <div class="container">
            <div class="header">
                <h1>🏗️ Construction Site Safety Analyzer</h1>
            </div>
            <p class="subheader">Enhance workplace safety and compliance with AI-powered image and video analysis using Llama 3.2 90B Vision and expert chat assistance.</p>
        </div>
        """
    )
    
    with gr.Row():
        with gr.Column(scale=1):
            # NOTE(review): type="file" is presumably the Gradio 3.x spelling;
            # confirm against the installed Gradio version before upgrading.
            image_input = gr.File(label="Upload Construction Site Images", file_count="multiple", type="file", elem_classes="image-container")
            video_input = gr.Video(label="Upload Construction Site Video", elem_classes="image-container")
            analyze_button = gr.Button("🔍 Analyze Safety Hazards", elem_classes="analyze-button")
        with gr.Column(scale=2):
            with gr.Group(elem_classes="chat-container"):
                chatbot = gr.Chatbot(label="Safety Analysis Results and Expert Chat", elem_classes="chatbot")
                with gr.Row(elem_classes="input-row"):
                    msg = gr.Textbox(
                        label="Ask about safety measures or regulations",
                        placeholder="E.g., 'What OSHA guidelines apply to this hazard?'",
                        show_label=False,
                        elem_classes="chat-input"
                    )
                    clear = gr.Button("🗑️ Clear", elem_classes="clear-button")

    def update_chat(history, new_messages):
        """Return a new history list with *new_messages* appended (history may be None)."""
        history = list(history or [])
        history.extend(new_messages)
        return history

    def analyze_and_update_chat(images, video, history):
        """Run the analysis and append its results to the live chat history."""
        return update_chat(history, analyze_construction_image(images, video))

    # The current chat history is passed as an input so results are appended
    # to it. The earlier code handed a lambda to `postprocess=`, which is a
    # boolean flag: the lambda was never called and results replaced the chat.
    analyze_button.click(
        analyze_and_update_chat,
        inputs=[image_input, video_input, chatbot],
        outputs=[chatbot],
    )

    # Submitting the textbox sends the question and clears the input box.
    msg.submit(chat_about_image, [msg, chatbot], [msg, chatbot])
    # Clearing resets the chatbot value to None.
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.HTML(
        """
        <div class="groq-badge">Powered by Groq</div>
        """
    )

# Launch the app only when this file is executed as a script (not on import);
# debug=True is forwarded to Gradio's launch().
if __name__ == "__main__":
    iface.launch(debug=True)