Spaces:

superuser-aisensum
/

object-detection-and-counting

Paused

App Files Files Community

superuser-aisensum committed on Jan 13

Commit

1abc56b

verified ·

1 Parent(s): b7df5d9

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -43

app.py CHANGED Viewed

@@ -1,52 +1,149 @@
 import gradio as gr
 from roboflow import Roboflow
 import tempfile
-import os
-# Initialize Roboflow
-# NOTE(review): the API key is hardcoded in the next line and is visible in this diff; treat it as exposed.
-rf = Roboflow(api_key="Otg64Ra6wNOgDyjuhMYU")
 project = rf.workspace("alat-pelindung-diri").project("nescafe-4base")
 model = project.version(16).model
-# Function that handles the image input and output
-def detect_objects(image):
-    # Save the uploaded image as a temporary file
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-        image.save(temp_file, format="JPEG")
-        temp_file_path = temp_file.name
-    # Run prediction on the image
-    predictions = model.predict(temp_file_path, confidence=50, overlap=30).json()
-    # Count the number of objects per class
-    class_count = {}
-    for prediction in predictions['predictions']:
-        class_name = prediction['class']
-        if class_name in class_count:
-            class_count[class_name] += 1
-        else:
-            class_count[class_name] = 1
-    # Build the output string with the counting results ("Jumlah objek per kelas" = "Number of objects per class")
-    result_text = "Jumlah objek per kelas:\n"
-    for class_name, count in class_count.items():
-        result_text += f"{class_name}: {count} objek\n"
-    # Save the image with predictions (this runs a second prediction on the same temporary file)
-    output_image = model.predict(temp_file_path, confidence=50, overlap=30).save("/tmp/prediction.jpg")
-    # Delete the temporary file after prediction
-    os.remove(temp_file_path)
-    return "/tmp/prediction.jpg", result_text
-# Create the Gradio interface
-iface = gr.Interface(
-    fn=detect_objects,                         # Function called when an image is uploaded
-    inputs=gr.Image(type="pil"),               # Input is an image
-    outputs=[gr.Image(), gr.Textbox()],        # Outputs: an image and a text box
-    live=True                                    # Show results live
 )
-# Run the interface
-iface.launch()

+import logging
 import gradio as gr
+import os
 from roboflow import Roboflow
+from dotenv import load_dotenv
+from openai import OpenAI
 import tempfile
+import numpy as np
+from PIL import Image, ImageDraw
+import base64
+# Load environment variables via python-dotenv before any key is read
+load_dotenv()
+# Configure logging: timestamp, logger name, level, message
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+# Return the named environment variable, raising ValueError when it is unset or empty
+def _require_env(name):
+    value = os.getenv(name)
+    if value:
+        return value
+    raise ValueError(f"{name} is missing. Please add it to the .env file.")
+# Initialize API keys (Roboflow is checked first, then OpenAI)
+roboflow_key = _require_env("ROBOFLOW_API_KEY")
+openai_key = _require_env("OPENAI_API_KEY")
+# Initialize Roboflow and OpenAI clients
+rf = Roboflow(api_key=roboflow_key)
 project = rf.workspace("alat-pelindung-diri").project("nescafe-4base")
 model = project.version(16).model
+client_openai = OpenAI(api_key=openai_key)
+# Function to detect objects and estimate occluded objects
+def detect_and_estimate_objects(image):
+    """Count detected objects, query GPT-4o about the image, and overlay a grid.
+
+    Args:
+        image: PIL image from the Gradio input, or None when nothing was uploaded.
+
+    Returns:
+        ``(output_path, result_text)`` on success: ``output_path`` is a JPEG of
+        the image with a red grid drawn on it, and ``result_text`` lists the
+        per-class detection counts followed by the GPT response. On any
+        failure returns ``(None, "Error: ...")``; exceptions are logged with
+        their traceback and are not re-raised.
+    """
+    if image is None:
+        logger.error("Error during processing: no image provided")
+        return None, "Error: no image provided"
+    temp_file_path = None
+    try:
+        # JPEG cannot store alpha or palette modes, so work on an RGB copy.
+        # Using a copy also keeps the grid drawn below off the caller's image.
+        rgb_image = image.convert("RGB")
+        # Save image to temporary file (the path is recorded first so cleanup
+        # still happens if the save itself fails)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+            temp_file_path = temp_file.name
+            rgb_image.save(temp_file, format="JPEG")
+        logger.info("Image saved successfully for processing.")
+        # Step 1: YOLO detection
+        predictions = model.predict(temp_file_path, confidence=50, overlap=80).json()
+        class_count = {}
+        object_positions = []
+        for prediction in predictions['predictions']:
+            class_name = prediction['class']
+            # Only the x/y position is needed for the grid mapping below
+            object_positions.append((prediction['x'], prediction['y']))
+            class_count[class_name] = class_count.get(class_name, 0) + 1
+        logger.info("YOLO detected objects: %s", class_count)
+        # Step 2: Create a grid and map detected objects
+        grid_size = 5
+        image_width, image_height = rgb_image.size
+        grid = np.zeros((grid_size, grid_size))
+        for x, y in object_positions:
+            # Clamp so positions on or past the image edge land in a border cell
+            grid_x = min(max(int(x / image_width * grid_size), 0), grid_size - 1)
+            grid_y = min(max(int(y / image_height * grid_size), 0), grid_size - 1)
+            grid[grid_y, grid_x] += 1
+        logger.info("Grid occupancy calculated: %s", grid.tolist())
+        # Step 3: Send the image to GPT-4o as a Base64 data URL
+        with open(temp_file_path, "rb") as image_file:
+            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
+        # Log only the length; the payload itself is far too large for the logs
+        logger.info("Base64 encoding successful. Length: %d", len(base64_image))
+        # NOTE(review): the prompt only asks what is in the image, while the
+        # result is labelled as an occlusion estimate below - confirm the
+        # intended prompt.
+        response = client_openai.chat.completions.create(
+            model="gpt-4o",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "What is in this image?",
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                        },
+                    ],
+                }
+            ],
+        )
+        # The message content can be None (e.g. a refusal), so guard before strip()
+        gpt_estimation = (response.choices[0].message.content or "").strip()
+        logger.info("GPT-4 estimation: %s", gpt_estimation)
+        # Step 4: Combine YOLO and GPT results
+        result_lines = ["YOLO Detection Results:"]
+        result_lines.extend(
+            f"{class_name}: {count} objects" for class_name, count in class_count.items()
+        )
+        result_text = "\n".join(result_lines) + "\n"
+        result_text += f"\nGPT Estimation for Occluded Objects:\n{gpt_estimation}"
+        # Step 5: Visualize grid on the image
+        draw = ImageDraw.Draw(rgb_image)
+        for i in range(1, grid_size):
+            line_x = i * image_width // grid_size
+            line_y = i * image_height // grid_size
+            draw.line([(line_x, 0), (line_x, image_height)], fill="red", width=2)
+            draw.line([(0, line_y), (image_width, line_y)], fill="red", width=2)
+        # NOTE(review): fixed output path is shared by all requests; concurrent
+        # calls overwrite each other's file.
+        output_path = "/tmp/prediction_grid.jpg"
+        rgb_image.save(output_path)
+        logger.info("Processed image saved successfully.")
+        return output_path, result_text
+    except Exception as e:
+        # UI boundary: report the failure in the results box instead of raising
+        logger.exception("Error during processing: %s", e)
+        return None, f"Error: {e}"
+    finally:
+        # Cleanup: remove the temporary upload on success and on failure alike
+        if temp_file_path is not None:
+            try:
+                os.remove(temp_file_path)
+            except OSError as cleanup_error:
+                logger.warning("Could not remove temporary file %s: %s", temp_file_path, cleanup_error)
+# Create Gradio interface
+# Layout: a heading, one row holding the input image, the processed image and
+# the results text box, then a button below the row that triggers processing.
+with gr.Blocks() as iface:
+    gr.Markdown("### Object Detection and Counting with GPT-4 Assistance")
+    with gr.Row():
+        # type="pil" because the handler calls PIL image methods on its argument
+        input_image = gr.Image(type="pil", label="Upload Image")
+        output_image = gr.Image(label="Processed Image with Grid")
+        output_text = gr.Textbox(label="Results", interactive=False)
+    detect_button = gr.Button("Process Image")
+    # The handler returns two values, listed here in the same order: image, then text
+    detect_button.click(
+        fn=detect_and_estimate_objects,
+        inputs=[input_image],
+        outputs=[output_image, output_text]
+    )
+# Launch the app when this module runs, with debug=True passed to Gradio
+iface.launch(debug=True)