Update app.py
app.py
CHANGED
@@ -31,7 +31,7 @@ if not openai_key:
 # Initialize Roboflow and OpenAI clients
 rf = Roboflow(api_key=roboflow_key)
 project = rf.workspace("alat-pelindung-diri").project("nescafe-4base")
-model = project.version(16).model
+model = project.version(16).model  # YOLO model; comment this out if it is not needed

 client_openai = OpenAI(api_key=openai_key)

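For context, the `predictions` dict that the next hunk iterates over would come from this Roboflow model. A minimal sketch of the usual call, assuming the standard Roboflow Python SDK; the file path and thresholds are illustrative, not part of this commit:

    # Sketch (assumption): how `predictions` is typically produced with the
    # Roboflow SDK; "shelf.jpg" and the thresholds are placeholders.
    predictions = model.predict("shelf.jpg", confidence=40, overlap=30).json()
    # predictions["predictions"] is a list of dicts with keys such as
    # "x", "y", "width", "height", and "class", as used below.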
@@ -50,75 +50,89 @@ def detect_and_estimate_objects(image):
     class_count = {}
     object_positions = []

+    # Draw bounding boxes
+    draw = ImageDraw.Draw(image)
     for prediction in predictions['predictions']:
         class_name = prediction['class']
-        object_positions.append(bbox)
-        class_count[class_name] = class_count.get(class_name, 0) + 1
-
-    image_width, image_height = image.size
-    grid = np.zeros((grid_size, grid_size))
-    grid[grid_y, grid_x] += 1
+        x, y, width, height = prediction['x'], prediction['y'], prediction['width'], prediction['height']
+
+        # Calculate bounding box coordinates
+        left = int(x - width / 2)
+        top = int(y - height / 2)
+        right = int(x + width / 2)
+        bottom = int(y + height / 2)
+
+        # Draw bounding box
+        draw.rectangle([left, top, right, bottom], outline="red", width=4)
+
+        # Count occurrences of detected classes
+        class_count[class_name] = class_count.get(class_name, 0) + 1
+        object_positions.append((left, top, right, bottom))

-    logger.info(f"
+    logger.info(f"YOLO detected objects: {class_count}")

-    # Step
+    # Step 2: Prepare base64 encoding for GPT-4
     # Encode image to Base64
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        image.save(temp_file, format="JPEG")
+        temp_file_path = temp_file.name
+
     with open(temp_file_path, "rb") as image_file:
         base64_image = base64.b64encode(image_file.read()).decode("utf-8")
     logger.info(f"Base64 encoding successful. Length: {len(base64_image)}")
-
-    # prompt = f"""
-    # Here is an image encoded in Base64 format: {base64_image} Please analyze this image and estimate the number of occluded objects for each class.
-    # """

+    # Step 3: Use GPT-4 to estimate occluded objects
     response = client_openai.chat.completions.create(
-        {
-            "role": "user",
-            "content": [
-                {
-                ],
-        ],
-    )
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": """Please count the number of cans of the following Nestlé products in the image, including those that are partially obstructed or hidden.
+For partially visible or obstructed cans, please estimate their number based on visible clues and assume that they belong to the same product in front of them.
+Please count accurately the number of cans of the following Nestlé products in the image:
+- Nescafe Mocha
+- Nescafe Latte
+- Nescafe Original
+- Bear Brand
+- Nescafe Cappuccino
+- Nescafe Ice Black
+- Nescafe Coconut Latte
+- Nescafe Caramel
+Please note that some products may be partially visible or obstructed, but they are still important to count; treat them as cans of the same product in front of them.
+Please count the visible cans as well as the occluded ones. For partially hidden cans, assume they are the same product and estimate their presence based on the visible portion.
+
+Provide your response in the format:
+Nescafé Mocha: [number]
+Nescafé Latte: [number]
+Nescafé Original: [number]
+Bear Brand: [number]
+Nescafé Cappuccino: [number]
+Nescafé Ice Black: [number]
+Nescafé Coconut Latte: [number]
+Nescafé Caramel: [number]
+Total Nestlé Products: [Total number of Nestlé products]""",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                    },
+                ],
+            }
+        ],
+    )
     gpt_estimation = response.choices[0].message.content.strip()
     print(response.choices[0].message.content)

     logger.info(f"GPT-4 estimation: {gpt_estimation}")

-    # Step 4: Combine YOLO and GPT results
-    result_text = "
-    for class_name, count in class_count.items():
-        result_text += f"{class_name}: {count} objects\n"
-    result_text += f"\nGPT Estimation for Occluded Objects:\n{gpt_estimation}"
-
-    # Step 5: Visualize grid on the image
-    draw = ImageDraw.Draw(image)
-    for i in range(1, grid_size):
-        draw.line([(i * image_width // grid_size, 0), (i * image_width // grid_size, image_height)], fill="red", width=2)
-        draw.line([(0, i * image_height // grid_size), (image_width, i * image_height // grid_size)], fill="red", width=2)
+    # Step 4: Combine YOLO and GPT results (result text now carries the GPT estimation only)
+    result_text = f"Results from GPT-4:\n{gpt_estimation}"

+    # Step 5: Save the annotated image (grid visualization removed)
+    output_path = "/tmp/prediction_result.jpg"
     image.save(output_path)

     logger.info("Processed image saved successfully.")
@@ -134,10 +148,10 @@ def detect_and_estimate_objects(image):

 # Create Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("### Object Detection and Counting with GPT-4 Assistance")
+    gr.Markdown("### Object Detection and Counting with YOLO and GPT-4 Assistance")
     with gr.Row():
         input_image = gr.Image(type="pil", label="Upload Image")
-        output_image = gr.Image(label="Processed Image
+        output_image = gr.Image(label="Processed Image")
         output_text = gr.Textbox(label="Results", interactive=False)

     detect_button = gr.Button("Process Image")
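Since the interface declares an image and a textbox output, `detect_and_estimate_objects` presumably ends by returning both values; a sketch of the assumed final statement (names taken from the function body above; it could equally return `output_path` instead of the PIL image):

    return image, result_text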
@@ -147,4 +161,4 @@ with gr.Blocks() as iface:
     outputs=[output_image, output_text]
 )

-iface.launch(debug=True)
+iface.launch(debug=True)
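The hunk above shows only the tail of the button wiring; the full call is outside this diff. A sketch of what it presumably looks like, with `fn` and `inputs` as assumptions based on the components defined earlier:

    detect_button.click(
        fn=detect_and_estimate_objects,  # assumed handler
        inputs=[input_image],            # assumed input component
        outputs=[output_image, output_text]
    )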