superuser-aisensum committed on
Commit 3e8f2a5 · verified · 1 Parent(s): 21edaee

Update app.py

Files changed (1): app.py (+67 −53)
app.py CHANGED
@@ -31,7 +31,7 @@ if not openai_key:
 # Initialize Roboflow and OpenAI clients
 rf = Roboflow(api_key=roboflow_key)
 project = rf.workspace("alat-pelindung-diri").project("nescafe-4base")
-model = project.version(16).model
+model = project.version(16).model  # YOLO model (version 16); comment out if not needed
 
 client_openai = OpenAI(api_key=openai_key)
 
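Note on context: the detection loop in the next hunk consumes a `predictions` dict produced by this `model`, though that call is outside the diff. A minimal sketch of that step, assuming the standard Roboflow Python SDK (the image path and the confidence/overlap thresholds are illustrative, not from this commit):

    # Sketch (assumption): hosted inference via the Roboflow SDK; the path and
    # thresholds are illustrative placeholders, not values from this commit.
    predictions = model.predict("shelf.jpg", confidence=40, overlap=30).json()
    # predictions['predictions'] is a list of dicts with center-based
    # 'x', 'y', 'width', 'height' and a 'class' label, as consumed below.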
@@ -50,75 +50,89 @@ def detect_and_estimate_objects(image):
     class_count = {}
     object_positions = []
 
+    # Draw bounding boxes
+    draw = ImageDraw.Draw(image)
     for prediction in predictions['predictions']:
         class_name = prediction['class']
-        bbox = prediction['x'], prediction['y'], prediction['width'], prediction['height']
-        object_positions.append(bbox)
-        class_count[class_name] = class_count.get(class_name, 0) + 1
+        x, y, width, height = prediction['x'], prediction['y'], prediction['width'], prediction['height']
 
-    logger.info(f"YOLO detected objects: {class_count}")
+        # Calculate bounding box corners from the center-based coordinates
+        left = int(x - width / 2)
+        top = int(y - height / 2)
+        right = int(x + width / 2)
+        bottom = int(y + height / 2)
 
-    # Step 2: Create a grid and map detected objects
-    grid_size = 5
-    image_width, image_height = image.size
-    grid = np.zeros((grid_size, grid_size))
+        # Draw bounding box
+        draw.rectangle([left, top, right, bottom], outline="red", width=4)
 
-    for x, y, w, h in object_positions:
-        grid_x = min(max(int(x / image_width * grid_size), 0), grid_size - 1)
-        grid_y = min(max(int(y / image_height * grid_size), 0), grid_size - 1)
-        grid[grid_y, grid_x] += 1
+        # Count occurrences of detected classes
+        class_count[class_name] = class_count.get(class_name, 0) + 1
+        object_positions.append((left, top, right, bottom))
 
-    logger.info(f"Grid occupancy calculated: {grid.tolist()}")
+    logger.info(f"YOLO detected objects: {class_count}")
 
-    # Step 3: Use GPT-4 to estimate occluded objects
-
+    # Step 2: Prepare base64 encoding for GPT-4
     # Encode image to Base64
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        image.save(temp_file, format="JPEG")
+        temp_file_path = temp_file.name
+
     with open(temp_file_path, "rb") as image_file:
         base64_image = base64.b64encode(image_file.read()).decode("utf-8")
-        print(base64_image)
     logger.info(f"Base64 encoding successful. Length: {len(base64_image)}")
 
-    # prompt = f"""
-    # Here is an image encoded in Base64 format: {base64_image} Please analyze this image and estimate the number of occluded objects for each class.
-    # """
-
+    # Step 3: Use GPT-4 to estimate occluded objects
     response = client_openai.chat.completions.create(
         model="gpt-4o",
         messages=[
             {
                 "role": "user",
                 "content": [
                     {
                         "type": "text",
-                        "text": """How many cans are there on this shelf? Take an accurate count and give your answer like this
-                        Total Cans:[Total cans of the nestle products]""",
+                        "text": """Please count the number of cans of the following Nestlé products in the image, including those that are partially obstructed or hidden.
+                        For partially visible or obstructed cans, estimate their number from visible clues and assume they are the same product as the cans in front of them:
+                        - Nescafe Mocha
+                        - Nescafe Latte
+                        - Nescafe Original
+                        - Bear Brand
+                        - Nescafe Cappuccino
+                        - Nescafe Ice Black
+                        - Nescafe Coconut Latte
+                        - Nescafe Caramel
+                        Count the visible cans as well as the occluded ones.
+
+                        Provide your response in the format:
+                        Nescafé Mocha: [number]
+                        Nescafé Latte: [number]
+                        Nescafé Original: [number]
+                        Bear Brand: [number]
+                        Nescafé Cappuccino: [number]
+                        Nescafé Ice Black: [number]
+                        Nescafé Coconut Latte: [number]
+                        Nescafé Caramel: [number]
+                        Total Nestlé Products: [Total number of Nestlé products]""",
                     },
                     {
                         "type": "image_url",
                         "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                     },
                 ],
             }
         ],
     )
     gpt_estimation = response.choices[0].message.content.strip()
     print(response.choices[0].message.content)
 
     logger.info(f"GPT-4 estimation: {gpt_estimation}")
 
-    # Step 4: Combine YOLO and GPT results
-    result_text = "YOLO Detection Results:\n"
-    for class_name, count in class_count.items():
-        result_text += f"{class_name}: {count} objects\n"
-    result_text += f"\nGPT Estimation for Occluded Objects:\n{gpt_estimation}"
-
-    # Step 5: Visualize grid on the image
-    draw = ImageDraw.Draw(image)
-    for i in range(1, grid_size):
-        draw.line([(i * image_width // grid_size, 0), (i * image_width // grid_size, image_height)], fill="red", width=2)
-        draw.line([(0, i * image_height // grid_size), (image_width, i * image_height // grid_size)], fill="red", width=2)
+    # Step 4: Report the GPT results (the YOLO counts are logged above)
+    result_text = f"Results from GPT-4:\n{gpt_estimation}"
 
-    output_path = "/tmp/prediction_grid.jpg"
+    # Step 5: Save the annotated image (the grid visualization is removed)
+    output_path = "/tmp/prediction_result.jpg"
     image.save(output_path)
 
     logger.info("Processed image saved successfully.")
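After this hunk the function returns GPT-4's reply as free text. If downstream code needs per-product numbers, the `Product: [number]` format requested by the prompt can be parsed. A minimal sketch, assuming the model follows that format; `parse_gpt_counts` is a hypothetical helper, not part of this commit:

    import re

    # Sketch (assumption): hypothetical helper that parses the "Name: number"
    # lines the prompt asks GPT-4 to produce.
    def parse_gpt_counts(reply: str) -> dict:
        counts = {}
        for line in reply.splitlines():
            match = re.match(r"^\s*(.+?):\s*(\d+)\s*$", line)
            if match:
                counts[match.group(1).strip()] = int(match.group(2))
        return counts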
@@ -134,10 +148,10 @@ def detect_and_estimate_objects(image):
 
 # Create Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("### Object Detection and Counting with GPT-4 Assistance")
+    gr.Markdown("### Object Detection and Counting with YOLO and GPT-4 Assistance")
     with gr.Row():
         input_image = gr.Image(type="pil", label="Upload Image")
-        output_image = gr.Image(label="Processed Image with Grid")
+        output_image = gr.Image(label="Processed Image")
         output_text = gr.Textbox(label="Results", interactive=False)
 
     detect_button = gr.Button("Process Image")
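The lines between this hunk and the next are elided from the diff; based on the visible `outputs=[...]` context line in the final hunk, the button is presumably wired roughly like this (a sketch, not the commit's exact code):

    # Sketch (assumption): reconstruction of the elided click wiring; only the
    # outputs= line is actually visible in this diff.
    detect_button.click(
        fn=detect_and_estimate_objects,
        inputs=input_image,
        outputs=[output_image, output_text]
    )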
@@ -147,4 +161,4 @@ with gr.Blocks() as iface:
         outputs=[output_image, output_text]
     )
 
-iface.launch(debug=True)
+iface.launch(debug=True)
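One side note on the temp-file change in this commit: `tempfile.NamedTemporaryFile(delete=False, ...)` leaves a JPEG on disk after every request, and the diff does not show `tempfile` or `os` being imported at the top of app.py. A minimal cleanup sketch, assuming both are imported and the file is no longer needed once encoded:

    import os

    # Sketch (assumption): delete the temporary JPEG after base64 encoding,
    # since delete=False keeps it on disk otherwise.
    os.remove(temp_file_path)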
 