superuser-aisensum committed on
Commit
1abc56b
·
verified ·
1 Parent(s): b7df5d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -43
app.py CHANGED
@@ -1,52 +1,149 @@
 
1
  import gradio as gr
 
2
  from roboflow import Roboflow
 
 
3
  import tempfile
4
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Initialize Roboflow.
# The API key is read from the environment so the secret is never committed
# with the source; startup fails fast with a clear message when it is absent.
_roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY")
if not _roboflow_api_key:
    raise ValueError(
        "ROBOFLOW_API_KEY is missing. Set it in the environment before starting the app."
    )
rf = Roboflow(api_key=_roboflow_api_key)
project = rf.workspace("alat-pelindung-diri").project("nescafe-4base")
model = project.version(16).model
10
 
11
# Handle the uploaded image: run detection and report per-class counts.
def detect_objects(image):
    """Detect objects in ``image`` and count them per class.

    Args:
        image: PIL image supplied by the Gradio image input, or ``None``
            when the input has been cleared.

    Returns:
        A tuple ``(annotated_image_path, result_text)``. The path points to
        the annotated prediction image written by the model; the text lists
        the number of detections per class. When no image is given the path
        is ``None``.
    """
    if image is None:
        # A live interface also fires when the input is cleared.
        return None, "Jumlah objek per kelas:\n"

    # The model is given a file path, so persist the upload to a temp file.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
    temp_file_path = temp_file.name
    try:
        with temp_file:
            # JPEG cannot store alpha/palette data; normalize to RGB first.
            image.convert("RGB").save(temp_file, format="JPEG")

        # Run inference once and reuse the result for both the JSON payload
        # and the annotated image (the temp file must still exist for save).
        prediction_result = model.predict(temp_file_path, confidence=50, overlap=30)
        predictions = prediction_result.json()
        prediction_result.save("/tmp/prediction.jpg")
    finally:
        # Always remove the temp file, even when prediction fails.
        os.remove(temp_file_path)

    # Count detections per class.
    class_count = {}
    for prediction in predictions['predictions']:
        class_name = prediction['class']
        class_count[class_name] = class_count.get(class_name, 0) + 1

    # Build the textual summary.
    result_text = "Jumlah objek per kelas:\n" + "".join(
        f"{class_name}: {count} objek\n"
        for class_name, count in class_count.items()
    )

    return "/tmp/prediction.jpg", result_text
42
-
43
# Build the Gradio interface.
iface = gr.Interface(
    fn=detect_objects,  # Function called when an image is uploaded
    inputs=gr.Image(type="pil"),  # Input is an image (passed to fn as a PIL image)
    outputs=[gr.Image(), gr.Textbox()],  # Outputs: an image and a text box
    live=True  # Show results immediately
)

# Launch the interface.
iface.launch()
 
1
+ import logging
2
  import gradio as gr
3
+ import os
4
  from roboflow import Roboflow
5
+ from dotenv import load_dotenv
6
+ from openai import OpenAI
7
  import tempfile
8
+ import numpy as np
9
+ from PIL import Image, ImageDraw
10
+ import base64
11
+
12
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Module-wide logging setup.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)


def _require_env(name):
    """Return the value of environment variable ``name`` or raise ValueError."""
    value = os.getenv(name)
    if value:
        return value
    raise ValueError(f"{name} is missing. Please add it to the .env file.")


# API keys (the Roboflow key is validated first, then the OpenAI key).
roboflow_key = _require_env("ROBOFLOW_API_KEY")
openai_key = _require_env("OPENAI_API_KEY")

# Roboflow model handle and OpenAI client used by the request handler.
rf = Roboflow(api_key=roboflow_key)
workspace = rf.workspace("alat-pelindung-diri")
project = workspace.project("nescafe-4base")
model = project.version(16).model

client_openai = OpenAI(api_key=openai_key)
37
+
38
# Detect objects with the Roboflow model, map them onto a coarse grid, and
# query GPT-4o about the same image.
def detect_and_estimate_objects(image):
    """Run detection and the GPT-4o query on an uploaded image.

    Args:
        image: PIL image from the Gradio input, or ``None`` when nothing has
            been uploaded.

    Returns:
        A tuple ``(output_path, result_text)``. On success ``output_path`` is
        the JPEG containing the grid overlay and ``result_text`` combines the
        per-class detection counts with the GPT response. On failure the
        tuple is ``(None, "Error: ...")`` so the UI can display the message.
    """
    if image is None:
        return None, "Error: no image provided."

    temp_file_path = None
    try:
        # JPEG cannot store alpha/palette data, so normalize to RGB. This
        # also yields a copy, so the caller's image is never drawn on.
        rgb_image = image.convert("RGB")

        # Save image to temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
            temp_file_path = temp_file.name
            rgb_image.save(temp_file, format="JPEG")

        logger.info("Image saved successfully for processing.")

        # Step 1: YOLO detection
        predictions = model.predict(temp_file_path, confidence=50, overlap=80).json()
        class_count = {}
        object_positions = []

        for prediction in predictions['predictions']:
            class_name = prediction['class']
            # Only the x/y position is needed for the grid mapping below.
            object_positions.append((prediction['x'], prediction['y']))
            class_count[class_name] = class_count.get(class_name, 0) + 1

        logger.info("YOLO detected objects: %s", class_count)

        # Step 2: Create a grid and map detected objects
        grid_size = 5
        image_width, image_height = rgb_image.size
        grid = np.zeros((grid_size, grid_size))

        for x, y in object_positions:
            # Clamp so positions on or past the image edge land in a valid cell.
            grid_x = min(max(int(x / image_width * grid_size), 0), grid_size - 1)
            grid_y = min(max(int(y / image_height * grid_size), 0), grid_size - 1)
            grid[grid_y, grid_x] += 1

        logger.info("Grid occupancy calculated: %s", grid.tolist())

        # Step 3: Use GPT-4 to estimate occluded objects

        # Encode image to Base64 (only its length is logged; the payload
        # itself is far too large to be useful on stdout).
        with open(temp_file_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
        logger.info("Base64 encoding successful. Length: %d", len(base64_image))

        # NOTE(review): the prompt asks for a general description, while the
        # result is labelled as an occlusion estimate below — confirm intent.
        response = client_openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "What is in this image?",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                }
            ],
        )
        # The message content may be None; treat that as an empty reply.
        gpt_estimation = (response.choices[0].message.content or "").strip()

        logger.info("GPT-4 estimation: %s", gpt_estimation)

        # Step 4: Combine YOLO and GPT results
        result_text = "YOLO Detection Results:\n"
        result_text += "".join(
            f"{class_name}: {count} objects\n"
            for class_name, count in class_count.items()
        )
        result_text += f"\nGPT Estimation for Occluded Objects:\n{gpt_estimation}"

        # Step 5: Visualize grid on the image
        draw = ImageDraw.Draw(rgb_image)
        for i in range(1, grid_size):
            line_x = i * image_width // grid_size
            line_y = i * image_height // grid_size
            draw.line([(line_x, 0), (line_x, image_height)], fill="red", width=2)
            draw.line([(0, line_y), (image_width, line_y)], fill="red", width=2)

        output_path = "/tmp/prediction_grid.jpg"
        rgb_image.save(output_path)

        logger.info("Processed image saved successfully.")

        return output_path, result_text

    except Exception as e:
        # UI boundary: log with traceback and report the error as text.
        logger.exception("Error during processing: %s", e)
        return None, f"Error: {e}"

    finally:
        # Cleanup: remove the temp file on success and on failure alike.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
133
+
134
# Create Gradio interface: one image input, a processed-image output and a
# read-only results box, wired to a button that runs the handler.
with gr.Blocks() as iface:
    gr.Markdown("### Object Detection and Counting with GPT-4 Assistance")
    with gr.Row():
        # type="pil" makes Gradio pass the upload to the handler as a PIL image.
        input_image = gr.Image(type="pil", label="Upload Image")
        output_image = gr.Image(label="Processed Image with Grid")
        # NOTE(review): placement of the text box inside this row is assumed —
        # confirm against the intended layout.
        output_text = gr.Textbox(label="Results", interactive=False)

    detect_button = gr.Button("Process Image")
    # The handler returns (image path, text), matching the two outputs in order.
    detect_button.click(
        fn=detect_and_estimate_objects,
        inputs=[input_image],
        outputs=[output_image, output_text]
    )

# Start the app with debug output enabled.
iface.launch(debug=True)