supersolar commited on
Commit
3ee66e9
·
verified ·
1 Parent(s): e75372e

Create 2.py

Browse files
Files changed (1) hide show
  1. 2.py +172 -0
2.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
from typing import Tuple, Optional

import cv2
import numpy as np
import spaces
import supervision as sv
import torch
from PIL import Image
from tqdm import tqdm

from utils.video import generate_unique_name, create_directory, delete_directory
from utils.florencegpu2 import load_florence_model, run_florence_inference, \
    FLORENCE_DETAILED_CAPTION_TASK, \
    FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
    IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model

# Run on the last visible CUDA device (e.g. cuda:1 on a two-GPU machine).
# The original assigned torch.device("cuda") and immediately overwrote it;
# the dead assignment and the throwaway list comprehension are removed.
DEVICE = torch.device(f'cuda:{torch.cuda.device_count() - 1}')

# Globally enter bfloat16 autocast for all subsequent CUDA ops in this process.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
if torch.cuda.get_device_properties(0).major >= 8:
    # Ampere (SM 8.0) and newer: allow TF32 for faster matmul/conv kernels.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

# Models are loaded once at import time and shared by all functions below.
FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
SAM_VIDEO_MODEL = load_sam_video_model(device=DEVICE)

# Text prompts used for open-vocabulary detection in every frame.
texts = ['the table', 'all person', 'ball']
40
def detect_objects_in_image(image_input_path, texts):
    """Detect and segment each phrase in *texts* within one image.

    For every phrase, runs Florence-2 open-vocabulary detection, converts
    the raw output into supervision Detections, and refines the boxes with
    SAM.  The per-phrase results are then merged and refined with SAM one
    final time.

    Args:
        image_input_path: path of the image to analyse.
        texts: iterable of text prompts to ground in the image.

    Returns:
        The merged sv.Detections carrying SAM segmentation masks.
    """
    source_image = Image.open(image_input_path)
    per_phrase_results = []

    for phrase in texts:
        # Ground this single phrase with Florence-2.
        _, florence_result = run_florence_inference(
            model=FLORENCE_MODEL,
            processor=FLORENCE_PROCESSOR,
            device=DEVICE,
            image=source_image,
            task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
            text=phrase
        )

        # Convert the raw LMM output into a Detections object.
        phrase_detections = sv.Detections.from_lmm(
            lmm=sv.LMM.FLORENCE_2,
            result=florence_result,
            resolution_wh=source_image.size
        )

        # Attach SAM masks to the grounded boxes.
        phrase_detections = run_sam_inference(SAM_IMAGE_MODEL, source_image, phrase_detections)
        per_phrase_results.append(phrase_detections)

    # Union of all phrases, then one more SAM pass over the merged set.
    merged_detections = sv.Detections.merge(per_phrase_results)
    merged_detections = run_sam_inference(SAM_IMAGE_MODEL, source_image, merged_detections)

    return merged_detections
78
+ # @title #合并遮罩加模糊merge_image_with_mask
79
+ import numpy as np
80
+ import cv2
81
+ import os
82
+ from PIL import Image, ImageFilter
83
+
84
def merge_image_with_mask(image_input_path, detections, output_folder):
    """Blur everything outside the detected regions and save the composite.

    Keeps the original pixels wherever any detection mask (slightly dilated)
    is set, replaces the rest of the frame with a heavy Gaussian blur, and
    writes the result to *output_folder* under the input image's file name.

    Args:
        image_input_path: path of the image to process.
        detections: sv.Detections whose boolean ``mask`` array is populated
            (at least one mask is required).
        output_folder: directory for the composite; created if missing.

    Raises:
        FileNotFoundError: if the image cannot be read by OpenCV.
    """
    os.makedirs(output_folder, exist_ok=True)

    image_name = os.path.basename(image_input_path)
    output_path = os.path.join(output_folder, image_name)

    # Union of all instance masks as a single binary uint8 mask.
    # np.logical_or avoids the uint8 wrap-around that the original
    # `combined_mask += mask` summation risked with many overlapping masks.
    combined_mask = np.logical_or.reduce(detections.mask).astype(np.uint8)

    # Grow the mask slightly so the kept region fully covers the objects.
    kernel = np.ones((6, 6), np.uint8)
    dilated_mask = cv2.dilate(combined_mask, kernel, iterations=1)

    original_image = cv2.imread(image_input_path)
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_input_path}")

    # Heavy background blur (21x21 kernel, very large sigma).
    blurred_image = cv2.GaussianBlur(original_image, (21, 21), 500)

    # Composite: original pixels inside the dilated mask, blur outside.
    # The original's intermediate bitwise_and steps (including the buggy
    # `mask=~dilated_mask`, a no-op on a 0/1 mask since ~1 == 254 is still
    # truthy) reduce exactly to this single np.where.
    result = np.where(dilated_mask[:, :, None] > 0, original_image, blurred_image)

    cv2.imwrite(output_path, result)
133
+ # @title #进度条批量处理文件夹process_images_in_folder(input_folder)
134
+ from tqdm import tqdm
135
+ import shutil
136
def process_images_in_folder(input_folder):
    """Run detection + background blur on every image in *input_folder*.

    Output goes to a fresh ``okframe2`` directory, recreated on each call so
    stale frames from a previous run never leak into the result.  Uses the
    module-level ``texts`` prompts for detection.

    Args:
        input_folder: directory containing .jpg/.jpeg/.png frames.
    """
    # Recreate the output folder from scratch.  (The original created it,
    # unconditionally rmtree'd it, then created it again — this is the
    # equivalent straightforward form.)
    output_folder = 'okframe2'
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder)

    # Only process the common image extensions.
    files = [f for f in os.listdir(input_folder)
             if f.endswith(('.jpg', '.png', '.jpeg'))]

    # tqdm gives a per-frame progress bar.
    for filename in tqdm(files, desc="Processing Images"):
        image_input_path = os.path.join(input_folder, filename)

        # Detect and segment the prompted objects in this frame.
        detections = detect_objects_in_image(
            image_input_path=image_input_path,
            texts=texts
        )

        # Blur the background and write the composite frame.
        merge_image_with_mask(
            image_input_path=image_input_path,
            detections=detections,
            output_folder=output_folder
        )
165
+
166
# Example usage: blur everything except the detected objects in every frame
# under 'frame2'.  The original also computed `video_name` from an undefined
# `video_input_path` (an immediate NameError) and never used the value — that
# line has been removed.
input_folder = 'frame2'
process_images_in_folder(input_folder)
170
+
171
+
172
+