"""Streamlit app that cuts text-prompted objects (e.g. ID cards) out of an image.

The user supplies an image URL and a comma-separated list of prompts. CLIPSeg
produces one mask per prompt; every connected mask region whose bounding box
is at least the chosen area is cropped from the original image and displayed.
"""
import time
from io import BytesIO

import numpy as np
import requests
import streamlit as st
import torch
from PIL import Image
from scipy.ndimage import find_objects, label
from transformers import AutoProcessor, CLIPSegForImageSegmentation

MODEL_NAME = "CIDAS/clipseg-rd64-refined"
MASK_THRESHOLD = 0.5  # probability above which a pixel counts as foreground
REQUEST_TIMEOUT_S = 30  # seconds to wait for the image download


@st.cache_resource
def load_model(device_name):
    """Load the CLIPSeg processor and model once per device.

    Cached so that Streamlit reruns (every widget interaction) do not reload
    the weights from disk.

    Args:
        device_name: Device string such as ``"cpu"`` or ``"cuda"``.

    Returns:
        A ``(processor, model)`` tuple with the model moved to the device.
    """
    processor = AutoProcessor.from_pretrained(MODEL_NAME)
    model = CLIPSegForImageSegmentation.from_pretrained(MODEL_NAME).to(device_name)
    return processor, model


def parse_prompts(raw):
    """Split a comma-separated string into stripped, non-empty prompts."""
    return [part.strip() for part in raw.split(",") if part.strip()]


def fetch_image(url):
    """Download *url* and return it as an RGB PIL image.

    Raises:
        requests.RequestException: On network errors, timeouts or HTTP errors.
        OSError: If the payload cannot be decoded as an image.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    # Normalise to three channels so the later per-pixel "non-black" test over
    # the last axis is well defined for grayscale, palette and RGBA inputs.
    return Image.open(BytesIO(response.content)).convert("RGB")


def predict_masks(processor, model, image, prompts, device):
    """Run CLIPSeg and return one boolean NumPy mask per prompt."""
    inputs = processor(
        text=prompts,
        images=[image] * len(prompts),
        padding=True,
        return_tensors="pt",
    ).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits
    # NOTE(review): with a single prompt some transformers releases appear to
    # return 2-D logits (batch axis squeezed away); restore the batch axis so
    # iterating over the result yields whole masks rather than pixel rows.
    if logits.ndim == 2:
        logits = logits.unsqueeze(0)
    return (torch.sigmoid(logits) > MASK_THRESHOLD).cpu().numpy()


def resize_mask(mask, width, height):
    """Scale a boolean mask to ``(height, width)`` with nearest-neighbour."""
    # Go through uint8 so the resampling filter is explicit instead of relying
    # on how PIL treats 1-bit images.
    mask_image = Image.fromarray(mask.astype(np.uint8) * 255)
    resized = mask_image.resize((width, height), resample=Image.NEAREST)
    return np.array(resized) > 127


def crop_to_content(region):
    """Trim all-black border rows/columns from an ``(H, W, C)`` array.

    Returns:
        The trimmed view, or ``None`` when every pixel is black.
    """
    non_black = np.any(region > 0, axis=-1)
    if not non_black.any():
        return None
    rows = np.flatnonzero(non_black.any(axis=1))
    cols = np.flatnonzero(non_black.any(axis=0))
    return region[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]


def iter_object_crops(mask, image_np, min_area):
    """Yield an image crop for each connected region of *mask*.

    Regions whose bounding-box area is below *min_area* are skipped, as are
    crops that contain only black pixels.
    """
    labeled_mask, _ = label(mask)
    for box in find_objects(labeled_mask):
        box_height = box[0].stop - box[0].start
        box_width = box[1].stop - box[1].start
        if box_height * box_width < min_area:
            continue
        # Cropping the bounding box directly is equivalent to copying it onto
        # a black full-size canvas and trimming the canvas afterwards, without
        # allocating two image-sized arrays per region.
        crop = crop_to_content(image_np[box])
        if crop is not None:
            yield crop


def run_segmentation(url, prompts, area_threshold, device_option):
    """Download the image, segment it for each prompt and render the crops."""
    if not prompts:
        st.warning("请至少输入一个要检测的对象。")
        return

    # Fall back to CPU when GPU was requested but CUDA is unavailable.
    use_gpu = device_option == "GPU" and torch.cuda.is_available()
    device = torch.device("cuda" if use_gpu else "cpu")
    st.write(f"设备: {device}")

    start_time = time.perf_counter()

    processor, model = load_model(str(device))

    try:
        image = fetch_image(url)
    except requests.RequestException as exc:
        # Must precede OSError: RequestException is itself an OSError subclass.
        st.error(f"无法下载图像: {exc}")
        return
    except OSError as exc:
        st.error(f"无法解析图像: {exc}")
        return

    st.image(image, caption="原图", use_column_width=True)

    masks = predict_masks(processor, model, image, prompts, device)
    image_np = np.array(image)
    height, width = image_np.shape[:2]

    # Numbering runs across all prompts, not per prompt.
    counter = 1
    for prompt, mask in zip(prompts, masks):
        full_size_mask = resize_mask(mask, width, height)
        for crop in iter_object_crops(full_size_mask, image_np, area_threshold):
            st.image(crop, caption=f"{prompt} - 图 {counter}", use_column_width=True)
            counter += 1

    elapsed_time = time.perf_counter() - start_time
    st.write(f"程序运行时间: {elapsed_time:.2f} 秒")


# ---- UI ---------------------------------------------------------------------

st.title("使用图像分割模型分割证件照")

# Image URL to segment.
url = st.text_input("输入图像地址:", "https://i.ibb.co/GRCGQ3n/464.jpg")

# Comma-separated text prompts describing the objects to find.
texts_input = st.text_input("输入要检测的对象(以逗号分隔):", "a card")
texts = parse_prompts(texts_input)

# Minimum bounding-box area (in pixels) for a region to be kept.
area_threshold = st.slider("忽略小区域的面积阈值", 0, 10000, 5000)

# Preferred compute device.
device_option = st.radio("选择设备", ("GPU", "CPU"))

if st.button('提交'):
    run_segmentation(url, texts, area_threshold, device_option)