import streamlit as st
import torch
import numpy as np
from PIL import Image
import requests
from io import BytesIO
from transformers import AutoProcessor, CLIPSegForImageSegmentation
from scipy.ndimage import label, find_objects
import time

# Page title.
st.title("使用图像分割模型分割证件照")

# URL of the image to segment.
url = st.text_input("输入图像地址:", "https://i.ibb.co/GRCGQ3n/464.jpg")

# Comma-separated text prompts naming the objects to look for; each entry
# is stripped of surrounding whitespace.
texts_input = st.text_input("输入要检测的对象（以逗号分隔）:", "a card")
texts = list(map(str.strip, texts_input.split(',')))

# Regions whose bounding-box area is below this value are ignored.
area_threshold = st.slider("忽略小区域的面积阈值", min_value=0, max_value=10000, value=5000)

# Preferred compute device (GPU or CPU).
device_option = st.radio("选择设备", options=("GPU", "CPU"))

@st.cache_resource
def _load_clipseg(device_name):
    """Load the CLIPSeg processor and model once per device.

    Streamlit re-executes the whole script on every interaction; caching
    keeps the weights in memory instead of reloading them on each click.

    Args:
        device_name: Device string such as ``"cpu"`` or ``"cuda"``.

    Returns:
        A ``(processor, model)`` tuple with the model moved to the device.
    """
    processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
    model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined").to(device_name)
    return processor, model


def _fetch_image(image_url):
    """Download ``image_url`` and return it as an RGB PIL image.

    Raises:
        requests.RequestException: On network errors, invalid URLs or a
            non-2xx HTTP status.
        OSError: If the payload cannot be decoded as an image.
    """
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    # Normalise to RGB so the NumPy array is always (height, width, 3),
    # whatever the source mode (grayscale, palette, RGBA, ...).
    return Image.open(BytesIO(response.content)).convert("RGB")


# Submit button: run segmentation for every prompt and show each detected
# region as a bounding-box crop of the original image.
if st.button('提交'):
    # Drop empty prompts (blank input, trailing commas) before inference.
    prompts = [text for text in texts if text]
    if not prompts:
        st.warning("请至少输入一个要检测的对象。")
        st.stop()

    # Fall back to the CPU when a GPU was requested but CUDA is unavailable.
    device = torch.device('cuda' if device_option == 'GPU' and torch.cuda.is_available() else 'cpu')
    st.write(f"设备: {device}")

    start_time = time.time()  # Start timing.

    processor, model = _load_clipseg(str(device))

    try:
        image = _fetch_image(url)
    except (requests.RequestException, OSError) as exc:
        st.error(f"无法加载图像: {exc}")
        st.stop()

    # Show the original image.
    st.image(image, caption="原图", use_column_width=True)

    # The processor expects one image per text prompt.
    inputs = processor(text=prompts, images=[image] * len(prompts), padding=True, return_tensors="pt").to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Turn logits into probabilities, binarise them, and move to the CPU.
    probabilities = torch.sigmoid(outputs.logits)
    masks = (probabilities > 0.5).cpu().numpy()
    # With a single prompt some transformers releases return logits without
    # a batch dimension; restore it so the loop always sees one mask per prompt.
    if masks.ndim == 2:
        masks = masks[np.newaxis, ...]

    image_np = np.array(image)
    height, width = image_np.shape[:2]

    # Running figure number across all prompts.
    global_counter = 1

    for prompt, mask in zip(prompts, masks):
        # Upscale the binary mask to the original image size. Nearest-neighbour
        # is what Pillow applies to binary images anyway; request it explicitly.
        mask_image = Image.fromarray(mask.astype(np.uint8) * 255)
        mask_resized = np.array(mask_image.resize((width, height), resample=Image.NEAREST)) > 127

        # Label connected regions and fetch the bounding box of each one.
        labeled_mask, _ = label(mask_resized)
        for object_slice in find_objects(labeled_mask):
            row_span, col_span = object_slice
            area = (row_span.stop - row_span.start) * (col_span.stop - col_span.start)

            # Ignore regions whose bounding box is smaller than the threshold.
            if area < area_threshold:
                continue

            # Crop the bounding box directly from the original image. Trimming
            # by "non-black" pixels would cut away genuinely black content.
            cropped_image = image_np[object_slice]
            st.image(cropped_image, caption=f"{prompt} - 图 {global_counter}", use_column_width=True)
            global_counter += 1

    end_time = time.time()  # Stop timing.
    elapsed_time = end_time - start_time

    # Report the total run time.
    st.write(f"程序运行时间: {elapsed_time:.2f} 秒")