|
import streamlit as st
|
|
import torch
|
|
import torchvision.transforms as T
|
|
import numpy as np
|
|
import cv2
|
|
from PIL import Image, UnidentifiedImageError
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
# Detectron2 is treated as an optional dependency: the flag below records
# whether every Detectron2 name used by this script could be imported, so the
# rest of the app can check it instead of failing at import time.
# NOTE(review): the st.error calls in the handlers run before
# st.set_page_config further down the script; some Streamlit versions require
# set_page_config to be the first Streamlit command - confirm this is OK.
d2_imported_successfully = False
try:
    import detectron2
    from detectron2.engine import DefaultPredictor
    from detectron2.config import get_cfg
    from detectron2 import model_zoo
    from detectron2.utils.visualizer import Visualizer, ColorMode
    from detectron2.data import MetadataCatalog
    from detectron2.structures import Boxes

    # Only reached when every import above succeeded.
    d2_imported_successfully = True
    print("Detectron2 utilities imported successfully.")
except ImportError:
    # Package missing (or one of its submodules could not be imported).
    st.error("Detectron2 not found or not installed correctly. Please ensure it's installed in your environment.")
    print("❌ Failed to import Detectron2 utilities.")
except Exception as import_exc:
    # Any other failure raised while importing Detectron2: report it and
    # leave the flag False.
    st.error(f"An error occurred during Detectron2 imports: {import_exc}")
    print(f"❌ An error occurred during Detectron2 imports: {import_exc}")
|
|
|
|
|
|
|
|
from torchvision import models as torchvision_models
|
|
import torch.nn as nn
|
|
|
|
|
|
|
|
|
|
# --- Runtime configuration ---------------------------------------------------

# Torch device used for the dimension CNN: CUDA when PyTorch reports it as
# available, otherwise CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Side length (in pixels) of the square image the dimension CNN is fed
# (used by the T.Resize step in predict_dimensions_cnn).
CNN_INPUT_SIZE = 224

# Per-channel mean / std handed to T.Normalize in predict_dimensions_cnn.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Weights file for the dimension CNN, resolved relative to the working directory.
MODEL_PATH = "pix3d_dimension_estimator_mask_crop.pth"

# Output directory; created eagerly at import time if it does not exist yet.
OUTPUT_DIR = "streamlit_d2_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
def create_dimension_estimator_cnn_for_inference(num_outputs=4):
    """Build a ResNet-50 whose final layer is replaced by a small regression head.

    The backbone is created with ``weights=None`` (no pretrained weights are
    requested here). Its ``fc`` layer is replaced by
    ``Linear(in_features, 512) -> ReLU -> Dropout(0.5) -> Linear(512, num_outputs)``.

    Args:
        num_outputs: Number of values the final linear layer produces.

    Returns:
        The modified torchvision ResNet-50 module.
    """
    backbone = torchvision_models.resnet50(weights=None)

    # Layers are kept in this exact order so their indices inside the
    # Sequential ("0".."3") - and therefore the state-dict keys - are stable.
    head_layers = [
        nn.Linear(backbone.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, num_outputs),
    ]
    backbone.fc = nn.Sequential(*head_layers)
    return backbone
|
|
|
|
@st.cache_resource
def load_dimension_model():
    """Load the dimension-estimation CNN from ``MODEL_PATH``.

    The result is cached by ``st.cache_resource``.

    Returns:
        The model moved to ``DEVICE`` and switched to eval mode, or ``None``
        when the weights file does not exist or loading raises; in both
        failure cases an ``st.error`` message is emitted.
    """
    if not os.path.exists(MODEL_PATH):
        st.error(f"Dimension estimation model not found at {MODEL_PATH}. Please check the path.")
        return None

    try:
        net = create_dimension_estimator_cnn_for_inference()
        # map_location lets the checkpoint be loaded onto whichever device
        # this process is using.
        state_dict = torch.load(MODEL_PATH, map_location=DEVICE)
        net.load_state_dict(state_dict)
        net.to(DEVICE)
        net.eval()
        print(f"Dimension estimation model loaded from {MODEL_PATH}")
    except Exception as load_err:
        st.error(f"Error loading dimension estimation model: {load_err}")
        return None

    return net
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_detectron2_model():
    """Create a Detectron2 ``DefaultPredictor`` from a model-zoo Mask R-CNN config.

    The config file and checkpoint URL are both looked up in the model zoo
    under ``COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml``; the test
    score threshold is set to 0.5 and the device follows CUDA availability.
    The result is cached by ``st.cache_resource``.

    Returns:
        ``(predictor, cfg)`` on success, or ``(None, None)`` when Detectron2
        was not imported or building the predictor raises (the latter also
        emits an ``st.error`` message).
    """
    if not d2_imported_successfully:
        return None, None

    zoo_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    try:
        cfg = get_cfg()
        cfg.merge_from_file(model_zoo.get_config_file(zoo_config))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
        cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

        predictor = DefaultPredictor(cfg)
        print("Detectron2 predictor created.")
        return predictor, cfg
    except Exception as load_err:
        st.error(f"Error loading Detectron2 model: {load_err}")
        return None, None
|
|
|
|
|
|
|
|
|
|
def get_largest_instance_index(instances):
    """Return the index of the instance with the largest area.

    Area is taken from ``pred_masks`` (sum over dims 1 and 2 of each mask)
    when that field is present; otherwise from ``pred_boxes.tensor`` as
    ``(col2 - col0) * (col3 - col1)``.

    Args:
        instances: Object supporting ``len()``, ``has(name)`` and the
            ``pred_masks`` / ``pred_boxes`` attributes used above.

    Returns:
        ``-1`` when ``instances`` is empty; the argmax index of the computed
        areas when an area field is available; ``0`` otherwise.
    """
    if len(instances) == 0:
        return -1

    areas = None
    if instances.has("pred_masks"):
        areas = instances.pred_masks.sum(dim=(1, 2))
    elif instances.has("pred_boxes"):
        coords = instances.pred_boxes.tensor
        widths = coords[:, 2] - coords[:, 0]
        heights = coords[:, 3] - coords[:, 1]
        areas = widths * heights

    if areas is not None and len(areas) > 0:
        return areas.argmax().item()

    # No usable area information: fall back to the first instance.
    return 0
|
|
|
|
def crop_from_mask(image_np_rgb, mask_tensor, padding=5):
    """Crop the padded bounding box of a mask out of an image.

    Args:
        image_np_rgb: NumPy image indexed as ``[row, col, ...]``. Any trailing
            axes (e.g. colour channels) are kept untouched.
        mask_tensor: Torch tensor whose non-zero entries (after conversion to
            ``uint8``) mark the object. Assumed to be 2-D with the same
            height/width as the image - TODO confirm against callers.
        padding: Number of pixels added on every side of the mask's bounding
            box before clamping to the image borders. Expected to be >= 0.

    Returns:
        A slice (view) of ``image_np_rgb`` covering the padded bounding box,
        or ``None`` when the mask is empty or its bounding box does not
        intersect the image.
    """
    mask_np = mask_tensor.cpu().numpy().astype(np.uint8)

    # Indices of the rows / columns that contain at least one mask pixel.
    rows = np.where(mask_np.any(axis=1))[0]
    cols = np.where(mask_np.any(axis=0))[0]
    if rows.size == 0 or cols.size == 0:
        return None

    img_h, img_w = image_np_rgb.shape[:2]
    ymin = max(0, int(rows[0]) - padding)
    xmin = max(0, int(cols[0]) - padding)
    ymax = min(img_h - 1, int(rows[-1]) + padding)
    xmax = min(img_w - 1, int(cols[-1]) + padding)

    # The bounds are inclusive, so ymin == ymax (or xmin == xmax) still
    # describes a valid one-pixel-thick crop. Only an inverted range - which
    # happens when the mask lies outside the image - is rejected.
    if ymin > ymax or xmin > xmax:
        return None

    return image_np_rgb[ymin:ymax + 1, xmin:xmax + 1]
|
|
|
|
def predict_dimensions_cnn(image_patch_np_rgb, model):
    """Run the dimension CNN on an image patch and format its outputs.

    The patch is cast to ``uint8`` if needed, resized to
    ``CNN_INPUT_SIZE x CNN_INPUT_SIZE``, normalised with ``IMAGENET_MEAN`` /
    ``IMAGENET_STD`` and passed through ``model`` under ``torch.no_grad()``.
    The first three outputs are multiplied by 100 and the fourth by
    1_000_000 before being formatted with one decimal place.

    Args:
        image_patch_np_rgb: NumPy image patch; presumably H x W x 3 RGB -
            verify against the caller.
        model: The loaded CNN, or ``None``.

    Returns:
        A dict of formatted strings keyed ``"Length (cm)"``, ``"Width (cm)"``,
        ``"Height (cm)"``, ``"Volume (cm³)"`` and ``"Note"`` on success. When
        ``model`` is ``None`` or any exception is raised, a dict with keys
        ``"L"``, ``"W"``, ``"H"``, ``"V"`` set to ``"N/A"`` plus an
        explanatory ``"Note"``.
    """
    if model is None:
        return {"L": "N/A", "W": "N/A", "H": "N/A", "V": "N/A", "Note": "DimCNN not loaded"}

    try:
        patch = image_patch_np_rgb
        if patch.dtype != np.uint8:
            patch = patch.astype(np.uint8)

        preprocess = T.Compose([
            T.ToPILImage(),
            T.Resize((CNN_INPUT_SIZE, CNN_INPUT_SIZE)),
            T.ToTensor(),
            T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])
        # unsqueeze(0) adds the batch axis the model expects.
        batch = preprocess(patch).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            raw_output = model(batch)

        values = raw_output.squeeze().cpu().tolist()
        # squeeze() on a single-element output yields a scalar, not a list.
        if not isinstance(values, list):
            values = [values]
        # Pad with zeros so that four values are always available below.
        values.extend([0.0] * (4 - len(values)))

        length_cm, width_cm, height_cm = (value * 100 for value in values[:3])
        volume_cm3 = values[3] * 1_000_000

        return {
            "Length (cm)": f"{length_cm:.1f}",
            "Width (cm)": f"{width_cm:.1f}",
            "Height (cm)": f"{height_cm:.1f}",
            "Volume (cm³)": f"{volume_cm3:.1f}",
            "Note": "CustomCNN (Pix3D Scale)",
        }
    except Exception as err:
        print(f"Error in predict_dimensions_cnn: {err}")
        return {"L": "N/A", "W": "N/A", "H": "N/A", "V": "N/A", "Note": "CNN Predict Error"}
|
|
|
|
|
|
|
|
|
|
def _report_largest_object(instances, image_np_rgb, metadata, model):
    """Show class, crop and CNN dimension estimate for the largest detection.

    Args:
        instances: Detections already moved to CPU (result of the predictor).
        image_np_rgb: The uploaded image as an RGB NumPy array.
        metadata: Dataset metadata used to resolve class names, or ``None``.
        model: The dimension CNN passed on to ``predict_dimensions_cnn``.
    """
    largest_idx = get_largest_instance_index(instances)
    if largest_idx == -1:
        st.info("Could not determine the largest object to process for dimensions.")
        return

    instance = instances[largest_idx]

    class_name = "Unknown"
    if instance.has("pred_classes") and metadata and hasattr(metadata, 'thing_classes'):
        class_id = instance.pred_classes.item()
        if class_id < len(metadata.thing_classes):
            class_name = metadata.thing_classes[class_id]
    score = instance.scores.item() if instance.has("scores") else 0.0
    st.write(f"**Processing largest detected object:** {class_name} (Confidence: {score:.2f})")

    if not instance.has("pred_masks"):
        st.warning("No segmentation mask found for the largest instance. Cannot estimate dimensions with custom CNN.")
        return

    # `instance` holds exactly one detection, so its mask is at index 0.
    object_crop_rgb = crop_from_mask(image_np_rgb, instance.pred_masks[0])
    crop_is_valid = (
        object_crop_rgb is not None
        and object_crop_rgb.shape[0] > 0
        and object_crop_rgb.shape[1] > 0
    )
    if not crop_is_valid:
        st.error("Could not crop a valid object patch from the mask.")
        return

    st.image(object_crop_rgb, caption="Cropped Object Patch for Dimension CNN", width=250)
    dims = predict_dimensions_cnn(object_crop_rgb, model)
    st.write("📏 **Predicted Dimensions (from Custom CNN):**")
    st.json(dims)


# --- Page header -------------------------------------------------------------
st.set_page_config(layout="wide", page_title="Object Dimension Estimator")
st.title("📦 Object Dimension & Volume Estimation")
st.write("Upload an image. The system will detect objects using Detectron2, draw bounding boxes and masks, and estimate dimensions for the largest detected object using a custom-trained CNN.")

# --- Model loading -----------------------------------------------------------
dim_model = load_dimension_model()

d2_predictor = None
d2_cfg = None
d2_metadata = None
if d2_imported_successfully:
    d2_predictor, d2_cfg = load_detectron2_model()
    if d2_cfg is not None:
        # NOTE(review): MetadataCatalog.get may create an empty entry for an
        # unknown name instead of raising KeyError - confirm whether the
        # fallbacks below are reachable with the installed Detectron2.
        try:
            d2_metadata = MetadataCatalog.get(d2_cfg.DATASETS.TRAIN[0] if d2_cfg.DATASETS.TRAIN else "coco_2017_val")
        except KeyError:
            st.warning("Default COCO metadata not found. Trying 'coco_2017_train'. Class names might be generic if this also fails.")
            try:
                d2_metadata = MetadataCatalog.get("coco_2017_train")
            except KeyError:
                st.warning("Could not load standard COCO metadata. Using dummy metadata.")
                dummy_name = "streamlit_dummy_coco_dataset_main"
                if dummy_name not in MetadataCatalog.list():
                    MetadataCatalog.get(dummy_name).thing_classes = [f"class_{i}" for i in range(80)]
                d2_metadata = MetadataCatalog.get(dummy_name)

# --- Upload and processing ---------------------------------------------------
uploaded_file = st.file_uploader("Upload a single image (JPG/PNG)", accept_multiple_files=False, type=['jpg', 'jpeg', 'png'])

if uploaded_file:
    st.subheader(f"🖼️ Processing: {uploaded_file.name}")

    # Pre-initialise so every name is defined even when decoding fails.
    image_pil = None
    image_np_rgb = None
    image_bgr = None
    try:
        image_pil = Image.open(uploaded_file).convert("RGB")
        image_np_rgb = np.array(image_pil)
        # The predictor is fed a BGR array, hence the channel swap.
        image_bgr = cv2.cvtColor(image_np_rgb, cv2.COLOR_RGB2BGR)
    except UnidentifiedImageError:
        st.error("Cannot identify image file. Please upload a valid image.")
        image_bgr = None
    except Exception as e:
        st.error(f"Error loading image: {e}")
        image_bgr = None

    if image_bgr is None:
        st.error("Image could not be loaded for processing.")
    else:
        st.image(image_pil, caption="Uploaded Image", use_container_width=True)

        if d2_predictor is None or dim_model is None:
            st.error("One or more models (Detectron2, Dimension CNN) failed to load. Cannot process.")
        else:
            with st.spinner("Detecting objects and estimating dimensions..."):
                outputs = d2_predictor(image_bgr)
                instances = outputs["instances"].to("cpu")

                if len(instances) == 0:
                    st.warning("No objects detected by Detectron2.")
                else:
                    # Visualizer takes an RGB image, so the RGB array is
                    # passed directly instead of reversing the BGR copy.
                    v = Visualizer(image_np_rgb, metadata=d2_metadata, scale=0.8, instance_mode=ColorMode.IMAGE_BW)
                    out_vis = v.draw_instance_predictions(instances)
                    # get_image() already returns RGB, which is what st.image
                    # expects by default. Reversing the channels here (as is
                    # done for OpenCV display) would swap red and blue in the
                    # drawn masks and boxes.
                    annotated_img_d2_rgb = out_vis.get_image()
                    st.image(annotated_img_d2_rgb, caption="Detectron2 Detections (Masks & Boxes)", use_container_width=True)

                    _report_largest_object(instances, image_np_rgb, d2_metadata, dim_model)

# --- Sidebar status ----------------------------------------------------------
st.sidebar.markdown("---")
st.sidebar.subheader("ℹ️ System Status")
st.sidebar.markdown(f"**Processing Device:** `{DEVICE}`")
st.sidebar.markdown(f"**Detectron2 Predictor:** `{'Loaded' if d2_predictor else 'Not Loaded'}`")
st.sidebar.markdown(f"**Dimension CNN:** `{'Loaded' if dim_model else 'Not Loaded'}`")
if not os.path.exists(MODEL_PATH):
    st.sidebar.warning("Dimension CNN weights file not found at the specified path.")
|
|
|
|
|