"""Gradio demo for AdaCLIP zero-shot anomaly detection.

Builds one ``AdaCLIP_Trainer`` at import time, then serves a small web UI in
which the user uploads an image, types a class name and picks which
pre-trained checkpoint to use. The UI shows a heat-map overlay and the
image-level anomaly score.
"""
import json
import os
import warnings

# cuBLAS workspace configuration; kept ahead of the torch import, as in the
# original script (see the PyTorch reproducibility notes for its meaning).
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from scipy.ndimage import gaussian_filter

from method import AdaCLIP_Trainer

############ Init Model
ckt_path1 = 'weights/pretrained_mvtec_colondb.pth'
ckt_path2 = "weights/pretrained_visa_clinicdb.pth"
ckt_path3 = 'weights/pretrained_all.pth'

# UI label -> checkpoint path. The order is the lookup precedence and mirrors
# the original if/elif chain.
CHECKPOINTS = (
    ('MVTec AD+Colondb', ckt_path1),
    ('VisA+Clinicdb', ckt_path2),
    ('All', ckt_path3),
)

# Configurations
image_size = 518
device = 'cuda' if torch.cuda.is_available() else 'cpu'
backbone = "ViT-L-14-336"
prompting_depth = 4
prompting_length = 5
prompting_type = 'SD'
prompting_branch = 'VL'
use_hsf = True
k_clusters = 20

config_path = os.path.join('./model_configs', f'{backbone}.json')

# Prepare model
with open(config_path, 'r', encoding='utf-8') as f:
    model_configs = json.load(f)

# Set up the feature hierarchy: take features at the end of each quarter of
# the vision transformer's layers.
n_layers = model_configs['vision_cfg']['layers']
substage = n_layers // 4
features_list = [substage, substage * 2, substage * 3, substage * 4]

model = AdaCLIP_Trainer(
    backbone=backbone,
    feat_list=features_list,
    input_dim=model_configs['vision_cfg']['width'],
    output_dim=model_configs['embed_dim'],
    learning_rate=0.,
    device=device,
    image_size=image_size,
    prompting_depth=prompting_depth,
    prompting_length=prompting_length,
    prompting_branch=prompting_branch,
    prompting_type=prompting_type,
    use_hsf=use_hsf,
    k_clusters=k_clusters,
).to(device)

# Path of the checkpoint currently loaded into ``model`` (None until the
# first request), so repeated requests do not re-read the same weights.
_active_ckpt = None


def _select_checkpoint(options):
    """Return the checkpoint path matching *options*, defaulting to 'All'.

    *options* is normally the string chosen in the radio group, but it is
    None when the user selected nothing; any container supporting ``in``
    is accepted as well, as before.
    """
    selected = options if options else ()
    for label, path in CHECKPOINTS:
        if label in selected:
            return path
    # Default to 'All' if no valid option is provided
    print('Invalid option. Defaulting to All.')
    return ckt_path3


def _ensure_checkpoint(path):
    """Load *path* into the global model unless it is already active."""
    global _active_ckpt
    # NOTE(review): assumes model.load() fully overwrites the weights and
    # that inference does not modify them -- confirm against AdaCLIP_Trainer.
    if path != _active_ckpt:
        model.load(path)
        _active_ckpt = path


def process_image(image, text, options):
    """Run anomaly detection on one image and build the visualisation.

    Args:
        image: PIL image supplied by the Gradio image input.
        text: Class name forwarded to the model as the text prompt.
        options: Selected pre-trained dataset label; None or an unknown
            value falls back to the 'All' checkpoint.

    Returns:
        A ``(overlay, score)`` tuple: the heat-map blended over the resized
        input as a PIL image, and the anomaly score formatted to three
        decimals.

    Raises:
        gr.Error: If no image was uploaded.
    """
    if image is None:
        raise gr.Error('Please upload an image before submitting.')

    # Load the model based on selected options
    _ensure_checkpoint(_select_checkpoint(options))

    # Ensure image is in RGB mode
    image = image.convert('RGB')

    # OpenCV works in BGR; this copy is only the background for the overlay.
    np_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    np_image = cv2.resize(np_image, (image_size, image_size))

    # Preprocess the image and run the model
    img_input = model.preprocess(image).unsqueeze(0).to(model.device)
    with torch.no_grad():
        anomaly_map, anomaly_score = model.clip_model(
            img_input, [text], aggregation=True
        )

    # Process anomaly map: first (only) batch item, smoothed for display.
    anomaly_map = anomaly_map[0, :, :].cpu().numpy()
    score = anomaly_score[0].item()
    anomaly_map = gaussian_filter(anomaly_map, sigma=4)
    # Presumably the map lies in [0, 1]; clip so out-of-range values saturate
    # instead of wrapping around in the uint8 cast.
    anomaly_map = np.clip(anomaly_map * 255, 0, 255).astype(np.uint8)

    # Apply color map and blend with original image
    heat_map = cv2.applyColorMap(anomaly_map, cv2.COLORMAP_JET)
    vis_map = cv2.addWeighted(heat_map, 0.5, np_image, 0.5, 0)

    # Convert OpenCV image back to PIL image for Gradio
    vis_map_pil = Image.fromarray(cv2.cvtColor(vis_map, cv2.COLOR_BGR2RGB))

    return vis_map_pil, f'{score:.3f}'


# Define examples
examples = [
    ["asset/img.png", "candle", "MVTec AD+Colondb"],
    ["asset/img2.png", "bottle", "VisA+Clinicdb"],
    ["asset/img3.png", "button", "All"],
]

# Gradio interface layout
demo = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Class Name"),
        gr.Radio(["MVTec AD+Colondb", "VisA+Clinicdb", "All"], label="Pre-trained Datasets"),
    ],
    outputs=[
        gr.Image(type="pil", label="Output Image"),
        gr.Textbox(label="Anomaly Score"),
    ],
    examples=examples,
    title="AdaCLIP -- Zero-shot Anomaly Detection",
    description="Upload an image, enter class name, and select pre-trained datasets to do zero-shot anomaly detection",
)

# Launch the demo only when executed as a script, so importing this module
# (e.g. for Gradio's reload mode) does not start a server.
# To expose it on the network instead:
#   demo.launch(server_name="0.0.0.0", server_port=10002)
if __name__ == '__main__':
    demo.launch()