import gc
import multiprocessing as mp
import os
import shutil
import sys
import time
from os import path

import cv2
import torch
from huggingface_hub import hf_hub_download
from PIL import Image

import ape
import detectron2.data.transforms as T
import gradio as gr
from ape.model_zoo import get_config_file
from demo_lazy import get_parser, setup_cfg
from detectron2.config import CfgNode
from detectron2.data.detection_utils import read_image
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
from detectron2.utils.logger import setup_logger
from predictor_lazy import VisualizationDemo

this_dir = path.dirname(path.abspath(__file__))

# os.system("git clone https://github.com/shenyunhang/APE.git")
# os.system("python3.10 -m pip install -e APE/")
example_list = [
    [
        this_dir + "/examples/Totoro01.png",
        # "Sky, Water, Tree, The biggest Chinchilla, The older girl wearing skirt on branch, Grass",
        "Girl with hat",
        # 0.05,
        0.25,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/Totoro01.png",
        "Sky, Water, Tree, Chinchilla, Grass, Girl",
        0.15,
        ["semantic segmentation"],
    ],
    [
        this_dir + "/examples/199_3946193540.jpg",
        "chess piece of horse head",
        0.30,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/TheGreatWall.jpg",
        "The Great Wall",
        0.1,
        ["semantic segmentation"],
    ],
    [
        this_dir + "/examples/Pisa.jpg",
        "Pisa",
        0.01,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/SolvayConference1927.jpg",
        # "Albert Einstein, Madame Curie",
        "Madame Curie",
        # 0.01,
        0.03,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/Transformers.webp",
        "Optimus Prime",
        0.11,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/Terminator3.jpg",
        "Humanoid Robot",
        0.10,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/MatrixRevolutionForZion.jpg",
        """machine killer with gun in fighting,
donut with colored granules on the surface,
railings being crossed by horses,
a horse running or jumping,
equestrian rider's helmet,
outdoor dog led by rope,
a dog being touched,
clothed dog,
basketball in hand,
a basketball player with both feet off the ground,
player with basketball in the hand,
spoon on the plate,
coffee cup with coffee,
the nearest dessert to the coffee cup,
the bartender who is mixing wine,
a bartender in a suit,
wine glass with wine,
a person in aprons,
pot with food,
a knife being used to cut vegetables,
striped sofa in the room,
a sofa with pillows on it in the room,
lights on in the room,
an indoor lying pet,
a cat on the sofa,
one pet looking directly at the camera indoors,
a bed with patterns in the room,
the lamp on the table beside the bed,
pillow placed at the head of the bed,
a blackboard full of words in the classroom,
child sitting at desks in the classroom,
a person standing in front of bookshelves in the library,
the table someone is using in the library,
a person who touches books in the library,
a person standing in front of the cake counter,
a square plate full of cakes,
a cake decorated with cream,
hot dog with vegetables,
hot dog with sauce on the surface,
red sausage,
flowerpot with flowers potted inside,
monochrome flowerpot,
a flowerpot filled with black soil,
apple growing on trees,
red complete apple,
apple with a stalk,
a woman brushing her teeth,
toothbrush held by someone,
toilet brush with colored bristles,
a customer whose hair is being cut by barber,
a barber at work,
cloth covering the barber,
shopping cart pushed by people in the supermarket,
shopping cart with people in the supermarket,
shopping cart full of goods,
a child wearing a mask,
refrigerator with fruit,
a drink bottle in the refrigerator,
refrigerator with more than two doors,
a watch placed on a table or cloth,
a watch with three or more watch hands can be seen,
a watch with one or more small dials,
clothes hanger,
a piece of clothing hanging on the hanger,
a piece of clothing worn on plastic models,
leather bag with glossy surface,
backpack,
open package,
a fish held by people,
a person who is fishing with a fishing rod,
a fisherman standing on the shore with his body soaked in water, camera hold on someone's shoulder,
a person being interviewed,
a person with microphone hold in hand,
""",
        0.20,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/094_56726435.jpg",
        # "donut with colored granules on the surface",
        """donut with colored granules on the surface,
railings being crossed by horses,
a horse running or jumping,
equestrian rider's helmet,
outdoor dog led by rope,
a dog being touched,
clothed dog,
basketball in hand,
a basketball player with both feet off the ground,
player with basketball in the hand,
spoon on the plate,
coffee cup with coffee,
the nearest dessert to the coffee cup,
the bartender who is mixing wine,
a bartender in a suit,
wine glass with wine,
a person in aprons,
pot with food,
a knife being used to cut vegetables,
striped sofa in the room,
a sofa with pillows on it in the room,
lights on in the room,
an indoor lying pet,
a cat on the sofa,
one pet looking directly at the camera indoors,
a bed with patterns in the room,
the lamp on the table beside the bed,
pillow placed at the head of the bed,
a blackboard full of words in the classroom,
a blackboard or whiteboard with something pasted,
child sitting at desks in the classroom,
a person standing in front of bookshelves in the library,
the table someone is using in the library,
a person who touches books in the library,
a person standing in front of the cake counter,
a square plate full of cakes,
a cake decorated with cream,
hot dog with vegetables,
hot dog with sauce on the surface,
red sausage,
flowerpot with flowers potted inside,
monochrome flowerpot,
a flowerpot filled with black soil,
apple growing on trees,
red complete apple,
apple with a stalk,
a woman brushing her teeth,
toothbrush held by someone,
toilet brush with colored bristles,
a customer whose hair is being cut by barber,
a barber at work,
cloth covering the barber,
a plastic toy,
a plush toy,
a humanoid toy,
shopping cart pushed by people in the supermarket,
shopping cart with people in the supermarket,
shopping cart full of goods,
a child wearing a mask,
a mask on face with half a face exposed,
a mask on face with only eyes exposed,
refrigerator with fruit,
a drink bottle in the refrigerator,
refrigerator with more than two doors,
a watch placed on a table or cloth,
a watch with three or more watch hands can be seen,
a watch with one or more small dials,
clothes hanger,
a piece of clothing hanging on the hanger,
a piece of clothing worn on plastic models,
leather bag with glossy surface,
backpack,
open package,
a fish held by people,
a person who is fishing with a fishing rod,
a fisherman standing on the shore with his body soaked in water, camera hold on someone's shoulder,
a person being interviewed,
a person with microphone hold in hand,
""",
        0.50,
        ["object detection", "instance segmentation"],
    ],
    [
        this_dir + "/examples/013_438973263.jpg",
        # "a male lion with a mane",
        """a male lion with a mane,
railings being crossed by horses,
a horse running or jumping,
equestrian rider's helmet,
outdoor dog led by rope,
a dog being touched,
clothed dog,
basketball in hand,
a basketball player with both feet off the ground,
player with basketball in the hand,
spoon on the plate,
coffee cup with coffee,
the nearest dessert to the coffee cup,
the bartender who is mixing wine,
a bartender in a suit,
wine glass with wine,
a person in aprons,
pot with food,
a knife being used to cut vegetables,
striped sofa in the room,
a sofa with pillows on it in the room,
lights on in the room,
an indoor lying pet,
a cat on the sofa,
one pet looking directly at the camera indoors,
a bed with patterns in the room,
the lamp on the table beside the bed,
pillow placed at the head of the bed,
a blackboard full of words in the classroom,
a blackboard or whiteboard with something pasted,
child sitting at desks in the classroom,
a person standing in front of bookshelves in the library,
the table someone is using in the library,
a person who touches books in the library,
a person standing in front of the cake counter,
a square plate full of cakes,
a cake decorated with cream,
hot dog with vegetables,
hot dog with sauce on the surface,
red sausage,
flowerpot with flowers potted inside,
monochrome flowerpot,
a flowerpot filled with black soil,
apple growing on trees,
red complete apple,
apple with a stalk,
a woman brushing her teeth,
toothbrush held by someone,
toilet brush with colored bristles,
a customer whose hair is being cut by barber,
a barber at work,
cloth covering the barber,
a plastic toy,
a plush toy,
a humanoid toy,
shopping cart pushed by people in the supermarket,
shopping cart with people in the supermarket,
shopping cart full of goods,
a child wearing a mask,
a mask on face with half a face exposed,
a mask on face with only eyes exposed,
refrigerator with fruit,
a drink bottle in the refrigerator,
refrigerator with more than two doors,
a watch placed on a table or cloth,
a watch with three or more watch hands can be seen,
a watch with one or more small dials,
clothes hanger,
a piece of clothing hanging on the hanger,
a piece of clothing worn on plastic models,
leather bag with glossy surface,
backpack,
open package,
a fish held by people,
a person who is fishing with a fishing rod,
a fisherman standing on the shore with his body soaked in water, camera hold on someone's shoulder,
a person being interviewed,
a person with microphone hold in hand,
""",
        # 0.25,
        0.50,
        ["object detection", "instance segmentation"],
    ],
]

ckpt_repo_id = "shenyunhang/APE"
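
# When save_memory is set, keep only the requested model on the inference
# device and park every other loaded model on CPU; otherwise all models
# already live on the right device and nothing needs to move.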
def setup_model(name):
    gc.collect()
    torch.cuda.empty_cache()
    if not save_memory:
        return
    for key, demo in all_demo.items():
        if key == name:
            demo.predictor.model.to(running_device)
        else:
            demo.predictor.model.to("cpu")
    gc.collect()
    torch.cuda.empty_cache()
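
# Thin wrappers that select a specific loaded model, apply the UI score
# threshold, and delegate to run_on_image(). Example call, using one of the
# bundled demo assets:
#   img, json_results = run_on_image_D(
#       this_dir + "/examples/Totoro01.png",
#       "Girl with hat",
#       0.25,
#       ["object detection", "instance segmentation"],
#   )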
def run_on_image_A(input_image_path, input_text, score_threshold, output_type):
    logger.info("run_on_image_A")
    setup_model("APE_A")
    demo = all_demo["APE_A"]
    cfg = all_cfg["APE_A"]
    demo.predictor.model.model_vision.test_score_thresh = score_threshold
    return run_on_image(
        input_image_path,
        input_text,
        output_type,
        demo,
        cfg,
    )


def run_on_image_C(input_image_path, input_text, score_threshold, output_type):
    logger.info("run_on_image_C")
    setup_model("APE_C")
    demo = all_demo["APE_C"]
    cfg = all_cfg["APE_C"]
    demo.predictor.model.model_vision.test_score_thresh = score_threshold
    return run_on_image(
        input_image_path,
        input_text,
        output_type,
        demo,
        cfg,
    )


def run_on_image_D(input_image_path, input_text, score_threshold, output_type):
    logger.info("run_on_image_D")
    setup_model("APE_D")
    demo = all_demo["APE_D"]
    cfg = all_cfg["APE_D"]
    demo.predictor.model.model_vision.test_score_thresh = score_threshold
    return run_on_image(
        input_image_path,
        input_text,
        output_type,
        demo,
        cfg,
    )


def run_on_image_comparison(input_image_path, input_text, score_threshold, output_type):
    logger.info("run_on_image_comparison")
    r = []
    for key in all_demo.keys():
        logger.info("run_on_image_comparison {}".format(key))
        setup_model(key)
        demo = all_demo[key]
        cfg = all_cfg[key]
        demo.predictor.model.model_vision.test_score_thresh = score_threshold
        img, _ = run_on_image(
            input_image_path,
            input_text,
            output_type,
            demo,
            cfg,
        )
        r.append(img)
    return r
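
# Core inference path: read the image (and optional mask prompt), resize
# large inputs to the 1024px shortest-edge augmentation, run the model, and
# return the visualization plus COCO-style JSON results.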
def run_on_image(
    input_image_path,
    input_text,
    output_type,
    demo,
    cfg,
):
    with_box = False
    with_mask = False
    with_sseg = False
    if "object detection" in output_type:
        with_box = True
    if "instance segmentation" in output_type:
        with_mask = True
    if "semantic segmentation" in output_type:
        with_sseg = True

    if isinstance(input_image_path, dict):
        input_mask_path = input_image_path["mask"]
        input_image_path = input_image_path["image"]
        print("input_image_path", input_image_path)
        print("input_mask_path", input_mask_path)
    else:
        input_mask_path = None
    print("input_text", input_text)

    if isinstance(cfg, CfgNode):
        input_format = cfg.INPUT.FORMAT
    else:
        if "model_vision" in cfg.model:
            input_format = cfg.model.model_vision.input_format
        else:
            input_format = cfg.model.input_format

    input_image = read_image(input_image_path, format="BGR")
    # img = cv2.imread(input_image_path)
    # cv2.imwrite("tmp.jpg", img)
    # # input_image = read_image("tmp.jpg", format=input_format)
    # input_image = read_image("tmp.jpg", format="BGR")

    if input_mask_path is not None:
        input_mask = read_image(input_mask_path, "L").squeeze(2)
        print("input_mask", input_mask)
        print("input_mask", input_mask.shape)
    else:
        input_mask = None

    if not with_box and not with_mask and not with_sseg:
        # No output requested: return the unmodified image (BGR -> RGB) and an
        # empty result list so callers can always unpack two values.
        return input_image[:, :, ::-1], []

    if input_image.shape[0] > 1024 or input_image.shape[1] > 1024:
        transform = aug.get_transform(input_image)
        input_image = transform.apply_image(input_image)
    else:
        transform = None

    start_time = time.time()
    predictions, visualized_output, _, metadata = demo.run_on_image(
        input_image,
        text_prompt=input_text,
        mask_prompt=input_mask,
        with_box=with_box,
        with_mask=with_mask,
        with_sseg=with_sseg,
    )
    logger.info(
        "{} in {:.2f}s".format(
            "detected {} instances".format(len(predictions["instances"]))
            if "instances" in predictions
            else "finished",
            time.time() - start_time,
        )
    )

    output_image = visualized_output.get_image()
    print("output_image", output_image.shape)
    # if input_format == "RGB":
    #     output_image = output_image[:, :, ::-1]
    if transform:
        output_image = transform.inverse().apply_image(output_image)
        print("output_image", output_image.shape)
    output_image = Image.fromarray(output_image)

    gc.collect()
    torch.cuda.empty_cache()

    if "instances" in predictions:
        json_results = instances_to_coco_json(predictions["instances"].to(demo.cpu_device), 0)
        for json_result in json_results:
            json_result["category_name"] = metadata.thing_classes[json_result["category_id"]]
            del json_result["image_id"]
    else:
        # Semantic-segmentation-only runs may not produce instances.
        json_results = []
    return output_image, json_results
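
# Model loaders. Each downloads a checkpoint from the Hugging Face Hub,
# builds the config via setup_cfg, disables federated loss for inference,
# trims the CLIP vision tower config to a single layer (presumably to save
# memory, since only the text encoder is needed here), and registers the
# resulting VisualizationDemo in all_demo / all_cfg.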
def load_APE_A():
    # init_checkpoint = "output2/APE/configs/LVISCOCOCOCOSTUFF_O365_OID_VG/ape_deta/ape_deta_vitl_eva02_lsj_cp_720k_20230504_002019/model_final.pth"
    init_checkpoint = "configs/LVISCOCOCOCOSTUFF_O365_OID_VG/ape_deta/ape_deta_vitl_eva02_lsj_cp_720k_20230504_002019/model_final.pth"
    init_checkpoint = hf_hub_download(repo_id=ckpt_repo_id, filename=init_checkpoint)

    args = get_parser().parse_args()
    args.config_file = get_config_file(
        "LVISCOCOCOCOSTUFF_O365_OID_VG/ape_deta/ape_deta_vitl_eva02_lsj1024_cp_720k.py"
    )
    args.confidence_threshold = 0.01
    args.opts = [
        "train.init_checkpoint='{}'".format(init_checkpoint),
        "model.model_language.cache_dir=''",
        "model.model_vision.select_box_nums_for_evaluation=500",
        "model.model_vision.backbone.net.xattn=False",
        "model.model_vision.transformer.encoder.pytorch_attn=True",
        "model.model_vision.transformer.decoder.pytorch_attn=True",
    ]
    if running_device == "cpu":
        args.opts += [
            "model.model_language.dtype='float32'",
        ]
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)

    cfg.model.model_vision.criterion[0].use_fed_loss = False
    cfg.model.model_vision.criterion[2].use_fed_loss = False
    cfg.train.device = running_device

    ape.modeling.text.eva01_clip.eva_clip._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["layers"] = 1
    ape.modeling.text.eva01_clip.eva_clip._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["fusedLN"] = False

    demo = VisualizationDemo(cfg, args=args)
    if save_memory:
        demo.predictor.model.to("cpu")
        # demo.predictor.model.half()
    else:
        demo.predictor.model.to(running_device)

    all_demo["APE_A"] = demo
    all_cfg["APE_A"] = cfg


def load_APE_B():
    # init_checkpoint = "output2/APE/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_REFCOCO/ape_deta/ape_deta_vitl_eva02_vlf_lsj_cp_1080k_20230702_225418/model_final.pth"
    init_checkpoint = "configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_REFCOCO/ape_deta/ape_deta_vitl_eva02_vlf_lsj_cp_1080k_20230702_225418/model_final.pth"
    init_checkpoint = hf_hub_download(repo_id=ckpt_repo_id, filename=init_checkpoint)

    args = get_parser().parse_args()
    args.config_file = get_config_file(
        "LVISCOCOCOCOSTUFF_O365_OID_VGR_REFCOCO/ape_deta/ape_deta_vitl_eva02_vlf_lsj1024_cp_1080k.py"
    )
    args.confidence_threshold = 0.01
    args.opts = [
        "train.init_checkpoint='{}'".format(init_checkpoint),
        "model.model_language.cache_dir=''",
        "model.model_vision.select_box_nums_for_evaluation=500",
        "model.model_vision.text_feature_bank_reset=True",
        "model.model_vision.backbone.net.xattn=False",
        "model.model_vision.transformer.encoder.pytorch_attn=True",
        "model.model_vision.transformer.decoder.pytorch_attn=True",
    ]
    if running_device == "cpu":
        args.opts += [
            "model.model_language.dtype='float32'",
        ]
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)

    cfg.model.model_vision.criterion[0].use_fed_loss = False
    cfg.model.model_vision.criterion[2].use_fed_loss = False
    cfg.train.device = running_device

    ape.modeling.text.eva01_clip.eva_clip._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["layers"] = 1
    ape.modeling.text.eva01_clip.eva_clip._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["fusedLN"] = False

    demo = VisualizationDemo(cfg, args=args)
    if save_memory:
        demo.predictor.model.to("cpu")
        # demo.predictor.model.half()
    else:
        demo.predictor.model.to(running_device)

    all_demo["APE_B"] = demo
    all_cfg["APE_B"] = cfg


def load_APE_C():
    # init_checkpoint = "output2/APE/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO/ape_deta/ape_deta_vitl_eva02_vlf_lsj_cp_1080k_20230702_210950/model_final.pth"
    init_checkpoint = "configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO/ape_deta/ape_deta_vitl_eva02_vlf_lsj_cp_1080k_20230702_210950/model_final.pth"
    init_checkpoint = hf_hub_download(repo_id=ckpt_repo_id, filename=init_checkpoint)

    args = get_parser().parse_args()
    args.config_file = get_config_file(
        "LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO/ape_deta/ape_deta_vitl_eva02_vlf_lsj1024_cp_1080k.py"
    )
    args.confidence_threshold = 0.01
    args.opts = [
        "train.init_checkpoint='{}'".format(init_checkpoint),
        "model.model_language.cache_dir=''",
        "model.model_vision.select_box_nums_for_evaluation=500",
        "model.model_vision.text_feature_bank_reset=True",
        "model.model_vision.backbone.net.xattn=False",
        "model.model_vision.transformer.encoder.pytorch_attn=True",
        "model.model_vision.transformer.decoder.pytorch_attn=True",
    ]
    if running_device == "cpu":
        args.opts += [
            "model.model_language.dtype='float32'",
        ]
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)

    cfg.model.model_vision.criterion[0].use_fed_loss = False
    cfg.model.model_vision.criterion[2].use_fed_loss = False
    cfg.train.device = running_device

    ape.modeling.text.eva01_clip.eva_clip._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["layers"] = 1
    ape.modeling.text.eva01_clip.eva_clip._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["fusedLN"] = False

    demo = VisualizationDemo(cfg, args=args)
    if save_memory:
        demo.predictor.model.to("cpu")
        # demo.predictor.model.half()
    else:
        demo.predictor.model.to(running_device)

    all_demo["APE_C"] = demo
    all_cfg["APE_C"] = cfg


def load_APE_D():
    # init_checkpoint = "output2/APE/configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4_1080k_mdl_20230829_162438/model_final.pth"
    init_checkpoint = "configs/LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4_1080k_mdl_20230829_162438/model_final.pth"
    init_checkpoint = hf_hub_download(repo_id=ckpt_repo_id, filename=init_checkpoint)

    args = get_parser().parse_args()
    args.config_file = get_config_file(
        "LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/ape_deta/ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py"
    )
    args.confidence_threshold = 0.01
    args.opts = [
        "train.init_checkpoint='{}'".format(init_checkpoint),
        "model.model_language.cache_dir=''",
        "model.model_vision.select_box_nums_for_evaluation=500",
        "model.model_vision.text_feature_bank_reset=True",
        "model.model_vision.backbone.net.xattn=False",
        "model.model_vision.transformer.encoder.pytorch_attn=True",
        "model.model_vision.transformer.decoder.pytorch_attn=True",
    ]
    if running_device == "cpu":
        args.opts += [
            "model.model_language.dtype='float32'",
        ]
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)

    cfg.model.model_vision.criterion[0].use_fed_loss = False
    cfg.model.model_vision.criterion[2].use_fed_loss = False
    cfg.train.device = running_device

    ape.modeling.text.eva02_clip.factory._MODEL_CONFIGS[cfg.model.model_language.clip_model][
        "vision_cfg"
    ]["layers"] = 1

    demo = VisualizationDemo(cfg, args=args)
    if save_memory:
        demo.predictor.model.to("cpu")
        # demo.predictor.model.half()
    else:
        demo.predictor.model.to(running_device)

    all_demo["APE_D"] = demo
    all_cfg["APE_D"] = cfg

def APE_A_tab():
    with gr.Tab("APE A"):
        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                input_image = gr.Image(
                    sources=["upload"],
                    type="filepath",
                    # tool="sketch",
                    # brush_radius=50,
                )
                input_text = gr.Textbox(
                    label="Object Prompt (optional; if empty, only COCO objects are detected)",
                    info="Format: word1,word2,word3,...",
                )
                score_threshold = gr.Slider(
                    label="Score Threshold", minimum=0.01, maximum=1.0, value=0.3, step=0.01
                )
                output_type = gr.CheckboxGroup(
                    ["object detection", "instance segmentation"],
                    value=["object detection", "instance segmentation"],
                    label="Output Type",
                    info="Select which outputs to display.",
                )
                run_button = gr.Button("Run")
            with gr.Column(scale=2):
                gallery = gr.Image(
                    type="pil",
                )
                example_data = gr.Dataset(
                    components=[input_image, input_text, score_threshold],
                    samples=example_list,
                    samples_per_page=5,
                )
                example_data.click(fn=set_example, inputs=example_data, outputs=example_data.components)
                # add_tail_info()
                output_json = gr.JSON(label="json results")

    run_button.click(
        fn=run_on_image_A,
        inputs=[input_image, input_text, score_threshold, output_type],
        outputs=[gallery, output_json],
    )

def APE_C_tab():
    with gr.Tab("APE C"):
        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                input_image = gr.Image(
                    sources=["upload"],
                    type="filepath",
                    # tool="sketch",
                    # brush_radius=50,
                )
                input_text = gr.Textbox(
                    label="Object Prompt (optional; if empty, only COCO objects are detected)",
                    info="Format: word1,word2,sentence1,sentence2,...",
                )
                score_threshold = gr.Slider(
                    label="Score Threshold", minimum=0.01, maximum=1.0, value=0.3, step=0.01
                )
                output_type = gr.CheckboxGroup(
                    ["object detection", "instance segmentation", "semantic segmentation"],
                    value=["object detection", "instance segmentation"],
                    label="Output Type",
                    info="Select which outputs to display.",
                )
                run_button = gr.Button("Run")
            with gr.Column(scale=2):
                gallery = gr.Image(
                    type="pil",
                )
                example_data = gr.Dataset(
                    components=[input_image, input_text, score_threshold],
                    samples=example_list,
                    samples_per_page=5,
                )
                example_data.click(fn=set_example, inputs=example_data, outputs=example_data.components)
                # add_tail_info()
                output_json = gr.JSON(label="json results")

    run_button.click(
        fn=run_on_image_C,
        inputs=[input_image, input_text, score_threshold, output_type],
        outputs=[gallery, output_json],
    )

def APE_D_tab():
    with gr.Tab("APE D"):
        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                input_image = gr.Image(
                    sources=["upload"],
                    type="filepath",
                    # tool="sketch",
                    # brush_radius=50,
                )
                input_text = gr.Textbox(
                    label="Object Prompt (optional; if empty, only COCO objects are detected)",
                    info="Format: word1,word2,sentence1,sentence2,...",
                )
                score_threshold = gr.Slider(
                    label="Score Threshold", minimum=0.01, maximum=1.0, value=0.1, step=0.01
                )
                output_type = gr.CheckboxGroup(
                    ["object detection", "instance segmentation", "semantic segmentation"],
                    value=["object detection", "instance segmentation"],
                    label="Output Type",
                    info="Select which outputs to display.",
                )
                run_button = gr.Button("Run")
            with gr.Column(scale=2):
                gallery = gr.Image(
                    type="pil",
                )
                gr.Examples(
                    examples=example_list,
                    inputs=[input_image, input_text, score_threshold, output_type],
                    examples_per_page=20,
                )
                # add_tail_info()
                output_json = gr.JSON(label="json results")

    run_button.click(
        fn=run_on_image_D,
        inputs=[input_image, input_text, score_threshold, output_type],
        outputs=[gallery, output_json],
    )
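
# "APE all" tab: runs every model registered in all_demo on the same input
# and shows one output image per model for side-by-side comparison.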
def comparison_tab():
    with gr.Tab("APE all"):
        with gr.Row(equal_height=False):
            with gr.Column(scale=1):
                input_image = gr.Image(
                    sources=["upload"],
                    type="filepath",
                    # tool="sketch",
                    # brush_radius=50,
                )
                input_text = gr.Textbox(
                    label="Object Prompt (optional; if empty, only COCO objects are detected)",
                    info="Format: word1,word2,sentence1,sentence2,...",
                )
                score_threshold = gr.Slider(
                    label="Score Threshold", minimum=0.01, maximum=1.0, value=0.1, step=0.01
                )
                output_type = gr.CheckboxGroup(
                    ["object detection", "instance segmentation", "semantic segmentation"],
                    value=["object detection", "instance segmentation"],
                    label="Output Type",
                    info="Select which outputs to display.",
                )
                run_button = gr.Button("Run")

            gallery_all = []
            with gr.Column(scale=2):
                for key in all_demo.keys():
                    gallery = gr.Image(
                        label=key,
                        type="pil",
                    )
                    gallery_all.append(gallery)
                gr.Examples(
                    examples=example_list,
                    inputs=[input_image, input_text, score_threshold, output_type],
                    examples_per_page=20,
                )
                # add_tail_info()

    run_button.click(
        fn=run_on_image_comparison,
        inputs=[input_image, input_text, score_threshold, output_type],
        outputs=gallery_all,
    )
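
# True if something is already listening on localhost:port; used below to
# pick a free port for the server.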
def is_port_in_use(port: int) -> bool:
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(("localhost", port)) == 0
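
# Static Markdown blocks for the page header and the (currently unused)
# location-prompt example footer.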
def add_head_info(max_available_memory):
    gr.Markdown(
        "# APE: Aligning and Prompting Everything All at Once for Universal Visual Perception"
    )
    if max_available_memory:
        gr.Markdown(
            "Note that multiple models are deployed on a single GPU, so it may take several minutes to run the models and visualize the results."
        )
    else:
        gr.Markdown(
            "Note that multiple models are deployed on CPU, so it may take a while to run the models and visualize the results."
        )
        gr.Markdown(
            "Note that results computed on CPU differ slightly from results computed on GPU, and some libraries are disabled on CPU."
        )
    gr.Markdown(
        "If the demo runs out of memory, try to ***decrease*** the number of object prompts and ***increase*** the score threshold."
    )
    gr.Markdown("---")

def add_tail_info():
    gr.Markdown("---")
    gr.Markdown("### We also support location prompts")
    gr.Markdown(
        """
| Location prompt | result | Location prompt | result |
| ---- | ---- | ---- | ---- |
| ![Location prompt](/file=examples/prompt/20230627-131346_11.176.20.67_mask.PNG) | ![result](/file=examples/prompt/20230627-131346_11.176.20.67_pred.png) | ![Location prompt](/file=examples/prompt/20230627-131530_11.176.20.67_mask.PNG) | ![result](/file=examples/prompt/20230627-131530_11.176.20.67_pred.png) |
| ![Location prompt](/file=examples/prompt/20230627-131520_11.176.20.67_mask.PNG) | ![result](/file=examples/prompt/20230627-131520_11.176.20.67_pred.png) | ![Location prompt](/file=examples/prompt/20230627-114219_11.176.20.67_mask.PNG) | ![result](/file=examples/prompt/20230627-114219_11.176.20.67_pred.png) |
"""
    )
    gr.Markdown("---")
if __name__ == "__main__":
    available_port = [80, 8080]
    server_port = None
    for port in available_port:
        if not is_port_in_use(port):
            server_port = port
            break
    print("server_port", server_port)

    available_memory = [
        torch.cuda.mem_get_info(i)[0] / 1024**3 for i in range(torch.cuda.device_count())
    ]
    if len(available_memory) > 0:
        max_available_memory = max(available_memory)
        device_id = available_memory.index(max_available_memory)
        running_device = "cuda:" + str(device_id)
    else:
        max_available_memory = 0
        running_device = "cpu"

    save_memory = False
    if max_available_memory > 0 and max_available_memory < 40:
        save_memory = True

    print("available_memory", available_memory)
    print("max_available_memory", max_available_memory)
    print("running_device", running_device)
    print("save_memory", save_memory)

    # ==========================================================================================
    mp.set_start_method("spawn", force=True)

    setup_logger(name="fvcore")
    setup_logger(name="ape")
    logger = setup_logger()

    aug = T.ResizeShortestEdge([1024, 1024], 1024)

    all_demo = {}
    all_cfg = {}

    # load_APE_A()
    # load_APE_B()
    # load_APE_C()
    # NOTE: save_memory is forced off here, presumably because only APE_D is
    # loaded below, so swapping models between CPU and GPU is unnecessary.
    save_memory = False
    load_APE_D()

    title = "APE: Aligning and Prompting Everything All at Once for Universal Visual Perception"

    block = gr.Blocks(title=title).queue()
    with block:
        add_head_info(max_available_memory)
        # APE_A_tab()
        # APE_C_tab()
        APE_D_tab()
        comparison_tab()
        # add_tail_info()

    block.launch(
        share=False,
        # server_name="0.0.0.0",
        # server_port=server_port,
        show_api=False,
        show_error=True,
    )