Spaces:

vivien
/

depth-aware-caption

Sleeping

Vivien

Initial commit

8ca63da over 2 years ago

6.95 kB

	import io

	import cv2
	import numpy as np
	import PIL
	import PIL.Image
	import PIL.ImageDraw
	import PIL.ImageFont
	import streamlit as st
	import torch

	# Set DEBUG to True to force the CPU and the "MiDaS_small" model; otherwise
	# CUDA is used when available, together with the "DPT_Large" model.
	DEBUG = False
	if not DEBUG:
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    model_name = "DPT_Large"
	else:
	    device = torch.device("cpu")
	    model_name = "MiDaS_small"

	# Entries offered by the font select box. Each entry reads
	# "Font: <family> - <typeface>"; the script later strips the first six
	# characters ("Font: ") to build the path of the .ttf file.
	FONTS = [
	    f"Font: {family} - {typeface}"
	    for family, typeface in (
	        ("Serif", "EBGaramond"),
	        ("Serif", "Cinzel"),
	        ("Sans", "Roboto"),
	        ("Sans", "Lato"),
	        ("Display", "Lobster"),
	        ("Display", "LilitaOne"),
	        ("Handwriting", "GreatVibes"),
	        ("Handwriting", "Pacifico"),
	        ("Mono", "Inconsolata"),
	        ("Mono", "Cutive"),
	    )
	]

	# Keyword arguments shared by the @st.cache decorators in this file:
	# no spinner, torch Parameters hashed to None, returned objects may be
	# mutated, and the ttl / max_entries values below.
	CACHE_KWARGS = dict(
	    show_spinner=False,
	    hash_funcs={torch.nn.parameter.Parameter: lambda _: None},
	    allow_output_mutation=True,
	    ttl=900,
	    max_entries=20,
	)


	def hex_to_rgb(hex):
	    """Convert a hexadecimal colour string to an ``(r, g, b)`` tuple of ints.

	    Args:
	        hex: Six hexadecimal digits ("RRGGBB"), optionally preceded by "#".
	            Characters after the sixth digit are ignored. The parameter
	            name shadows the builtin ``hex``; it is kept so that existing
	            keyword callers continue to work.

	    Returns:
	        A tuple of three integers, each in the range 0-255.

	    Raises:
	        ValueError: If any of the three two-character groups is missing or
	            is not valid hexadecimal.
	    """
	    digits = hex[1:] if hex.startswith("#") else hex
	    return tuple(int(digits[i : i + 2], 16) for i in (0, 2, 4))


	@st.cache(
	    show_spinner=True,
	    hash_funcs={torch.nn.parameter.Parameter: lambda _: None},
	    allow_output_mutation=True,
	)
	def load(model_type):
	    """Fetch a MiDaS model and its matching input transform from torch.hub.

	    Args:
	        model_type: Entry-point name passed to ``torch.hub.load`` for the
	            "intel-isl/MiDaS" repository (this file uses "DPT_Large" and
	            "MiDaS_small").

	    Returns:
	        ``(midas, transform)``: the model, moved to the module-level
	        ``device`` and switched to eval mode, and the transform to apply to
	        an image before feeding it to the model.
	    """
	    hub_repo = "intel-isl/MiDaS"

	    midas = torch.hub.load(hub_repo, model_type)
	    midas.to(device)
	    midas.eval()

	    midas_transforms = torch.hub.load(hub_repo, "transforms")
	    # The two DPT variants share one transform; anything else gets the
	    # small-model transform.
	    if model_type in ("DPT_Large", "DPT_Hybrid"):
	        return midas, midas_transforms.dpt_transform
	    return midas, midas_transforms.small_transform


	# Load the model and its input transform at module level; compute_depth()
	# reads both as globals. load() is wrapped in st.cache.
	midas, transform = load(model_name)


	@st.cache(**CACHE_KWARGS)
	def compute_depth(img):
	    """Run the module-level MiDaS model on *img* and return its prediction.

	    Args:
	        img: Image array; only ``img.shape[:2]`` is used here directly, the
	            array itself is handed to the module-level ``transform``.

	    Returns:
	        A NumPy array holding the model output, resampled (bicubic) to
	        ``img.shape[:2]`` and squeezed.
	    """
	    target_size = img.shape[:2]
	    with torch.no_grad():
	        model_input = transform(img).to(device)
	        raw = midas(model_input)
	        # interpolate() needs a channel axis, hence the unsqueeze(1).
	        resized = torch.nn.functional.interpolate(
	            raw.unsqueeze(1),
	            size=target_size,
	            mode="bicubic",
	            align_corners=False,
	        )
	    return resized.squeeze().cpu().numpy()


	@st.cache(**CACHE_KWARGS)
	def get_mask1(
	    shape,
	    caption,
	    font=None,
	    font_size=0.08,
	    color=(0, 0, 0),
	    alpha=0.8,
	    x=0.5,
	    y=0.5,
	):
	    """Render *caption* on a transparent canvas and return its layer and mask.

	    The canvas size and the text position are derived from the ``shape``,
	    ``x`` and ``y`` arguments only; no module-level state is read.

	    Args:
	        shape: Shape of the target image; ``shape[0]`` is the height and
	            ``shape[1]`` the width.
	        caption: Text to draw.
	        font: Path of a TrueType font file. When ``None`` or empty, Pillow's
	            built-in default font is used (which ignores ``font_size``).
	        font_size: Font size as a fraction of the image width.
	        color: ``(r, g, b)`` text colour.
	        alpha: Text opacity; clamped to the range 0-1.
	        x: Horizontal text position as a fraction of the width.
	        y: Vertical text position as a fraction of the height, with 0 at the
	            bottom edge and 1 at the top edge.

	    Returns:
	        ``(text, mask1)``: the RGB channels of the rendered canvas, and the
	        canvas alpha channel scaled to 0-1 and repeated over three channels.
	    """
	    height, width = shape[0], shape[1]
	    img_text = PIL.Image.new("RGBA", (width, height), (0, 0, 0, 0))
	    draw = PIL.ImageDraw.Draw(img_text)
	    if font:
	        font = PIL.ImageFont.truetype(font, int(font_size * width))
	    else:
	        font = PIL.ImageFont.load_default()
	    opacity = int(max(min(1, alpha), 0) * 255)
	    draw.text(
	        (x * width, (1 - y) * height),
	        caption,
	        fill=(*color, opacity),
	        font=font,
	    )
	    text = np.array(img_text)
	    # Column vector of alpha values times a row of three ones: the same
	    # mask value for each of the three colour channels.
	    mask1 = np.dot(np.expand_dims(text[:, :, -1] / 255, -1), np.ones((1, 3)))
	    return text[:, :, :-1], mask1


	@st.cache(**CACHE_KWARGS)
	def get_mask2(depth_map, depth):
	    """Return a three-channel 0/1 mask of the pixels below a depth threshold.

	    The threshold is interpolated between the extremes of *depth_map*:
	    ``depth == 0`` puts it at the maximum and ``depth == 1`` at the minimum.
	    NOTE(review): presumably larger values in the map mean "closer to the
	    camera", so the mask keeps what lies behind the threshold - confirm
	    against the MiDaS output convention.

	    Args:
	        depth_map: 2-D array of per-pixel depth predictions.
	        depth: Threshold position in the range 0-1.

	    Returns:
	        Array of shape ``depth_map.shape + (3,)`` holding 1.0 where the pixel
	        is strictly below the threshold and 0.0 elsewhere.
	    """
	    threshold = depth * np.min(depth_map) + (1 - depth) * np.max(depth_map)
	    below = np.expand_dims(depth_map[:, :] < threshold, -1)
	    return np.dot(below, np.ones((1, 3)))


	@st.cache(**CACHE_KWARGS)
	def add_caption(
	    img,
	    caption,
	    depth_map=None,
	    x=0.5,
	    y=0.5,
	    depth=0.5,
	    font_size=50,
	    color=(255, 255, 255),
	    font="",
	    alpha=1,
	):
	    """Blend *caption* into *img* wherever the depth mask allows it.

	    Args:
	        img: Image array with three colour channels (height, width, 3).
	        caption: Text to draw.
	        depth_map: Depth prediction for *img*; computed with
	            ``compute_depth`` when ``None``.
	        x: Horizontal text position as a fraction of the width.
	        y: Vertical text position as a fraction of the height (0 = bottom).
	        depth: Threshold position forwarded to ``get_mask2``.
	        font_size: Font size as a fraction of the image width.
	            NOTE(review): the default of 50 is kept for interface
	            compatibility but does not look like a fraction - callers in
	            this file always pass an explicit value.
	        color: ``(r, g, b)`` text colour.
	        font: Path of a TrueType font file (empty selects Pillow's default).
	        alpha: Text opacity in the range 0-1.

	    Returns:
	        The captioned image as a ``uint8`` array shaped like *img*.
	    """
	    if depth_map is None:
	        depth_map = compute_depth(img)
	    text, mask1 = get_mask1(
	        img.shape,
	        caption,
	        font=font,
	        font_size=font_size,
	        color=color,
	        alpha=alpha,
	        x=x,
	        y=y,
	    )
	    mask2 = get_mask2(depth_map, depth)
	    # A pixel takes the text colour only where the text is opaque AND the
	    # depth mask lets it through.
	    mask = mask1 * mask2

	    return ((1 - mask) * img + mask * text).astype(np.uint8)


	# CSS injected into the page: gives every <label> element zero height and
	# no bottom margin (the widgets in this script are created with empty labels).
	_LABEL_CSS = """
	<style>
	label{
	height: 0px !important;
	min-height: 0px !important;
	margin-bottom: 0px !important;
	}
	</style>
	"""
	st.markdown(_LABEL_CSS, unsafe_allow_html=True)

	# Introductory text shown in the sidebar.
	_SIDEBAR_INTRO = """
	# Depth-aware text addition

	Add text *inside* an image!

	Upload an image, enter some text and adjust the *depth* where you want the text to be displayed. You can also define its location and appearance (font, color, transparency and size).

	Built with [PyTorch](https://pytorch.org/), Intel's [MiDaS model](https://pytorch.org/hub/intelisl_midas_v2/), [Streamlit](https://streamlit.io/), [pillow](https://python-pillow.org/) and inspired by the official [video](https://youtu.be/eTa1jHk1Lxc) of Jenny of Oldstones by Florence + the Machine
	"""
	st.sidebar.markdown(_SIDEBAR_INTRO)

	# Unlabelled uploader restricted to JPEG files.
	_UPLOAD_TYPES = ["jpg", "jpeg"]
	uploaded_file = st.file_uploader("", type=_UPLOAD_TYPES)


	@st.cache(**CACHE_KWARGS)
	def load_img(uploaded_file):
	    """Load the image to caption and compute its depth map.

	    Args:
	        uploaded_file: File object of the uploaded image, or ``None`` to use
	            the bundled "pulp.jpg".

	    Returns:
	        ``(img, depth_map, default)``: the image as an RGB array, the result
	        of ``compute_depth(img)``, and ``True`` when the bundled image was
	        used. Uploaded images whose height or width exceeds 800 pixels are
	        scaled down so that the longer side is 800; the bundled image is
	        never resized.
	    """
	    default = uploaded_file is None
	    with PIL.Image.open("pulp.jpg" if default else uploaded_file) as pil_img:
	        # Grayscale or CMYK JPEGs would otherwise yield an array that is not
	        # (height, width, 3), which the blending in add_caption() requires.
	        img = np.array(pil_img.convert("RGB"))

	    if not default:
	        height, width = img.shape[:2]
	        if height > 800 or width > 800:
	            # cv2.resize takes dsize as (width, height). max(1, ...) keeps
	            # the short side from truncating to 0 on extreme aspect ratios.
	            if height < width:
	                new_size = (800, max(1, int(800 * height / width)))
	            else:
	                new_size = (max(1, int(800 * width / height)), 800)
	            img = cv2.resize(img, dsize=new_size, interpolation=cv2.INTER_CUBIC)

	    depth_map = compute_depth(img)
	    return img, depth_map, default


	img, depth_map, default = load_img(uploaded_file)

	# Initial widget values, in the order
	# (x, y, alpha, font size, depth, index into FONTS, caption text).
	# The bundled image gets its own preset; uploads start from generic values.
	if default:
	    _initial = (0.02, 0.68, 0.99, 0.07, 0.23, 4, "Pulp Fiction")
	else:
	    _initial = (0.1, 0.9, 0.8, 0.08, 0.5, 0, "Enter your text here")
	x0, y0, alpha0, font_size0, depth0, font0, text0 = _initial

	# Top row: a wide caption input, then a "Color:" tag and the colour picker
	# in two narrow columns (relative widths 13 : 1 : 1).
	colA, colB, colC = st.columns((13, 1, 1))
	text = colA.text_input("", text0)
	colB.markdown("Color:")
	color = colC.color_picker("", value="#FFFFFF")


	col1, _, col2 = st.columns((4, 1, 4))

	# Shared 0.00-1.00 scale (step 0.01) for depth, position and opacity.
	_UNIT_STEPS = [i / 100 for i in range(101)]
	# Font sizes 0.04-0.20 (fraction of the image width). Each value is built
	# from an integer so that it equals the literals it is compared with (the
	# 0.07 / 0.08 initial values and both end labels); accumulating
	# ``0.04 + i / 100`` only matches when float rounding happens to cooperate.
	_FONT_SIZE_STEPS = [i / 100 for i in range(4, 21)]


	def _end_labels(options, first_label, last_label):
	    """Return a ``format_func`` that labels only the two ends of *options*."""
	    first, last = options[0], options[-1]

	    def fmt(value):
	        if value == first:
	            return first_label
	        if value == last:
	            return last_label
	        return ""

	    return fmt


	# Left column: where the caption sits (depth, horizontal, vertical).
	with col1:
	    depth = st.select_slider(
	        "",
	        options=_UNIT_STEPS,
	        value=depth0,
	        format_func=_end_labels(_UNIT_STEPS, "Foreground", "Background"),
	    )
	    x = st.select_slider(
	        "",
	        options=_UNIT_STEPS,
	        value=x0,
	        format_func=_end_labels(_UNIT_STEPS, "Left", "Right"),
	    )
	    y = st.select_slider(
	        "",
	        options=_UNIT_STEPS,
	        value=y0,
	        format_func=_end_labels(_UNIT_STEPS, "Bottom", "Top"),
	    )

	# Right column: how the caption looks (size, opacity, typeface).
	with col2:
	    font_size = st.select_slider(
	        "",
	        options=_FONT_SIZE_STEPS,
	        value=font_size0,
	        format_func=_end_labels(_FONT_SIZE_STEPS, "Small font", "Large font"),
	    )
	    alpha = st.select_slider(
	        "",
	        options=_UNIT_STEPS,
	        value=alpha0,
	        format_func=_end_labels(_UNIT_STEPS, "Transparent", "Opaque"),
	    )
	    font = st.selectbox("", FONTS, index=font0)

	# Turn the selected entry into a font file path: drop the leading "Font: "
	# and look the remainder up under fonts/ with a .ttf extension.
	_font_label = font[len("Font: ") :]
	font = f"fonts/{_font_label}.ttf"

	# Everything chosen in the widgets above, forwarded as keyword arguments.
	_caption_options = {
	    "depth_map": depth_map,
	    "x": x,
	    "y": y,
	    "depth": depth,
	    "font": font,
	    "font_size": font_size,
	    "alpha": alpha,
	    # The colour picker returns "#RRGGBB"; strip the "#" before converting.
	    "color": hex_to_rgb(color[1:]),
	}
	captioned = add_caption(img, text, **_caption_options)

	st.image(captioned)

	# Encode the result as JPEG in memory. Writing a fixed "result.jpg" into the
	# working directory on every rerun would make all sessions of the app
	# read and overwrite the same file.
	_jpeg_buffer = io.BytesIO()
	PIL.Image.fromarray(captioned).save(_jpeg_buffer, format="JPEG")
	btn = st.download_button(
	    label="Download image",
	    data=_jpeg_buffer.getvalue(),
	    file_name="result.jpg",
	    mime="image/jpeg",
	)