# Scraped page header (not part of the program): Vivien · Initial commit · 8ca63da · raw · history blame · 6.95 kB
import io

import cv2
import numpy as np
import PIL
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont
import streamlit as st
import torch
# Flip to True to force the CPU and the small MiDaS checkpoint while developing.
DEBUG = False

# In debug mode CUDA is never probed; otherwise the GPU is used when present.
device = torch.device(
    "cuda" if not DEBUG and torch.cuda.is_available() else "cpu"
)
model_name = "MiDaS_small" if DEBUG else "DPT_Large"
# Entries offered by the font selectbox. The script later drops the first six
# characters ("Font: ") of the chosen entry and uses the remainder as the
# file stem under fonts/ (e.g. "fonts/Serif - EBGaramond.ttf").
FONTS = [
    "Font: Serif - EBGaramond",
    "Font: Serif - Cinzel",
    "Font: Sans - Roboto",
    "Font: Sans - Lato",
    "Font: Display - Lobster",
    "Font: Display - LilitaOne",
    "Font: Handwriting - GreatVibes",
    "Font: Handwriting - Pacifico",
    "Font: Mono - Inconsolata",
    "Font: Mono - Cutive",
]
# Keyword arguments shared by the @st.cache decorators on the image helpers.
CACHE_KWARGS = {
    "show_spinner": False,
    # Hash every torch Parameter to None so model weights are not hashed.
    "hash_funcs": {torch.nn.parameter.Parameter: lambda _: None},
    "allow_output_mutation": True,
    # NOTE(review): presumably seconds, per Streamlit's st.cache docs — confirm.
    "ttl": 900,
    "max_entries": 20,
}
def hex_to_rgb(hex):
    """Convert a hexadecimal colour string to an ``(r, g, b)`` tuple.

    Args:
        hex: Colour written as at least six hexadecimal digits, e.g.
            ``"FFFFFF"``. A single leading ``"#"`` is accepted and ignored;
            characters after the sixth digit are ignored.

    Returns:
        A tuple of three ints, each in the range 0-255.

    Raises:
        ValueError: If fewer than six digits are supplied or a digit pair is
            not valid hexadecimal.
    """
    # The parameter keeps its original name (shadowing the builtin) so that
    # keyword callers continue to work.
    digits = hex[1:] if hex.startswith("#") else hex
    if len(digits) < 6:
        # Without this check a 5-character input silently yields a wrong
        # blue channel parsed from a single digit.
        raise ValueError(f"expected six hexadecimal digits, got {hex!r}")
    return tuple(int(digits[i : i + 2], 16) for i in (0, 2, 4))
@st.cache(
    show_spinner=True,
    hash_funcs={torch.nn.parameter.Parameter: lambda _: None},
    allow_output_mutation=True,
)
def load(model_type):
    """Fetch a MiDaS network and its matching input transform from torch.hub.

    Args:
        model_type: Entry-point name in the ``intel-isl/MiDaS`` hub repo.

    Returns:
        ``(network, transform)``: the network moved to the module-level
        ``device`` and switched to eval mode, plus ``dpt_transform`` for the
        two DPT variants or ``small_transform`` for anything else.
    """
    hub_repo = "intel-isl/MiDaS"

    network = torch.hub.load(hub_repo, model_type)
    network.to(device)
    network.eval()

    available = torch.hub.load(hub_repo, "transforms")
    if model_type in ("DPT_Large", "DPT_Hybrid"):
        return network, available.dpt_transform
    return network, available.small_transform
# Load the network and its input transform at import time; compute_depth
# reads both as module globals.
midas, transform = load(model_name)
@st.cache(**CACHE_KWARGS)
def compute_depth(img):
    """Run the module-level MiDaS network on ``img``.

    The raw prediction is bicubically resampled to the first two dimensions
    of ``img`` and returned as a NumPy array with size-1 axes squeezed out.
    """
    target_size = img.shape[:2]
    with torch.no_grad():
        batch = transform(img).to(device)
        raw = midas(batch)
        # interpolate() needs a channel axis, hence the unsqueeze.
        resized = torch.nn.functional.interpolate(
            raw.unsqueeze(1),
            size=target_size,
            mode="bicubic",
            align_corners=False,
        )
        depth = resized.squeeze()
    return depth.cpu().numpy()
@st.cache(**CACHE_KWARGS)
def get_mask1(
    shape,
    caption,
    font=None,
    font_size=0.08,
    color=(0, 0, 0),
    alpha=0.8,
    x=0.5,
    y=0.5,
):
    """Render ``caption`` on a transparent layer and derive its opacity mask.

    Previously this function read the module globals ``img``, ``x`` and ``y``;
    it now depends only on its arguments.

    Args:
        shape: Image shape; ``shape[0]`` is the height, ``shape[1]`` the width.
        caption: Text to draw.
        font: Font file handed to ``PIL.ImageFont.truetype``.
        font_size: Font size as a fraction of the image width.
        color: ``(r, g, b)`` fill colour.
        alpha: Text opacity; clamped to the range 0-1.
        x: Horizontal text position as a fraction of the width, from the left.
        y: Vertical text position as a fraction of the height, from the bottom.

    Returns:
        ``(text, mask1)``: the RGB channels of the rendered layer, and the
        layer's alpha channel scaled to 0-1 and repeated over three channels.
    """
    height, width = shape[0], shape[1]

    layer = PIL.Image.new("RGBA", (width, height), (0, 0, 0, 0))
    draw = PIL.ImageDraw.Draw(layer)
    typeface = PIL.ImageFont.truetype(font, int(font_size * width))
    opacity = int(max(min(1, alpha), 0) * 255)
    draw.text(
        (x * width, (1 - y) * height),
        caption,
        fill=(*color, opacity),
        font=typeface,
    )

    rgba = np.array(layer)
    # Repeat the alpha plane over three channels so it multiplies RGB data.
    mask1 = np.dot(np.expand_dims(rgba[:, :, -1] / 255, -1), np.ones((1, 3)))
    text = rgba[:, :, :-1]
    return text, mask1


@st.cache(**CACHE_KWARGS)
def get_mask2(depth_map, depth):
    """Mark the pixels whose depth-map value lies strictly below a threshold.

    The threshold is interpolated between the map's extremes: ``depth == 0``
    puts it at the maximum and ``depth == 1`` at the minimum.

    Returns:
        An array with a trailing axis of length 3 that holds 1 where the
        pixel is below the threshold and 0 elsewhere.
    """
    threshold = depth * np.min(depth_map) + (1 - depth) * np.max(depth_map)
    below = np.expand_dims(depth_map[:, :] < threshold, -1)
    return np.dot(below, np.ones((1, 3)))


@st.cache(**CACHE_KWARGS)
def add_caption(
    img,
    caption,
    depth_map=None,
    x=0.5,
    y=0.5,
    depth=0.5,
    font_size=50,
    color=(255, 255, 255),
    font="",
    alpha=1,
):
    """Blend ``caption`` into ``img`` wherever the depth mask allows it.

    Args:
        img: Image array; it is blended against three-channel masks.
        caption: Text to draw.
        depth_map: Precomputed depth map; computed from ``img`` when ``None``.
        x, y: Text position as fractions of width (from the left) and height
            (from the bottom).
        depth: Value in 0-1 that selects the depth threshold (see get_mask2).
        font_size: Font size as a fraction of the image width.
            NOTE(review): the default of 50 is multiplied by the width, which
            looks unintended; the caller in this file always passes a fraction.
        color: ``(r, g, b)`` text colour.
        font: Font file handed to ``PIL.ImageFont.truetype``.
        alpha: Text opacity in 0-1.

    Returns:
        The captioned image as a ``uint8`` array.
    """
    if depth_map is None:
        depth_map = compute_depth(img)

    text, mask1 = get_mask1(
        img.shape,
        caption,
        font=font,
        font_size=font_size,
        color=color,
        alpha=alpha,
        x=x,
        y=y,
    )
    mask2 = get_mask2(depth_map, depth)

    # Text shows only where it was drawn and where the depth test passes.
    mask = mask1 * mask2
    return ((1 - mask) * img + mask * text).astype(np.uint8)
# Inject CSS that collapses every <label> element to zero height and margin
# (presumably to hide the space left by the empty widget labels used below).
st.markdown(
"""
<style>
label{
height: 0px !important;
min-height: 0px !important;
margin-bottom: 0px !important;
}
</style>
""",
unsafe_allow_html=True,
)
# Sidebar: title, short usage instructions and credits.
st.sidebar.markdown(
"""
# Depth-aware text addition
Add text ***inside*** an image!
Upload an image, enter some text and adjust the ***depth*** where you want the text to be displayed. You can also define its location and appearance (font, color, transparency and size).
Built with [PyTorch](https://pytorch.org/), Intel's [MiDaS model](https://pytorch.org/hub/intelisl_midas_v2/), [Streamlit](https://streamlit.io/), [pillow](https://python-pillow.org/) and inspired by the official [video](https://youtu.be/eTa1jHk1Lxc) of *Jenny of Oldstones* by Florence + the Machine
"""
)
# JPEG upload widget with an empty label; the value is None until a file is chosen
# (load_img treats None as "use the bundled picture").
uploaded_file = st.file_uploader("", type=["jpg", "jpeg"])
@st.cache(**CACHE_KWARGS)
def load_img(uploaded_file):
    """Load the working image and compute its depth map.

    Args:
        uploaded_file: File object from the uploader, or ``None`` to use the
            bundled ``pulp.jpg``.

    Returns:
        ``(img, depth_map, default)``: the image as an RGB array, the output
        of ``compute_depth`` for it, and ``True`` when the bundled picture
        was used.
    """
    max_side = 800
    default = uploaded_file is None
    source = "pulp.jpg" if default else uploaded_file

    # JPEGs may be grayscale or CMYK; force three channels so the blend in
    # add_caption always sees an (H, W, 3) array. The context manager closes
    # the underlying file once the pixels are copied.
    with PIL.Image.open(source) as picture:
        img = np.array(picture.convert("RGB"))

    if not default:
        height, width = img.shape[:2]
        if height > max_side or width > max_side:
            # cv2.resize takes dsize as (width, height). Scale the longer
            # side to max_side and never let the other side reach zero.
            if height < width:
                new_size = (max_side, max(1, int(max_side * height / width)))
            else:
                new_size = (max(1, int(max_side * width / height)), max_side)
            img = cv2.resize(img, dsize=new_size, interpolation=cv2.INTER_CUBIC)

    depth_map = compute_depth(img)
    return img, depth_map, default
img, depth_map, default = load_img(uploaded_file)

# Initial widget values: one preset for the bundled picture, another for uploads.
if default:
    text0 = "Pulp Fiction"
    x0, y0 = 0.02, 0.68
    alpha0, font_size0 = 0.99, 0.07
    depth0, font0 = 0.23, 4
else:
    text0 = "Enter your text here"
    x0, y0 = 0.1, 0.9
    alpha0, font_size0 = 0.8, 0.08
    depth0, font0 = 0.5, 0
def _edge_labels(lowest, highest, low_value=0.0, high_value=1.0):
    """Build a slider ``format_func`` that labels only the two end values."""

    def label(value):
        if value == low_value:
            return lowest
        if value == high_value:
            return highest
        return ""

    return label


# Top row: caption text, a "Color:" label and the colour picker.
colA, colB, colC = st.columns((13, 1, 1))
with colA:
    text = st.text_input("", text0)
with colB:
    st.markdown("Color:")
with colC:
    color = st.color_picker("", value="#FFFFFF")

# Sliders step through 0.00, 0.01, ..., 1.00.
unit_steps = [i / 100 for i in range(101)]

# Two slider columns separated by a narrow spacer column.
col1, _, col2 = st.columns((4, 1, 4))
with col1:
    depth = st.select_slider(
        "",
        options=unit_steps,
        value=depth0,
        format_func=_edge_labels("Foreground", "Background"),
    )
    x = st.select_slider(
        "",
        options=unit_steps,
        value=x0,
        format_func=_edge_labels("Left", "Right"),
    )
    y = st.select_slider(
        "",
        options=unit_steps,
        value=y0,
        format_func=_edge_labels("Bottom", "Top"),
    )
with col2:
    font_size = st.select_slider(
        "",
        options=[0.04 + i / 100 for i in range(0, 17)],
        value=font_size0,
        format_func=_edge_labels("Small font", "Large font", 0.04, 0.2),
    )
    alpha = st.select_slider(
        "",
        options=unit_steps,
        value=alpha0,
        format_func=_edge_labels("Transparent", "Opaque"),
    )
    font = st.selectbox("", FONTS, index=font0)

# Drop the six-character "Font: " prefix to get the file stem under fonts/.
font = f"fonts/{font[6:]}.ttf"
# Compose the captioned picture from the current widget values and show it.
captioned = add_caption(
    img,
    text,
    depth_map=depth_map,
    x=x,
    y=y,
    depth=depth,
    font=font,
    font_size=font_size,
    alpha=alpha,
    color=hex_to_rgb(color[1:]),
)
st.image(captioned)

# Encode the JPEG in memory. The previous fixed "result.jpg" in the working
# directory was shared by every session, so concurrent users could overwrite
# each other's download.
jpeg_buffer = io.BytesIO()
PIL.Image.fromarray(captioned).save(jpeg_buffer, format="JPEG")
btn = st.download_button(
    label="Download image",
    data=jpeg_buffer.getvalue(),
    file_name="result.jpg",
    mime="image/jpeg",
)