Vivien committed on
Commit 8ca63da
1 Parent(s): 7328a87

Initial commit

.gitignore ADDED
@@ -0,0 +1 @@
+ result.jpg
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
- title: Depth Aware Caption
- emoji: 🐢
- colorFrom: red
- colorTo: indigo
+ title: Depth-aware text addition
+ emoji: ✍️
+ colorFrom: green
+ colorTo: blue
  sdk: streamlit
  sdk_version: 1.2.0
  app_file: app.py
app.py ADDED
@@ -0,0 +1,266 @@
+ import numpy as np
+ import PIL.Image
+ import PIL.ImageDraw
+ import PIL.ImageFont
+ import torch
+ import streamlit as st
+ import cv2
+
+ DEBUG = False
+ if DEBUG:
+     device = torch.device("cpu")
+     model_name = "MiDaS_small"
+ else:
+     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+     model_name = "DPT_Large"
+
+ FONTS = [
+     "Font: Serif - EBGaramond",
+     "Font: Serif - Cinzel",
+     "Font: Sans - Roboto",
+     "Font: Sans - Lato",
+     "Font: Display - Lobster",
+     "Font: Display - LilitaOne",
+     "Font: Handwriting - GreatVibes",
+     "Font: Handwriting - Pacifico",
+     "Font: Mono - Inconsolata",
+     "Font: Mono - Cutive",
+ ]
+
+ CACHE_KWARGS = {
+     "show_spinner": False,
+     "hash_funcs": {torch.nn.parameter.Parameter: lambda _: None},
+     "allow_output_mutation": True,
+     "ttl": 900,
+     "max_entries": 20,
+ }
+
+
+ def hex_to_rgb(hex):
+     rgb = []
+     for i in (0, 2, 4):
+         decimal = int(hex[i : i + 2], 16)
+         rgb.append(decimal)
+     return tuple(rgb)
+
+
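+ # Load the MiDaS model and its matching preprocessing transform from the PyTorch hub.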
+ @st.cache(
+     show_spinner=True,
+     hash_funcs={torch.nn.parameter.Parameter: lambda _: None},
+     allow_output_mutation=True,
+ )
+ def load(model_type):
+     midas = torch.hub.load("intel-isl/MiDaS", model_type)
+     midas.to(device)
+     _ = midas.eval()
+     midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
+     if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
+         transform = midas_transforms.dpt_transform
+     else:
+         transform = midas_transforms.small_transform
+     return midas, transform
+
+
+ midas, transform = load(model_name)
+
+
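+ # Predict a depth map with MiDaS and upsample it to the input image resolution.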
+ @st.cache(**CACHE_KWARGS)
+ def compute_depth(img):
+     with torch.no_grad():
+         prediction = midas(transform(img).to(device))
+
+         prediction = torch.nn.functional.interpolate(
+             prediction.unsqueeze(1),
+             size=img.shape[:2],
+             mode="bicubic",
+             align_corners=False,
+         ).squeeze()
+     return prediction.cpu().numpy()
+
+
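+ # Render the caption on a transparent layer; return its RGB pixels and the alpha channel as a 3-channel mask.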
+ @st.cache(**CACHE_KWARGS)
+ def get_mask1(
+     shape, caption, x=0.5, y=0.5, font=None, font_size=0.08, color=(0, 0, 0), alpha=0.8
+ ):
+     img_text = PIL.Image.new("RGBA", (shape[1], shape[0]), (0, 0, 0, 0))
+     draw = PIL.ImageDraw.Draw(img_text)
+     font = PIL.ImageFont.truetype(font, int(font_size * shape[1]))
+     draw.text(
+         (x * shape[1], (1 - y) * shape[0]),
+         caption,
+         fill=(*color, int(max(min(1, alpha), 0) * 255)),
+         font=font,
+     )
+     text = np.array(img_text)
+     mask1 = np.dot(np.expand_dims(text[:, :, -1] / 255, -1), np.ones((1, 3)))
+     text = text[:, :, :-1]
+     return text, mask1
+
+
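+ # Depth mask: 1 where the scene is farther away than the selected depth (MiDaS predicts relative inverse depth), so nearer objects occlude the text.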
+ @st.cache(**CACHE_KWARGS)
+ def get_mask2(depth_map, depth):
+     m = np.expand_dims(
+         (depth_map[:, :] < depth * np.min(depth_map) + (1 - depth) * np.max(depth_map)),
+         -1,
+     )
+     return np.dot(m, np.ones((1, 3)))
+
+
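+ # Composite the caption over the image only where the text alpha and the depth mask overlap.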
+ @st.cache(**CACHE_KWARGS)
+ def add_caption(
+     img,
+     caption,
+     depth_map=None,
+     x=0.5,
+     y=0.5,
+     depth=0.5,
+     font_size=50,
+     color=(255, 255, 255),
+     font="",
+     alpha=1,
+ ):
+     if depth_map is None:
+         depth_map = compute_depth(img)
+     text, mask1 = get_mask1(
+         img.shape, caption, x=x, y=y, font=font, font_size=font_size, color=color, alpha=alpha
+     )
+     mask2 = get_mask2(depth_map, depth)
+     mask = mask1 * mask2
+
+     return ((1 - mask) * img + mask * text).astype(np.uint8)
+
+
+
+ st.markdown(
+     """
+ <style>
+ label{
+ height: 0px !important;
+ min-height: 0px !important;
+ margin-bottom: 0px !important;
+ }
+ </style>
+ """,
+     unsafe_allow_html=True,
+ )
+
+ st.sidebar.markdown(
+     """
+ # Depth-aware text addition
+
+ Add text ***inside*** an image!
+
+ Upload an image, enter some text, and adjust the ***depth*** at which the text should appear. You can also set its location and appearance (font, color, transparency and size).
+
+ Built with [PyTorch](https://pytorch.org/), Intel's [MiDaS model](https://pytorch.org/hub/intelisl_midas_v2/), [Streamlit](https://streamlit.io/) and [pillow](https://python-pillow.org/), and inspired by the official [video](https://youtu.be/eTa1jHk1Lxc) for *Jenny of Oldstones* by Florence + the Machine.
+ """
+ )
+
+ uploaded_file = st.file_uploader("", type=["jpg", "jpeg"])
+
+
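+ # Load the uploaded image (or the bundled demo image); uploads are downscaled so their longer side is at most 800 px, and the depth map is precomputed.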
+ @st.cache(**CACHE_KWARGS)
+ def load_img(uploaded_file):
+     if uploaded_file is None:
+         img = np.array(PIL.Image.open("pulp.jpg"))
+         default = True
+     else:
+         img = np.array(PIL.Image.open(uploaded_file))
+         if img.shape[0] > 800 or img.shape[1] > 800:
+             if img.shape[0] < img.shape[1]:
+                 new_size = (800, int(800 * img.shape[0] / img.shape[1]))
+             else:
+                 new_size = (int(800 * img.shape[1] / img.shape[0]), 800)
+             img = cv2.resize(img, dsize=new_size, interpolation=cv2.INTER_CUBIC)
+         default = False
+     depth_map = compute_depth(img)
+     return img, depth_map, default
+
+
+ img, depth_map, default = load_img(uploaded_file)
+
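+ # Widget defaults: a preset tuned for the bundled Pulp Fiction demo image, generic values otherwise.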
+ if default:
+     x0, y0, alpha0, font_size0, depth0, font0 = 0.02, 0.68, 0.99, 0.07, 0.23, 4
+     text0 = "Pulp Fiction"
+ else:
+     x0, y0, alpha0, font_size0, depth0, font0 = 0.1, 0.9, 0.8, 0.08, 0.5, 0
+     text0 = "Enter your text here"
+
+ colA, colB, colC = st.columns((13, 1, 1))
+
+ with colA:
+     text = st.text_input("", text0)
+
+ with colB:
+     st.markdown("Color:")
+
+ with colC:
+     color = st.color_picker("", value="#FFFFFF")
+
+
+ col1, _, col2 = st.columns((4, 1, 4))
+
+ with col1:
+     depth = st.select_slider(
+         "",
+         options=[i / 100 for i in range(101)],
+         value=depth0,
+         format_func=lambda x: "Foreground"
+         if x == 0.0
+         else "Background"
+         if x == 1.0
+         else "",
+     )
+     x = st.select_slider(
+         "",
+         options=[i / 100 for i in range(101)],
+         value=x0,
+         format_func=lambda x: "Left" if x == 0.0 else "Right" if x == 1.0 else "",
+     )
+     y = st.select_slider(
+         "",
+         options=[i / 100 for i in range(101)],
+         value=y0,
+         format_func=lambda x: "Bottom" if x == 0.0 else "Top" if x == 1.0 else "",
+     )
+
+ with col2:
+     font_size = st.select_slider(
+         "",
+         options=[0.04 + i / 100 for i in range(0, 17)],
+         value=font_size0,
+         format_func=lambda x: "Small font"
+         if x == 0.04
+         else "Large font"
+         if x == 0.2
+         else "",
+     )
+     alpha = st.select_slider(
+         "",
+         options=[i / 100 for i in range(101)],
+         value=alpha0,
+         format_func=lambda x: "Transparent"
+         if x == 0.0
+         else "Opaque"
+         if x == 1.0
+         else "",
+     )
+     font = st.selectbox("", FONTS, index=font0)
+
+ font = f"fonts/{font[6:]}.ttf"
+
+ captioned = add_caption(
+     img,
+     text,
+     depth_map=depth_map,
+     x=x,
+     y=y,
+     depth=depth,
+     font=font,
+     font_size=font_size,
+     alpha=alpha,
+     color=hex_to_rgb(color[1:]),
+ )
+
+ st.image(captioned)
+
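+ # Save the composited image so it can be offered as a JPEG download.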
+ PIL.Image.fromarray(captioned).save("result.jpg")
+ with open("result.jpg", "rb") as file:
+     btn = st.download_button(
+         label="Download image", data=file, file_name="result.jpg", mime="image/jpeg"
+     )
fonts/Display - LilitaOne.ttf ADDED
Binary file (26.8 kB).
fonts/Display - Lobster.ttf ADDED
Binary file (397 kB).
fonts/Handwriting - GreatVibes.ttf ADDED
Binary file (154 kB).
fonts/Handwriting - Pacifico.ttf ADDED
Binary file (315 kB).
fonts/Mono - Cutive.ttf ADDED
Binary file (77.4 kB).
fonts/Mono - Inconsolata.ttf ADDED
Binary file (339 kB).
fonts/Sans - Lato.ttf ADDED
Binary file (75.2 kB).
fonts/Sans - Roboto.ttf ADDED
Binary file (168 kB).
fonts/Serif - Cinzel.ttf ADDED
Binary file (125 kB).
fonts/Serif - EBGaramond.ttf ADDED
Binary file (929 kB).
packages.txt ADDED
@@ -0,0 +1 @@
+ libgl1
pulp.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy
+ torch
+ timm
+ pillow
+ opencv-python