Spaces:

Derendering
/

Model-Output-Playground

Running

App Files Files Community

Charlie Li committed on Feb 14

Commit

4697797

•

1 Parent(s): f44710a

build all

Browse files

Files changed (6) hide show

.gitignore +7 -0
README.md +2 -2
app.py +101 -0
org/cor.svg +264 -0
requirements.txt +5 -0
utils.py +235 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+__pycache__
+*.mp4
+flagged/
+derendering_supp/
+*.zip
+__MACOSX/
+.DS_Store

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 title: Model Output Playground
-emoji: 🐨
 colorFrom: purple
 colorTo: green
 sdk: gradio
@@ -10,4 +10,4 @@ pinned: false
 license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Model Output Playground
+emoji: 🛝
 colorFrom: purple
 colorTo: green
 sdk: gradio
 license: apache-2.0
 ---
+Paper: https://arxiv.org/abs/2402.05804

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import gradio as gr
+from utils import *
+file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
+filename = "derendering_supp.zip"
+download_file(file_url, filename)
+unzip_file(filename)
+print("Downloaded and unzipped the file.")
+diagram = get_svg_content("derendering_supp/derender_diagram.svg")
+org = get_svg_content("org/cor.svg")
+org_content = f"""
+{org}
+"""
+def demo(Dataset, Model):
+    if Model == "Small-i":
+        inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
+    elif Model == "Small-p":
+        inkml_path = f"./derendering_supp/small-p_{Dataset}_inkml"
+    elif Model == "Large-i":
+        inkml_path = f"./derendering_supp/large-i_{Dataset}_inkml"
+    path = f"./derendering_supp/{Dataset}/images_sample"
+    samples = os.listdir(path)
+    # Randomly pick a sample
+    picked_samples = random.sample(samples, min(1, len(samples)))
+    query_modes = ["d+t", "r+d", "vanilla"]
+    plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
+    text_outputs = []
+    for name in picked_samples:
+        img_path = os.path.join(path, name)
+        img = load_and_pad_img_dir(img_path)
+        for mode in query_modes:
+            example_id = name.strip(".png")
+            inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
+            text_field = parse_inkml_annotations(inkml_file)["textField"]
+            output_text = f"{plot_title[mode]}{text_field}"
+            text_outputs.append(output_text)  # Append text output for the current mode
+            ink = inkml_to_ink(inkml_file)
+            plot_ink_to_video(ink, mode + ".mp4", input_image=img)
+    return (
+        img,
+        text_outputs[0],
+        "d+t.mp4",
+        text_outputs[1],
+        "r+d.mp4",
+        text_outputs[2],
+        "vanilla.mp4",
+    )
+with gr.Blocks() as app:
+    gr.HTML(org_content)
+    gr.Markdown(
+        f"""
+        # InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
+        <div>{diagram}</div>
+        🔔 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
+        ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types..<br>
+        """
+    )
+    with gr.Row():
+        dataset = gr.Dropdown(
+            ["IMGUR5K", "IAM", "HierText"], label="Dataset", value="HierText"
+        )
+        model = gr.Dropdown(
+            ["Small-i", "Large-i", "Small-p"],
+            label="InkSight Model Variant",
+            value="Small-i",
+        )
+        im = gr.Image(label="Input Image")
+    with gr.Row():
+        d_t_text = gr.Textbox(
+            label="OCR recognition input to the model", interactive=False
+        )
+        r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
+        vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
+    with gr.Row():
+        d_t = gr.Video(label="Derender with Text", autoplay=True)
+        r_d = gr.Video(label="Recognize and Derender", autoplay=True)
+        vanilla = gr.Video(label="Vanilla", autoplay=True)
+    with gr.Row():
+        btn_sub = gr.Button("Sample")
+    btn_sub.click(
+        fn=demo,
+        inputs=[dataset, model],
+        outputs=[im, d_t_text, d_t, r_d_text, r_d, vanilla_text, vanilla],
+    )
+app.launch()

org/cor.svg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+tqdm
+numpy
+matplotlib
+Pillow
+numpy

utils.py ADDED Viewed

	@@ -0,0 +1,235 @@

+import json
+from tqdm import tqdm
+import numpy as np
+import matplotlib.pyplot as plt
+import xml.etree.ElementTree as ET
+from xml.dom import minidom
+import os
+from PIL import Image
+import matplotlib.animation as animation
+import copy
+from PIL import ImageEnhance
+import colorsys
+import matplotlib.colors as mcolors
+from matplotlib.collections import LineCollection
+from matplotlib.patheffects import withStroke
+import random
+import warnings
+from matplotlib.figure import Figure
+from io import BytesIO
+from matplotlib.animation import FuncAnimation, FFMpegWriter, PillowWriter
+import requests
+import zipfile
+warnings.filterwarnings("ignore")
+def get_svg_content(svg_path):
+    with open(svg_path, "r") as file:
+        return file.read()
+def download_file(url, filename):
+    response = requests.get(url)
+    with open(filename, "wb") as f:
+        f.write(response.content)
+def unzip_file(filename, extract_to="."):
+    with zipfile.ZipFile(filename, "r") as zip_ref:
+        zip_ref.extractall(extract_to)
+def load_and_pad_img_dir(file_dir):
+    image_path = os.path.join(file_dir)
+    image = Image.open(image_path)
+    width, height = image.size
+    ratio = min(224 / width, 224 / height)
+    image = image.resize((int(width * ratio), int(height * ratio)))
+    width, height = image.size
+    if height < 224:
+        # If width is shorter than height pad top and bottom.
+        top_padding = (224 - height) // 2
+        bottom_padding = 224 - height - top_padding
+        padded_image = Image.new("RGB", (width, 224), (255, 255, 255))
+        padded_image.paste(image, (0, top_padding))
+    else:
+        # Otherwise pad left and right.
+        left_padding = (224 - width) // 2
+        right_padding = 224 - width - left_padding
+        padded_image = Image.new("RGB", (224, height), (255, 255, 255))
+        padded_image.paste(image, (left_padding, 0))
+    return padded_image
+def plot_ink(ink, ax, lw=1.8, input_image=None, with_path=True, path_color="white"):
+    if input_image is not None:
+        img = copy.deepcopy(input_image)
+        enhancer = ImageEnhance.Brightness(img)
+        img = enhancer.enhance(0.45)
+        ax.imshow(img)
+    base_colors = plt.cm.get_cmap("rainbow", len(ink.strokes))
+    for i, stroke in enumerate(ink.strokes):
+        x, y = np.array(stroke.x), np.array(stroke.y)
+        base_color = base_colors(len(ink.strokes) - 1 - i)
+        hsv_color = colorsys.rgb_to_hsv(*base_color[:3])
+        darker_color = colorsys.hsv_to_rgb(
+            hsv_color[0], hsv_color[1], max(0, hsv_color[2] * 0.65)
+        )
+        colors = [
+            mcolors.to_rgba(darker_color, alpha=1 - (0.5 * j / len(x)))
+            for j in range(len(x))
+        ]
+        points = np.array([x, y]).T.reshape(-1, 1, 2)
+        segments = np.concatenate([points[:-1], points[1:]], axis=1)
+        lc = LineCollection(segments, colors=colors, linewidth=lw)
+        if with_path:
+            lc.set_path_effects(
+                [withStroke(linewidth=lw * 1.25, foreground=path_color)]
+            )
+        ax.add_collection(lc)
+    ax.set_xlim(0, 224)
+    ax.set_ylim(0, 224)
+    ax.invert_yaxis()
+def plot_ink_to_video(
+    ink, output_name, lw=1.8, input_image=None, path_color="white", fps=30
+):
+    fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
+    if input_image is not None:
+        img = copy.deepcopy(input_image)
+        enhancer = ImageEnhance.Brightness(img)
+        img = enhancer.enhance(0.45)
+        ax.imshow(img)
+    ax.set_xlim(0, 224)
+    ax.set_ylim(0, 224)
+    ax.invert_yaxis()
+    ax.axis("off")
+    base_colors = plt.cm.get_cmap("rainbow", len(ink.strokes))
+    all_points = sum([len(stroke.x) for stroke in ink.strokes], 0)
+    def update(frame):
+        ax.clear()
+        if input_image is not None:
+            ax.imshow(img)
+        ax.set_xlim(0, 224)
+        ax.set_ylim(0, 224)
+        ax.invert_yaxis()
+        ax.axis("off")
+        points_drawn = 0
+        for stroke_index, stroke in enumerate(ink.strokes):
+            x, y = np.array(stroke.x), np.array(stroke.y)
+            points = np.array([x, y]).T.reshape(-1, 1, 2)
+            segments = np.concatenate([points[:-1], points[1:]], axis=1)
+            base_color = base_colors(len(ink.strokes) - 1 - stroke_index)
+            hsv_color = colorsys.rgb_to_hsv(*base_color[:3])
+            darker_color = colorsys.hsv_to_rgb(
+                hsv_color[0], hsv_color[1], max(0, hsv_color[2] * 0.65)
+            )
+            visible_segments = (
+                segments[: frame - points_drawn]
+                if frame - points_drawn < len(segments)
+                else segments
+            )
+            colors = [
+                mcolors.to_rgba(
+                    darker_color, alpha=1 - (0.5 * j / len(visible_segments))
+                )
+                for j in range(len(visible_segments))
+            ]
+            if len(visible_segments) > 0:
+                lc = LineCollection(visible_segments, colors=colors, linewidth=lw)
+                lc.set_path_effects(
+                    [withStroke(linewidth=lw * 1.25, foreground=path_color)]
+                )
+                ax.add_collection(lc)
+            points_drawn += len(segments)
+            if points_drawn >= frame:
+                break
+    ani = FuncAnimation(fig, update, frames=all_points + 1, blit=False)
+    Writer = FFMpegWriter(fps=fps)
+    ani.save(output_name, writer=Writer)
+    plt.close(fig)
+class Stroke:
+    def __init__(self, list_of_coordinates=None) -> None:
+        self.x = []
+        self.y = []
+        if list_of_coordinates:
+            for point in list_of_coordinates:
+                self.x.append(point[0])
+                self.y.append(point[1])
+    def __len__(self):
+        return len(self.x)
+    def __getitem__(self, index):
+        return (self.x[index], self.y[index])
+class Ink:
+    def __init__(self, list_of_strokes=None) -> None:
+        self.strokes = []
+        if list_of_strokes:
+            self.strokes = list_of_strokes
+    def __len__(self):
+        return len(self.strokes)
+    def __getitem__(self, index):
+        return self.strokes[index]
+def inkml_to_ink(inkml_file):
+    """Convert inkml file to Ink"""
+    tree = ET.parse(inkml_file)
+    root = tree.getroot()
+    inkml_namespace = {"inkml": "http://www.w3.org/2003/InkML"}
+    strokes = []
+    for trace in root.findall("inkml:trace", inkml_namespace):
+        points = trace.text.strip().split()
+        stroke_points = []
+        for point in points:
+            x, y = point.split(",")
+            stroke_points.append((float(x), float(y)))
+        strokes.append(Stroke(stroke_points))
+    return Ink(strokes)
+def parse_inkml_annotations(inkml_file):
+    tree = ET.parse(inkml_file)
+    root = tree.getroot()
+    annotations = root.findall(".//{http://www.w3.org/2003/InkML}annotation")
+    annotation_dict = {}
+    for annotation in annotations:
+        annotation_type = annotation.get("type")
+        annotation_text = annotation.text
+        annotation_dict[annotation_type] = annotation_text
+    return annotation_dict