import json
import os
import time
from itertools import islice
from pathlib import Path
from typing import Dict, List, Tuple

import cv2
import gradio as gr
import numpy as np
from pylena.scribo import line_detector
from pylena.scribo import VSegment, LSuperposition
from pylena.scribo import e_segdet_preprocess, e_segdet_process_extraction, e_segdet_process_tracking, e_segdet_process_traversal_mode

# Define all the default values
default_min_len = 10
default_preprocess = "NONE"
default_tracker = "KALMAN"
default_traversal_mode = "HORIZONTAL_VERTICAL"
default_extraction_type = "BINARY"
default_negate_image = False
default_dyn = 0.6
default_size_mask = 11
default_double_exponential_alpha = 0.6
default_simple_moving_average_memory = 30.0
default_exponential_moving_average_memory = 16.0
default_one_euro_beta = 0.007
default_one_euro_mincutoff = 1.0
default_one_euro_dcutoff = 1.0
default_bucket_size = 32
default_nb_values_to_keep = 30
default_discontinuity_relative = 0
default_discontinuity_absolute = 0
default_minimum_for_fusion = 15
default_default_sigma_position = 2
default_default_sigma_thickness = 2
default_default_sigma_luminosity = 57
default_min_nb_values_sigma = 10
default_sigma_pos_min = 1.0
default_sigma_thickness_min = 0.64
default_sigma_luminosity_min = 13.0
default_gradient_threshold = 30
default_llumi = 225
default_blumi = 225
default_ratio_lum = 1.0
default_max_thickness = 100
default_threshold_intersection = 0.8
default_remove_duplicates = True


def get_json_extract(full_json: dict) -> dict:
    """Extract 5 samples from a json dictionary

    Args:
        full_json (dict): The full json dictionary

    Returns:
        dict: A sub sample of the full json dictionary containing the first 5 samples
            (or all of them when there are fewer than 5).
    """
    return dict(islice(full_json.items(), 5))


def save_json(data: dict, path: Path) -> None:
    """Save a json dictionary to a file

    Args:
        data (dict): The json dictionary to save
        path (Path): The path to the file
    """
    with open(path, "w") as f:
        json.dump(data, f)


def get_new_white(height: int, width: int) -> np.ndarray:
    """Create a new white image

    Args:
        height (int): The height of the image
        width (int): The width of the image

    Returns:
        np.ndarray: The new white image (height x width x 3, uint8, every value 255)
    """
    return np.full((height, width, 3), 255, dtype=np.uint8)


# fmt: off

def generate_vector_output(img_rgb_input: np.ndarray, lines: List[VSegment], lines_colors: Dict[int, np.ndarray]) -> Tuple[np.ndarray, np.ndarray, Path, dict]:
    """Generate the vector output using the VSegment list

    Side effect: writes the full vector description to ``vector_output_full.json``
    in the current working directory.

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels
        lines (List[VSegment]): The identified lines in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict]: The lines drawn over a copy of the input, the lines
            drawn on a white image, the path of the json file and a 5-entry sample of its content
    """

    def draw_lines(img: np.ndarray, lines: List[VSegment]) -> np.ndarray:
        """Draw the lines as vector on the image

        Args:
            img (np.ndarray): The image to draw on (modified in place)
            lines (List[VSegment]): The lines to draw

        Returns:
            np.ndarray: The image with the lines drawn on it
        """
        for line in lines:
            cv2.line(img, (line.x0, line.y0), (line.x1, line.y1), lines_colors[line.label].tolist(), 2)
        return img

    def get_vector_json(lines: List[VSegment]) -> dict:
        """Generate the json dictionary containing the vector output

        Args:
            lines (List[VSegment]): The lines to describe

        Returns:
            dict: The end points of each line, keyed by the line label as a string
        """
        return {
            str(line.label): {"x0": line.x0, "y0": line.y0, "x1": line.x1, "y1": line.y1}
            for line in lines
        }

    img_empty = get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1])

    # The input is copied so that the caller's image is left untouched.
    out_vector_over_img = draw_lines(img_rgb_input.copy(), lines)
    out_vector_label_img = draw_lines(img_empty, lines)

    out_vector_file = Path("vector_output_full.json")
    out_vector_file_full = get_vector_json(lines)
    save_json(out_vector_file_full, out_vector_file)
    out_vector_file_extract = get_json_extract(out_vector_file_full)

    return out_vector_over_img, out_vector_label_img, out_vector_file, out_vector_file_extract


def generate_pixel_output(img_rgb_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]:
    """Generate the pixel output using the LSuperposition list and the img_label

    Side effects: writes ``pixel_output_label.npy`` and ``pixel_output_superposition.json``
    in the current working directory.

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]: The pixel output
    """

    def draw_pixels(img: np.ndarray, img_label: np.ndarray, lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Draw the labelized pixels on the image

        Every pixel whose label is neither 0 nor 1 is painted with the color of its label.

        Args:
            img (np.ndarray): The image to draw on (modified in place)
            img_label (np.ndarray): The labelized image
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the pixels drawn on it

        Raises:
            KeyError: If a label found in img_label has no entry in lines_colors
        """
        # Only the area covered by ``img`` is painted.
        labels = img_label[: img.shape[0], : img.shape[1]]
        line_mask = (labels != 0) & (labels != 1)
        if not line_mask.any():
            return img

        # Build a small palette with one row per label actually present, then
        # paint every selected pixel in a single vectorised assignment instead
        # of visiting the pixels one by one in Python.
        present, palette_index = np.unique(labels[line_mask], return_inverse=True)
        palette = np.stack([np.asarray(lines_colors[int(label)]) for label in present]).astype(img.dtype)
        img[line_mask] = palette[palette_index]
        return img

    def draw_superposition(img: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Draw the superpositions on the image

        Args:
            img (np.ndarray): The image to draw on (modified in place)
            superpositions (List[LSuperposition]): The superpositions to draw
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the superpositions drawn on it
        """
        for superposition in superpositions:
            # Superposition pixels are painted with the color stored under key 1.
            img[superposition.y, superposition.x, :] = lines_colors[1]
        return img

    def get_superposition_json(superpositions: List[LSuperposition]) -> dict:
        """Generate the json dictionary containing the superposition output

        Args:
            superpositions (List[LSuperposition]): The superpositions

        Returns:
            dict: For each position, keyed as "x_y", the list of labels recorded at this position
        """
        ret = {}
        for superposition in superpositions:
            key = f"{superposition.x}_{superposition.y}"
            ret.setdefault(key, []).append(superposition.label)
        return ret

    def draw_full(img: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Draw the full output (pixels and superpositions) on the image

        Args:
            img (np.ndarray): The image to draw on (modified in place)
            img_label (np.ndarray): The labelized image
            superpositions (List[LSuperposition]): The superpositions
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the full output drawn on it
        """
        img = draw_pixels(img, img_label, lines_colors)
        img = draw_superposition(img, superpositions, lines_colors)
        return img

    # Every drawing works on its own copy because the helpers draw in place.
    out_pixel_full_over_img = draw_full(img_rgb_input.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_over_img = draw_pixels(img_rgb_input.copy(), img_label, lines_colors)
    out_pixel_superposition_over_img = draw_superposition(img_rgb_input.copy(), superpositions, lines_colors)

    img_empty = get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1])
    out_pixel_full_img = draw_full(img_empty.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_img = draw_pixels(img_empty.copy(), img_label, lines_colors)
    out_pixel_superposition_img = draw_superposition(img_empty.copy(), superpositions, lines_colors)

    out_pixel_file_label = Path("pixel_output_label.npy")
    # np.save writes the real .npy format. ndarray.dump writes a pickle, which
    # np.load refuses to read with its default allow_pickle=False.
    np.save(out_pixel_file_label, img_label)

    out_pixel_file_superposition = Path("pixel_output_superposition.json")
    out_pixel_file_superposition_full = get_superposition_json(superpositions)
    save_json(out_pixel_file_superposition_full, out_pixel_file_superposition)
    out_pixel_file_superposition_extract = get_json_extract(out_pixel_file_superposition_full)

    return out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img, out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img, out_pixel_file_label, out_pixel_file_superposition, out_pixel_file_superposition_extract


def generate_output(img_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines: List[VSegment]):
    """Generate the output using the LSuperposition list and the img_label

    Args:
        img_input (np.ndarray): Input image with 1 channel
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines (List[VSegment]): The identified lines in the image

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]: The complete output for gradio application
            (the vector output followed by the pixel output)
    """

    def get_rgb_input_img(greyscale_input_img: np.ndarray) -> np.ndarray:
        """Convert a greyscale image to a rgb image

        Args:
            greyscale_input_img (np.ndarray): The greyscale / 1 channel image

        Returns:
            np.ndarray: The 3 channels version of the input image (uint8)
        """
        # The single channel is replicated on the three RGB channels.
        return np.repeat(greyscale_input_img[:, :, np.newaxis], 3, axis=2).astype(np.uint8)

    def generate_line_colors(lines: List[VSegment]) -> Dict[int, np.ndarray]:
        """Generate a color for each line

        Args:
            lines (List[VSegment]): The lines

        Returns:
            Dict[int, np.ndarray]: A dictionary containing the color for each line according to their label.
                Keys 0 and 1 hold the fixed colors [0, 0, 0] and [255, 0, 0].
        """
        # A private generator seeded with 0 yields the same colors on every run
        # without resetting the global numpy random state.
        rng = np.random.RandomState(0)
        color = rng.randint(low=0, high=255, size=(len(lines), 3))

        ret = {}
        ret[0] = np.array([0, 0, 0])
        ret[1] = np.array([255, 0, 0])
        for line, line_color in zip(lines, color):
            ret[line.label] = line_color.astype(np.uint8)
        return ret

    rgb_input_img: np.ndarray = get_rgb_input_img(img_input)
    lines_colors: Dict[int, np.ndarray] = generate_line_colors(lines)

    out_vector: Tuple[np.ndarray, np.ndarray, Path, dict]
    out_vector = generate_vector_output(rgb_input_img, lines, lines_colors)

    out_pixel: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]
    out_pixel = generate_pixel_output(rgb_input_img, img_label, superpositions, lines_colors)

    return *out_vector, *out_pixel


def app_function(
    greyscale_input_img,
    min_len,
    preprocess,
    tracker,
    traversal_mode,
    extraction_type,
    negate_image,
    dyn,
    size_mask,
    double_exponential_alpha,
    simple_moving_average_memory,
    exponential_moving_average_memory,
    one_euro_beta,
    one_euro_mincutoff,
    one_euro_dcutoff,
    bucket_size,
    nb_values_to_keep,
    discontinuity_relative,
    discontinuity_absolute,
    minimum_for_fusion,
    default_sigma_position,
    default_sigma_thickness,
    default_sigma_luminosity,
    min_nb_values_sigma,
    sigma_pos_min,
    sigma_thickness_min,
    sigma_luminosity_min,
    gradient_threshold,
    llumi,
    blumi,
    ratio_lum,
    max_thickness,
    threshold_intersection,
    remove_duplicates):
    """Run the line detector with the values of the interface and build every output

    Each value coming from the interface is converted to the python type passed to
    ``line_detector`` (int, float, bool or enum member) before the call.

    Args:
        greyscale_input_img: The greyscale input image coming from the image component
        min_len ... remove_duplicates: The value of the interface component of the same
            name, forwarded to the keyword argument of ``line_detector`` of the same name

    Returns:
        tuple: The duration of the detection (in seconds) followed by the outputs of generate_output

    Raises:
        gr.Error: If no input image has been provided
        KeyError: If preprocess, tracker, traversal_mode or extraction_type does not name a member of its enum
    """
    if greyscale_input_img is None:
        # Without this check the detector is called with None and fails with an obscure error.
        raise gr.Error("Please provide an input image before running the line detection.")

    img_label: np.ndarray
    superpositions: List[LSuperposition]
    lines: List[VSegment]

    def get_enum_value(enum, value):
        """Return the member of enum named value

        The exact name is tried first. Otherwise the name is normalised to the
        UPPER_SNAKE_CASE form so that a human readable choice of the interface
        (e.g. "Black top hat") can still be resolved.
        """
        members = enum.__members__
        if value in members:
            return members[value]
        return members[value.upper().replace(" ", "_")]

    t0 = time.perf_counter()
    img_label, superpositions, lines = line_detector(
        greyscale_input_img, "full",
        min_len=int(min_len),
        preprocess=get_enum_value(e_segdet_preprocess, preprocess),
        tracker=get_enum_value(e_segdet_process_tracking, tracker),
        traversal_mode=get_enum_value(e_segdet_process_traversal_mode, traversal_mode),
        extraction_type=get_enum_value(e_segdet_process_extraction, extraction_type),
        negate_image=bool(negate_image),
        dyn=float(dyn),
        size_mask=int(size_mask),
        double_exponential_alpha=float(double_exponential_alpha),
        simple_moving_average_memory=int(simple_moving_average_memory),
        exponential_moving_average_memory=int(exponential_moving_average_memory),
        one_euro_beta=float(one_euro_beta),
        one_euro_mincutoff=float(one_euro_mincutoff),
        one_euro_dcutoff=float(one_euro_dcutoff),
        bucket_size=int(bucket_size),
        nb_values_to_keep=int(nb_values_to_keep),
        discontinuity_relative=int(discontinuity_relative),
        discontinuity_absolute=int(discontinuity_absolute),
        minimum_for_fusion=int(minimum_for_fusion),
        default_sigma_position=int(default_sigma_position),
        default_sigma_thickness=int(default_sigma_thickness),
        default_sigma_luminosity=int(default_sigma_luminosity),
        min_nb_values_sigma=int(min_nb_values_sigma),
        sigma_pos_min=float(sigma_pos_min),
        sigma_thickness_min=float(sigma_thickness_min),
        sigma_luminosity_min=float(sigma_luminosity_min),
        gradient_threshold=int(gradient_threshold),
        llumi=int(llumi),
        blumi=int(blumi),
        ratio_lum=float(ratio_lum),
        max_thickness=int(max_thickness),
        threshold_intersection=float(threshold_intersection),
        remove_duplicates=bool(remove_duplicates)
    )
    t1 = time.perf_counter()

    # Only the detection is timed, not the rendering of the outputs.
    duration = t1 - t0

    outputs = generate_output(greyscale_input_img, img_label, superpositions, lines)

    return duration, *outputs


with gr.Blocks() as app:
    gr.Markdown("""
                # Pylena line detection demonstration

                This is a demonstration of the line detector described in the article *Linear Object Detection in Document Images using Multiple Object Tracking*
                accepted at ICDAR 2023. The article is available at: https://arxiv.org/abs/2305.16968.

                ## How to use this demonstration ?

                You can either upload your own (greyscale/8bit image) image or use one of the examples, then change the parameters and click on the run button.

                The complete documentation is available at: http://olena.pages.lre.epita.fr/pylena/
                """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("## Input")
            img_input = gr.Image(type="numpy", image_mode="L", label="Greyscale input image")

            with gr.Tab("Parameters"):
                with gr.Tab("Tracking"):
                    min_len = gr.Number(label="min_len", value=default_min_len)
                    tracker = gr.Radio(label="tracker", choices=["KALMAN", "ONE_EURO", "DOUBLE_EXPONENTIAL", "LAST_INTEGRATION", "SIMPLE_MOVING_AVERAGE", "EXPONENTIAL_MOVING_AVERAGE"], value=default_tracker)
                    traversal_mode = gr.Radio(label="traversal_mode", choices=["HORIZONTAL_VERTICAL", "HORIZONTAL", "VERTICAL"], value=default_traversal_mode)

                with gr.Tab("Observation extraction"):
                    blumi = gr.Number(label="blumi", value=default_blumi)
                    llumi = gr.Number(label="llumi", value=default_llumi)
                    max_thickness = gr.Number(label="max_thickness", value=default_max_thickness)

                with gr.Tab("Discontinuity"):
                    discontinuity_relative = gr.Number(label="discontinuity_relative", value=default_discontinuity_relative)
                    discontinuity_absolute = gr.Number(label="discontinuity_absolute", value=default_discontinuity_absolute)

            with gr.Tab("Advanced parameters"):
                with gr.Tab("Preprocessing"):
                    preprocess = gr.Radio(label="preprocess", choices=["NONE", "Black top hat"], value=default_preprocess)
                    negate_image = gr.Checkbox(label="negate_image", value=default_negate_image)
                    dyn = gr.Number(label="dyn", value=default_dyn)
                    size_mask = gr.Number(label="size_mask", value=default_size_mask)

                with gr.Tab("Tracker specific parameters"):
                    double_exponential_alpha = gr.Number(label="double_exponential_alpha", value=default_double_exponential_alpha)
                    simple_moving_average_memory = gr.Number(label="simple_moving_average_memory", value=default_simple_moving_average_memory)
                    exponential_moving_average_memory = gr.Number(label="exponential_moving_average_memory", value=default_exponential_moving_average_memory)
                    one_euro_beta = gr.Number(label="one_euro_beta", value=default_one_euro_beta)
                    one_euro_mincutoff = gr.Number(label="one_euro_mincutoff", value=default_one_euro_mincutoff)
                    one_euro_dcutoff = gr.Number(label="one_euro_dcutoff", value=default_one_euro_dcutoff)

                with gr.Tab("Tracker parameters"):
                    nb_values_to_keep = gr.Number(label="nb_values_to_keep", value=default_nb_values_to_keep)
                    minimum_for_fusion = gr.Number(label="minimum_for_fusion", value=default_minimum_for_fusion)

                with gr.Tab("Observation extraction"):
                    extraction_type = gr.Radio(label="extraction_type", choices=["BINARY", "GRADIENT"], value=default_extraction_type)
                    gradient_threshold = gr.Number(label="gradient_threshold", value=default_gradient_threshold)

                with gr.Tab("Observation matching"):
                    default_sigma_position = gr.Number(label="default_sigma_position", value=default_default_sigma_position)
                    default_sigma_thickness = gr.Number(label="default_sigma_thickness", value=default_default_sigma_thickness)
                    default_sigma_luminosity = gr.Number(label="default_sigma_luminosity", value=default_default_sigma_luminosity)
                    min_nb_values_sigma = gr.Number(label="min_nb_values_sigma", value=default_min_nb_values_sigma)
                    sigma_pos_min = gr.Number(label="sigma_pos_min", value=default_sigma_pos_min)
                    sigma_thickness_min = gr.Number(label="sigma_thickness_min", value=default_sigma_thickness_min)
                    sigma_luminosity_min = gr.Number(label="sigma_luminosity_min", value=default_sigma_luminosity_min)

                with gr.Tab("Extraction"):
                    ratio_lum = gr.Number(label="ratio_lum", value=default_ratio_lum)

                with gr.Tab("Post Processing"):
                    threshold_intersection = gr.Number(label="threshold_intersection", value=default_threshold_intersection)
                    remove_duplicates = gr.Checkbox(label="remove_duplicates", value=default_remove_duplicates)

                with gr.Tab("Optimisation"):
                    bucket_size = gr.Number(label="bucket_size", value=default_bucket_size)

        with gr.Column():
            gr.Markdown("## Output")

            out_duration = gr.Number(label="Line detection duration (in seconds)", value=-1, interactive=False)

            with gr.Tab("Output Vector"):
                with gr.Tab("Over input"):
                    out_vector_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_vector_label_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("File"):
                    out_vector_file = gr.File(label="Vector output full", interactive=False)
                    out_vector_file_extract = gr.Json(label="Vector sample")

            with gr.Tab("Output Pixel"):
                with gr.Tab("Line and Superposition over input"):
                    out_pixel_full_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line over input"):
                    out_pixel_line_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition over input"):
                    out_pixel_superposition_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line and Superposition"):
                    out_pixel_full_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_pixel_line_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition only"):
                    out_pixel_superposition_img = gr.Image(type="numpy", image_mode="RGB", label="Labelized image")
                with gr.Tab("File"):
                    out_pixel_file_label = gr.File(label="Pixel output full", interactive=False)
                    # Distinct label so that the two downloadable files can be told apart.
                    out_pixel_file_superposition = gr.File(label="Superposition output full", interactive=False)
                    out_pixel_file_superposition_extract = gr.Json(label="Superposition sample")

    run_button = gr.Button("Run")
    # The order of the inputs must match the order of the parameters of app_function,
    # and the order of the outputs the order of the values it returns.
    run_button.click(
        app_function,
        inputs=[
            img_input,
            min_len,
            preprocess,
            tracker,
            traversal_mode,
            extraction_type,
            negate_image,
            dyn,
            size_mask,
            double_exponential_alpha,
            simple_moving_average_memory,
            exponential_moving_average_memory,
            one_euro_beta,
            one_euro_mincutoff,
            one_euro_dcutoff,
            bucket_size,
            nb_values_to_keep,
            discontinuity_relative,
            discontinuity_absolute,
            minimum_for_fusion,
            default_sigma_position,
            default_sigma_thickness,
            default_sigma_luminosity,
            min_nb_values_sigma,
            sigma_pos_min,
            sigma_thickness_min,
            sigma_luminosity_min,
            gradient_threshold,
            llumi,
            blumi,
            ratio_lum,
            max_thickness,
            threshold_intersection,
            remove_duplicates
        ],
        outputs=[
            out_duration,

            out_vector_over_img, out_vector_label_img,
            out_vector_file, out_vector_file_extract,

            out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img,
            out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img,
            out_pixel_file_label,
            out_pixel_file_superposition, out_pixel_file_superposition_extract
        ])

    gr.Markdown("""
                ## Examples

                Be aware that parameters are not reset when you change example.
                """)

    current_dir = os.path.dirname(__file__)

    with gr.Tab("trade_directory"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "trade_directories.png"), 200, 200, 200]],
            inputs=[img_input, blumi, llumi, min_len]
        )
    with gr.Tab("music_sheet"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "music_sheet.png"), 30, 5, 20, "HORIZONTAL"]],
            inputs=[img_input, discontinuity_relative, max_thickness, min_len, traversal_mode]
        )
    with gr.Tab("map"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "map.png"), 4, 180, 180, 20, 6]],
            inputs=[img_input, discontinuity_relative, blumi, llumi, min_len, max_thickness]
        )

    gr.Markdown("""
                ## A question ?

                If you have any question, please contact us at: 
                """)

# fmt: on

app.launch()