Spaces:
Runtime error
Runtime error
import gradio as gr | |
import cv2 | |
import json | |
import os | |
import numpy as np | |
from pathlib import Path | |
from pylena.scribo import line_detector | |
from pylena.scribo import VSegment, LSuperposition | |
from pylena.scribo import e_segdet_preprocess, e_segdet_process_extraction, e_segdet_process_tracking, e_segdet_process_traversal_mode | |
import time | |
from typing import List, Tuple, Dict | |
# Define all the default values
# These constants seed the `value=` of the gradio widgets built further down;
# the widget values are then forwarded to `line_detector` by `app_function`.
default_min_len = 10
# The four string defaults below are looked up by name in the matching
# pylena enum (`enum.__members__[value]` in `app_function`).
default_preprocess = "NONE"
default_tracker = "KALMAN"
default_traversal_mode = "HORIZONTAL_VERTICAL"
# NOTE(review): not referenced by the visible UI code, which hard-codes "BINARY".
default_extraction_type = "BINARY"
default_negate_image = False
default_dyn = 0.6
default_size_mask = 11
default_double_exponential_alpha = 0.6
# The two *_memory values are floats here but are cast with int() before use.
default_simple_moving_average_memory = 30.0
default_exponential_moving_average_memory = 16.0
default_one_euro_beta = 0.007
default_one_euro_mincutoff = 1.0
default_one_euro_dcutoff = 1.0
default_bucket_size = 32
default_nb_values_to_keep = 30
default_discontinuity_relative = 0
default_discontinuity_absolute = 0
default_minimum_for_fusion = 15
# "default_default_*": default value of the detector parameter `default_sigma_*`.
default_default_sigma_position = 2
default_default_sigma_thickness = 2
default_default_sigma_luminosity = 57
default_min_nb_values_sigma = 10
default_sigma_pos_min = 1.0
default_sigma_thickness_min = 0.64
default_sigma_luminosity_min = 13.0
default_gradient_threshold = 30
default_llumi = 225
default_blumi = 225
default_ratio_lum = 1.0
default_max_thickness = 100
default_threshold_intersection = 0.8
default_remove_duplicates = True
def get_json_extract(full_json: dict, count: int = 5) -> dict:
    """Extract the first entries of a json dictionary.

    Args:
        full_json (dict): The full json dictionary.
        count (int): Maximum number of entries to keep. Defaults to 5;
            values of 0 or less yield an empty dictionary.

    Returns:
        dict: A new dictionary holding at most ``count`` entries of
        ``full_json``, in the dictionary's iteration order. The values are
        shared with ``full_json`` (no deep copy).
    """
    # Local import so the function does not depend on the file's import block.
    from itertools import islice

    # islice rejects negative stops, hence the clamp.
    return dict(islice(full_json.items(), max(count, 0)))
def save_json(data: dict, path: Path) -> None:
    """Save a json dictionary to a file.

    The payload is serialised before the file is opened, so a value that
    cannot be serialised raises without truncating an existing file.

    Args:
        data (dict): The json dictionary to save.
        path (Path): The path to the file; it is created or overwritten.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialise.
        OSError: If the file cannot be opened for writing.
    """
    serialized = json.dumps(data)
    # json.dumps escapes non-ASCII characters by default, so the explicit
    # encoding only makes the result independent of the platform locale.
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(serialized)
def get_new_white(height: int, width: int) -> np.ndarray:
    """Build a blank (all white) 3-channel image.

    Args:
        height (int): Number of rows of the image.
        width (int): Number of columns of the image.

    Returns:
        np.ndarray: A ``(height, width, 3)`` ``uint8`` array filled with 255.
    """
    canvas_shape = (height, width, 3)
    return np.full(canvas_shape, 255, dtype=np.uint8)
# fmt: off | |
def generate_vector_output(img_rgb_input: np.ndarray, lines: List[VSegment], lines_colors: Dict[int, np.ndarray]) -> Tuple[np.ndarray, np.ndarray, Path, dict]:
    """Build every "vector" artefact from the detected segments.

    Side effect: writes the complete segment dictionary to
    ``vector_output_full.json`` in the current working directory.

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels (it is copied, never modified)
        lines (List[VSegment]): The identified lines in the image
        lines_colors (Dict[int, np.ndarray]): Colour of each line, indexed by the line label

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict]: the segments drawn over a copy of the input,
        the segments drawn on a white image, the path of the json file, and a sample of its content.
    """
    def render_segments(canvas: np.ndarray) -> np.ndarray:
        """Draw every segment on ``canvas`` (in place) as a 2 pixel wide line and return it."""
        for segment in lines:
            start = (segment.x0, segment.y0)
            end = (segment.x1, segment.y1)
            # cv2 expects a plain list/tuple colour, not a numpy array
            cv2.line(canvas, start, end, lines_colors[segment.label].tolist(), 2)
        return canvas

    height, width = img_rgb_input.shape[0], img_rgb_input.shape[1]
    over_input = render_segments(img_rgb_input.copy())
    on_white = render_segments(get_new_white(height, width))

    # One entry per label; a later segment with the same label replaces the earlier one
    segments_json = {
        str(segment.label): {"x0": segment.x0, "y0": segment.y0, "x1": segment.x1, "y1": segment.y1}
        for segment in lines
    }
    json_path = Path("vector_output_full.json")
    save_json(segments_json, json_path)
    json_sample = get_json_extract(segments_json)

    return over_input, on_white, json_path, json_sample
def generate_pixel_output(img_rgb_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
    """Generate the pixel output using the LSuperposition list and the img_label

    Side effects: writes ``pixel_output_label.npy`` (the label image, in NumPy's
    ``.npy`` format so that a plain ``np.load`` can read it back) and
    ``pixel_output_superposition.json`` in the current working directory.

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels (it is copied, never modified)
        img_label (np.ndarray): The labelized image; assumed to have the same height/width as img_rgb_input
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]:
        lines+superpositions over the input, lines over the input, superpositions over the input,
        the same three renderings on a white image, the label file path, the superposition json
        path and a sample of the superposition json.
    """
    def draw_pixels(img: np.ndarray, img_label: np.ndarray, lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Colour (in place) every pixel of ``img`` that belongs to a line

        Args:
            img (np.ndarray): The image to draw on
            img_label (np.ndarray): The labelized image
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the pixels drawn on it

        Raises:
            KeyError: If a label found in ``img_label`` has no entry in ``lines_colors``
        """
        # Labels 0 and 1 are not drawn as lines (the colour of label 1 is the
        # one used for superpositions below).
        line_mask = (img_label != 0) & (img_label != 1)
        labels_at_lines = img_label[line_mask]
        if labels_at_lines.size:
            # Look each distinct label up once, then scatter the colours back
            # with the inverse index instead of visiting pixels one by one in Python.
            present, inverse = np.unique(labels_at_lines, return_inverse=True)
            palette = np.array([lines_colors[label] for label in present], dtype=img.dtype)
            img[line_mask] = palette[inverse.reshape(-1)]
        return img

    def draw_superposition(img: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Colour (in place) every superposition pixel with the colour of label 1

        Args:
            img (np.ndarray): The image to draw on
            superpositions (List[LSuperposition]): The superpositions to draw
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the superpositions drawn on it
        """
        for superposition in superpositions:
            # Images are indexed [row, column], i.e. [y, x]
            img[superposition.y, superposition.x, :] = lines_colors[1]
        return img

    def get_superposition_json(superpositions: List[LSuperposition]) -> dict:
        """Group the labels of the superpositions by pixel

        Args:
            superpositions (List[LSuperposition]): The superpositions

        Returns:
            dict: Maps the key ``"<x>_<y>"`` to the list of labels recorded at that pixel
        """
        ret = {}
        for superposition in superpositions:
            key = f"{superposition.x}_{superposition.y}"
            ret.setdefault(key, []).append(superposition.label)
        return ret

    def draw_full(img: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Draw the full output (pixels, then superpositions on top) on the image

        Args:
            img (np.ndarray): The image to draw on
            img_label (np.ndarray): The labelized image
            superpositions (List[LSuperposition]): The superpositions
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the full output drawn on it
        """
        img = draw_pixels(img, img_label, lines_colors)
        img = draw_superposition(img, superpositions, lines_colors)
        return img

    # Renderings over a copy of the input image
    out_pixel_full_over_img = draw_full(img_rgb_input.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_over_img = draw_pixels(img_rgb_input.copy(), img_label, lines_colors)
    out_pixel_superposition_over_img = draw_superposition(img_rgb_input.copy(), superpositions, lines_colors)

    # Same renderings on a white background
    img_empty = get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1])
    out_pixel_full_img = draw_full(img_empty.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_img = draw_pixels(img_empty.copy(), img_label, lines_colors)
    out_pixel_superposition_img = draw_superposition(img_empty.copy(), superpositions, lines_colors)

    # np.save writes a real .npy file; ndarray.dump would write a pickle, which
    # np.load rejects unless allow_pickle=True is passed.
    out_pixel_file_label = Path("pixel_output_label.npy")
    np.save(out_pixel_file_label, img_label)

    out_pixel_file_superposition = Path("pixel_output_superposition.json")
    out_pixel_file_superposition_full = get_superposition_json(superpositions)
    save_json(out_pixel_file_superposition_full, out_pixel_file_superposition)
    out_pixel_file_superposition_extract = get_json_extract(out_pixel_file_superposition_full)

    return out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img, out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img, out_pixel_file_label, out_pixel_file_superposition, out_pixel_file_superposition_extract
def generate_output(img_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines: List[VSegment]):
    """Generate every output of the application from the detector results

    Args:
        img_input (np.ndarray): Input image with 1 channel
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines (List[VSegment]): The identified lines in the image

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]:
        The 4 values of generate_vector_output followed by the 9 values of generate_pixel_output
    """
    def get_rgb_input_img(greyscale_input_img: np.ndarray) -> np.ndarray:
        """Convert a greyscale image to a rgb image

        Args:
            greyscale_input_img (np.ndarray): The greyscale / 1 channel image

        Returns:
            np.ndarray: The 3 channels (uint8) version of the input image, each channel being a copy of the input
        """
        rgb_input_img: np.ndarray = np.zeros((greyscale_input_img.shape[0], greyscale_input_img.shape[1], 3), dtype=np.uint8)
        for channel in range(3):
            rgb_input_img[:, :, channel] = greyscale_input_img
        return rgb_input_img

    def generate_line_colors(lines: List[VSegment]) -> Dict[int, np.ndarray]:
        """Generate a color for each line

        Labels 0 and 1 always get black and red; every line label then gets a
        pseudo-random colour. The colours are reproducible from one call to the next.

        Args:
            lines (List[VSegment]): The lines

        Returns:
            Dict[int, np.ndarray]: A dictionary containing the color for each line according to their label
        """
        # A private generator seeded with 0 yields the same colours as
        # np.random.seed(0) + np.random.randint, without reseeding the
        # process-wide generator on every request.
        rng = np.random.RandomState(0)
        color = rng.randint(low=0, high=255, size=(len(lines), 3))
        ret = {}
        ret[0] = np.array([0, 0, 0])
        ret[1] = np.array([255, 0, 0])
        for i, line in enumerate(lines):
            ret[line.label] = color[i, :].astype(np.uint8)
        return ret

    rgb_input_img: np.ndarray = get_rgb_input_img(img_input)
    lines_colors: Dict[int, np.ndarray] = generate_line_colors(lines)

    out_vector: Tuple[np.ndarray, np.ndarray, Path, dict]
    out_vector = generate_vector_output(rgb_input_img, lines, lines_colors)

    out_pixel: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]
    out_pixel = generate_pixel_output(rgb_input_img, img_label, superpositions, lines_colors)

    return *out_vector, *out_pixel
def app_function(
    greyscale_input_img,
    min_len,
    preprocess,
    tracker,
    traversal_mode,
    extraction_type,
    negate_image,
    dyn,
    size_mask,
    double_exponential_alpha,
    simple_moving_average_memory,
    exponential_moving_average_memory,
    one_euro_beta,
    one_euro_mincutoff,
    one_euro_dcutoff,
    bucket_size,
    nb_values_to_keep,
    discontinuity_relative,
    discontinuity_absolute,
    minimum_for_fusion,
    default_sigma_position,
    default_sigma_thickness,
    default_sigma_luminosity,
    min_nb_values_sigma,
    sigma_pos_min,
    sigma_thickness_min,
    sigma_luminosity_min,
    gradient_threshold,
    llumi,
    blumi,
    ratio_lum,
    max_thickness,
    threshold_intersection,
    remove_duplicates):
    """Run the line detector on the input image and build every output of the application

    The first argument is the greyscale input image; all the others are the raw
    widget values, which are cast to the type `line_detector` is called with
    (int, float, bool or a pylena enum member).

    Returns:
        tuple: The duration of the detection in seconds, followed by the values
        returned by generate_output.

    Raises:
        gr.Error: If no input image was provided.
        KeyError: If one of the enum names does not match any member of its enum.
    """
    img_label: np.ndarray
    superpositions: List[LSuperposition]
    lines: List[VSegment]

    def get_enum_value(enum, value):
        """Return the member of ``enum`` named ``value``

        An exact match is tried first; a human readable spelling such as
        "Black top hat" is then retried as "BLACK_TOP_HAT".
        """
        members = enum.__members__
        if value in members:
            return members[value]
        # NOTE(review): assumes the enum members are named in UPPER_SNAKE_CASE — confirm against pylena.
        return members[str(value).strip().upper().replace(" ", "_")]

    # Without an image the detector would fail with an obscure error; report it in the UI instead.
    if greyscale_input_img is None:
        raise gr.Error("Please provide an input image before running the line detection.")

    # perf_counter is monotonic, unlike time.time, so the duration cannot be skewed by clock adjustments
    t0 = time.perf_counter()
    img_label, superpositions, lines = line_detector(
        greyscale_input_img, "full",
        min_len=int(min_len),
        preprocess=get_enum_value(e_segdet_preprocess, preprocess),
        tracker=get_enum_value(e_segdet_process_tracking, tracker),
        traversal_mode=get_enum_value(e_segdet_process_traversal_mode, traversal_mode),
        extraction_type=get_enum_value(e_segdet_process_extraction, extraction_type),
        negate_image=bool(negate_image),
        dyn=float(dyn),
        size_mask=int(size_mask),
        double_exponential_alpha=float(double_exponential_alpha),
        simple_moving_average_memory=int(simple_moving_average_memory),
        exponential_moving_average_memory=int(exponential_moving_average_memory),
        one_euro_beta=float(one_euro_beta),
        one_euro_mincutoff=float(one_euro_mincutoff),
        one_euro_dcutoff=float(one_euro_dcutoff),
        bucket_size=int(bucket_size),
        nb_values_to_keep=int(nb_values_to_keep),
        discontinuity_relative=int(discontinuity_relative),
        discontinuity_absolute=int(discontinuity_absolute),
        minimum_for_fusion=int(minimum_for_fusion),
        default_sigma_position=int(default_sigma_position),
        default_sigma_thickness=int(default_sigma_thickness),
        default_sigma_luminosity=int(default_sigma_luminosity),
        min_nb_values_sigma=int(min_nb_values_sigma),
        sigma_pos_min=float(sigma_pos_min),
        sigma_thickness_min=float(sigma_thickness_min),
        sigma_luminosity_min=float(sigma_luminosity_min),
        gradient_threshold=int(gradient_threshold),
        llumi=int(llumi),
        blumi=int(blumi),
        ratio_lum=float(ratio_lum),
        max_thickness=int(max_thickness),
        threshold_intersection=float(threshold_intersection),
        remove_duplicates=bool(remove_duplicates)
    )
    t1 = time.perf_counter()
    duration = t1 - t0

    outputs = generate_output(greyscale_input_img, img_label, superpositions, lines)

    return duration, *outputs
# Gradio user interface: inputs and parameters in the left column, outputs in
# the right one, then the run button, the examples and the contact note.
# NOTE(review): the nesting of rows/columns/tabs is inferred from the tab
# names — confirm against the rendered page.
with gr.Blocks() as app:
    gr.Markdown("""
    # Pylena line detection demonstration
    This is a demonstration of the line detector described in the article *Linear Object Detection in Document Images using Multiple Object Tracking*
    accepted at ICDAR 2023. The article is available at: https://arxiv.org/abs/2305.16968.
    ## How to use this demonstration ?
    You can either upload your own (greyscale/8bit image) image or use one of the examples, then change the parameters and click on the run button.
    The complete documentation is available at: http://olena.pages.lre.epita.fr/pylena/
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("## Input")
            img_input = gr.Image(type="numpy", image_mode="L", label="Greyscale input image")

            with gr.Tab("Parameters"):
                with gr.Tab("Tracking"):
                    min_len = gr.Number(label="min_len", value=default_min_len)
                    tracker = gr.Radio(label="tracker", choices=["KALMAN", "ONE_EURO", "DOUBLE_EXPONENTIAL", "LAST_INTEGRATION", "SIMPLE_MOVING_AVERAGE", "EXPONENTIAL_MOVING_AVERAGE"], value=default_tracker)
                    traversal_mode = gr.Radio(label="traversal_mode", choices=["HORIZONTAL_VERTICAL", "HORIZONTAL", "VERTICAL"], value=default_traversal_mode)

                with gr.Tab("Observation extraction"):
                    blumi = gr.Number(label="blumi", value=default_blumi)
                    llumi = gr.Number(label="llumi", value=default_llumi)
                    max_thickness = gr.Number(label="max_thickness", value=default_max_thickness)

                with gr.Tab("Discontinuity"):
                    discontinuity_relative = gr.Number(label="discontinuity_relative", value=default_discontinuity_relative)
                    discontinuity_absolute = gr.Number(label="discontinuity_absolute", value=default_discontinuity_absolute)

            with gr.Tab("Advanced parameters"):
                with gr.Tab("Preprocessing"):
                    # The selected choice is looked up by name in e_segdet_preprocess, so it
                    # has to be a member name like the other radio buttons.
                    # NOTE(review): presumably the member is BLACK_TOP_HAT — confirm against pylena.
                    preprocess = gr.Radio(label="preprocess", choices=["NONE", "BLACK_TOP_HAT"], value=default_preprocess)
                    negate_image = gr.Checkbox(label="negate_image", value=default_negate_image)
                    dyn = gr.Number(label="dyn", value=default_dyn)
                    size_mask = gr.Number(label="size_mask", value=default_size_mask)

                with gr.Tab("Tracker specific parameters"):
                    double_exponential_alpha = gr.Number(label="double_exponential_alpha", value=default_double_exponential_alpha)
                    simple_moving_average_memory = gr.Number(label="simple_moving_average_memory", value=default_simple_moving_average_memory)
                    exponential_moving_average_memory = gr.Number(label="exponential_moving_average_memory", value=default_exponential_moving_average_memory)
                    one_euro_beta = gr.Number(label="one_euro_beta", value=default_one_euro_beta)
                    one_euro_mincutoff = gr.Number(label="one_euro_mincutoff", value=default_one_euro_mincutoff)
                    one_euro_dcutoff = gr.Number(label="one_euro_dcutoff", value=default_one_euro_dcutoff)

                with gr.Tab("Tracker parameters"):
                    nb_values_to_keep = gr.Number(label="nb_values_to_keep", value=default_nb_values_to_keep)
                    minimum_for_fusion = gr.Number(label="minimum_for_fusion", value=default_minimum_for_fusion)

                with gr.Tab("Observation extraction"):
                    # Use the shared default like every other widget instead of a literal
                    extraction_type = gr.Radio(label="extraction_type", choices=["BINARY", "GRADIENT"], value=default_extraction_type)
                    gradient_threshold = gr.Number(label="gradient_threshold", value=default_gradient_threshold)

                with gr.Tab("Observation matching"):
                    default_sigma_position = gr.Number(label="default_sigma_position", value=default_default_sigma_position)
                    default_sigma_thickness = gr.Number(label="default_sigma_thickness", value=default_default_sigma_thickness)
                    default_sigma_luminosity = gr.Number(label="default_sigma_luminosity", value=default_default_sigma_luminosity)
                    min_nb_values_sigma = gr.Number(label="min_nb_values_sigma", value=default_min_nb_values_sigma)
                    sigma_pos_min = gr.Number(label="sigma_pos_min", value=default_sigma_pos_min)
                    sigma_thickness_min = gr.Number(label="sigma_thickness_min", value=default_sigma_thickness_min)
                    sigma_luminosity_min = gr.Number(label="sigma_luminosity_min", value=default_sigma_luminosity_min)

                with gr.Tab("Extraction"):
                    ratio_lum = gr.Number(label="ratio_lum", value=default_ratio_lum)

                with gr.Tab("Post Processing"):
                    threshold_intersection = gr.Number(label="threshold_intersection", value=default_threshold_intersection)
                    remove_duplicates = gr.Checkbox(label="remove_duplicates", value=default_remove_duplicates)

                with gr.Tab("Optimisation"):
                    bucket_size = gr.Number(label="bucket_size", value=default_bucket_size)

        with gr.Column():
            gr.Markdown("## Output")
            out_duration = gr.Number(label="Line detection duration (in seconds)", value=-1, interactive=False)

            with gr.Tab("Output Vector"):
                with gr.Tab("Over input"):
                    out_vector_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_vector_label_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("File"):
                    out_vector_file = gr.File(label="Vector output full", interactive=False)
                    out_vector_file_extract = gr.Json(label="Vector sample")

            with gr.Tab("Output Pixel"):
                with gr.Tab("Line and Superposition over input"):
                    out_pixel_full_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line over input"):
                    out_pixel_line_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition over input"):
                    out_pixel_superposition_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line and Superposition"):
                    out_pixel_full_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_pixel_line_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition only"):
                    # Output only, like the five other images of this tab
                    out_pixel_superposition_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("File"):
                    # Distinct labels so the two downloadable files can be told apart
                    out_pixel_file_label = gr.File(label="Pixel output label image", interactive=False)
                    out_pixel_file_superposition = gr.File(label="Pixel output superposition full", interactive=False)
                    out_pixel_file_superposition_extract = gr.Json(label="Superposition sample")

    run_button = gr.Button("Run")
    # The order of `inputs` must follow the parameters of app_function and the
    # order of `outputs` the values it returns.
    run_button.click(
        app_function,
        inputs=[
            img_input,
            min_len,
            preprocess,
            tracker,
            traversal_mode,
            extraction_type,
            negate_image,
            dyn,
            size_mask,
            double_exponential_alpha,
            simple_moving_average_memory,
            exponential_moving_average_memory,
            one_euro_beta,
            one_euro_mincutoff,
            one_euro_dcutoff,
            bucket_size,
            nb_values_to_keep,
            discontinuity_relative,
            discontinuity_absolute,
            minimum_for_fusion,
            default_sigma_position,
            default_sigma_thickness,
            default_sigma_luminosity,
            min_nb_values_sigma,
            sigma_pos_min,
            sigma_thickness_min,
            sigma_luminosity_min,
            gradient_threshold,
            llumi,
            blumi,
            ratio_lum,
            max_thickness,
            threshold_intersection,
            remove_duplicates
        ],
        outputs=[
            out_duration,

            out_vector_over_img, out_vector_label_img,
            out_vector_file, out_vector_file_extract,

            out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img,
            out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img,
            out_pixel_file_label,
            out_pixel_file_superposition, out_pixel_file_superposition_extract
        ])

    gr.Markdown("""
    ## Examples
    Be aware that parameters are not reset when you change example.
    """)

    # Example images are shipped in the "image" directory next to this file
    current_dir = os.path.dirname(__file__)

    with gr.Tab("trade_directory"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "trade_directories.png"), 200, 200, 200]],
            inputs=[img_input, blumi, llumi, min_len]
        )
    with gr.Tab("music_sheet"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "music_sheet.png"), 30, 5, 20, "HORIZONTAL"]],
            inputs=[img_input, discontinuity_relative, max_thickness, min_len, traversal_mode]
        )
    with gr.Tab("map"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "map.png"), 4, 180, 180, 20, 6]],
            inputs=[img_input, discontinuity_relative, blumi, llumi, min_len, max_thickness]
        )

    gr.Markdown("""
    ## A question ?
    If you have any question, please contact us at: <philippe.bernet@epita.fr>
    """)

# fmt: on

app.launch()