Sefray's picture
v1
1bae6d9
raw
history blame
24.8 kB
import gradio as gr
import cv2
import json
import os
import numpy as np
from pathlib import Path
from pylena.scribo import line_detector
from pylena.scribo import VSegment, LSuperposition
from pylena.scribo import e_segdet_preprocess, e_segdet_process_extraction, e_segdet_process_tracking, e_segdet_process_traversal_mode
import time
from typing import List, Tuple, Dict
# Default values of the line detector parameters exposed by the interface.
# They are grouped below by the parameter tab in which each one is displayed.

# Tracking
default_min_len = 10
default_tracker = "KALMAN"
default_traversal_mode = "HORIZONTAL_VERTICAL"

# Observation extraction
default_blumi = 225
default_llumi = 225
default_max_thickness = 100
default_extraction_type = "BINARY"
default_gradient_threshold = 30

# Discontinuity
default_discontinuity_relative = 0
default_discontinuity_absolute = 0

# Preprocessing
default_preprocess = "NONE"
default_negate_image = False
default_dyn = 0.6
default_size_mask = 11

# Tracker specific parameters
default_double_exponential_alpha = 0.6
default_simple_moving_average_memory = 30.0
default_exponential_moving_average_memory = 16.0
default_one_euro_beta = 0.007
default_one_euro_mincutoff = 1.0
default_one_euro_dcutoff = 1.0

# Tracker parameters
default_nb_values_to_keep = 30
default_minimum_for_fusion = 15

# Observation matching
default_default_sigma_position = 2
default_default_sigma_thickness = 2
default_default_sigma_luminosity = 57
default_min_nb_values_sigma = 10
default_sigma_pos_min = 1.0
default_sigma_thickness_min = 0.64
default_sigma_luminosity_min = 13.0

# Extraction
default_ratio_lum = 1.0

# Post processing
default_threshold_intersection = 0.8
default_remove_duplicates = True

# Optimisation
default_bucket_size = 32
def get_json_extract(full_json: dict, max_samples: int = 5) -> dict:
    """Extract the first samples from a json dictionary

    Entries are taken in the iteration order of ``full_json``; the input is left untouched.

    Args:
        full_json (dict): The full json dictionary
        max_samples (int): The maximum number of samples to keep (5 by default)

    Returns:
        dict: A sub sample of the full json dictionary containing at most the first ``max_samples`` samples.

    Raises:
        ValueError: If ``max_samples`` is negative
    """
    if max_samples < 0:
        raise ValueError("max_samples must be non-negative")

    extract_json = {}
    for key, value in full_json.items():
        # Stop as soon as the extract is full so that a large input is never fully traversed
        if len(extract_json) >= max_samples:
            break
        extract_json[key] = value
    return extract_json
def save_json(data: dict, path: Path) -> None:
    """Serialise a dictionary as json into a file

    The file is opened in text write mode, so any previous content is replaced.

    Args:
        data (dict): The dictionary to serialise
        path (Path): The location of the file to write
    """
    with open(path, mode="w") as stream:
        json.dump(data, fp=stream)
def get_new_white(height: int, width: int) -> np.ndarray:
    """Build a blank (fully white) 3 channels image

    Args:
        height (int): The number of rows of the image
        width (int): The number of columns of the image

    Returns:
        np.ndarray: An array of shape (height, width, 3) and type uint8 where every value is 255
    """
    return np.full((height, width, 3), 255, dtype=np.uint8)
# fmt: off
def generate_vector_output(img_rgb_input: np.ndarray, lines: List[VSegment], lines_colors: Dict[int, np.ndarray]):
    """Build the vector outputs (images and json) from the detected segments

    The complete json description of the segments is written to ``vector_output_full.json``
    in the current working directory.

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels
        lines (List[VSegment]): The identified lines in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict]: The segments drawn over a copy of the input, the segments
        drawn over a white image, the path of the json file and a sample of its content
    """

    def draw_lines(img: np.ndarray, lines: List[VSegment]) -> np.ndarray:
        """Draw every segment on the image, in place

        Args:
            img (np.ndarray): The image to draw on
            lines (List[VSegment]): The segments to draw

        Returns:
            np.ndarray: The image given as input, with the segments drawn on it
        """
        for segment in lines:
            start = (segment.x0, segment.y0)
            end = (segment.x1, segment.y1)
            # Each segment is drawn with the color of its label and a thickness of 2
            cv2.line(img, start, end, lines_colors[segment.label].tolist(), 2)
        return img

    def get_vector_json(lines: List[VSegment]) -> dict:
        """Describe every segment by its two end points

        Args:
            lines (List[VSegment]): The segments to describe

        Returns:
            dict: The end points of each segment, indexed by the segment label converted to a string
        """
        return {
            str(segment.label): {"x0": segment.x0, "y0": segment.y0, "x1": segment.x1, "y1": segment.y1}
            for segment in lines
        }

    height, width = img_rgb_input.shape[0], img_rgb_input.shape[1]

    # The input is copied so that the caller's image is never modified
    over_input = draw_lines(img_rgb_input.copy(), lines)
    lines_only = draw_lines(get_new_white(height, width), lines)

    json_path = Path("vector_output_full.json")
    json_full = get_vector_json(lines)
    save_json(json_full, json_path)

    return over_input, lines_only, json_path, get_json_extract(json_full)
def generate_pixel_output(img_rgb_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
    """Generate the pixel output using the LSuperposition list and the img_label

    Three renderings (lines and superpositions, lines only, superpositions only) are produced
    twice: over a copy of the input image and over a white image. The label image is saved
    to ``pixel_output_label.npy`` and the superpositions to ``pixel_output_superposition.json``,
    both in the current working directory.

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]: The pixel
        output: the three renderings over the input, the three renderings over a white image, the path of the
        label file, the path of the superposition file and a sample of the superposition json
    """

    def draw_pixels(img: np.ndarray, img_label: np.ndarray, lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Color, in place, every pixel of the image that belongs to a line

        Args:
            img (np.ndarray): The image to draw on
            img_label (np.ndarray): The labelized image
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the pixels drawn on it

        Raises:
            KeyError: If a label of the image has no color
        """
        # Labels 0 and 1 are never drawn here (presumably background and superposition
        # markers; label 1 is the color used when drawing superpositions)
        line_mask = (img_label != 0) & (img_label != 1)
        if not line_mask.any():
            return img

        # One color lookup per distinct label followed by a single vectorised assignment,
        # instead of a Python level loop over every pixel of the image
        labels, palette_index = np.unique(img_label[line_mask], return_inverse=True)
        palette = np.array([lines_colors[int(label)] for label in labels], dtype=img.dtype).reshape(-1, 3)
        img[line_mask] = palette[palette_index]
        return img

    def draw_superposition(img: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Color, in place, every superposition pixel with the color of label 1

        Args:
            img (np.ndarray): The image to draw on
            superpositions (List[LSuperposition]): The superpositions to draw
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the superpositions drawn on it
        """
        for superposition in superpositions:
            img[superposition.y, superposition.x, :] = lines_colors[1]
        return img

    def get_superposition_json(superpositions: List[LSuperposition]) -> dict:
        """Generate the json dictionary containing the superposition output

        Args:
            superpositions (List[LSuperposition]): The superpositions

        Returns:
            dict: For each position, written "x_y", the list of the labels superposed at this position
        """
        ret = {}
        for superposition in superpositions:
            key = f"{superposition.x}_{superposition.y}"
            ret.setdefault(key, []).append(superposition.label)
        return ret

    def draw_full(img: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
        """Draw the full output (pixels and superpositions) on the image

        Args:
            img (np.ndarray): The image to draw on
            img_label (np.ndarray): The labelized image
            superpositions (List[LSuperposition]): The superpositions
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the full output drawn on it
        """
        # Superpositions are drawn last so that they stay visible over the lines
        img = draw_pixels(img, img_label, lines_colors)
        img = draw_superposition(img, superpositions, lines_colors)
        return img

    # Every rendering works on its own copy because the drawing helpers modify their input
    out_pixel_full_over_img = draw_full(img_rgb_input.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_over_img = draw_pixels(img_rgb_input.copy(), img_label, lines_colors)
    out_pixel_superposition_over_img = draw_superposition(img_rgb_input.copy(), superpositions, lines_colors)

    img_empty = get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1])
    out_pixel_full_img = draw_full(img_empty.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_img = draw_pixels(img_empty.copy(), img_label, lines_colors)
    out_pixel_superposition_img = draw_superposition(img_empty.copy(), superpositions, lines_colors)

    # np.save writes the .npy format announced by the file name, readable with a plain
    # np.load; ndarray.dump would write a pickle that np.load rejects by default
    out_pixel_file_label = Path("pixel_output_label.npy")
    np.save(out_pixel_file_label, img_label)

    out_pixel_file_superposition = Path("pixel_output_superposition.json")
    out_pixel_file_superposition_full = get_superposition_json(superpositions)
    save_json(out_pixel_file_superposition_full, out_pixel_file_superposition)
    out_pixel_file_superposition_extract = get_json_extract(out_pixel_file_superposition_full)

    return out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img, out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img, out_pixel_file_label, out_pixel_file_superposition, out_pixel_file_superposition_extract
def generate_output(img_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines: List[VSegment]):
    """Build every output of the application from the results of the line detector

    Args:
        img_input (np.ndarray): Input image with 1 channel
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines (List[VSegment]): The identified lines in the image

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]:
        The vector output followed by the pixel output, as expected by the gradio application
    """

    def get_rgb_input_img(greyscale_input_img: np.ndarray) -> np.ndarray:
        """Duplicate a 1 channel image into the 3 channels of a new image

        Args:
            greyscale_input_img (np.ndarray): The greyscale / 1 channel image

        Returns:
            np.ndarray: The 3 channels, uint8, version of the input image
        """
        height, width = greyscale_input_img.shape[0], greyscale_input_img.shape[1]
        rgb: np.ndarray = np.zeros((height, width, 3), dtype=np.uint8)
        for channel in range(3):
            rgb[:, :, channel] = greyscale_input_img
        return rgb

    def generate_line_colors(lines: List[VSegment]) -> Dict[int, np.ndarray]:
        """Associate a color to the label of each line

        Labels 0 and 1 first receive fixed colors ([0, 0, 0] and [255, 0, 0]); the label of
        each line is then associated to a random color.

        Args:
            lines (List[VSegment]): The lines

        Returns:
            Dict[int, np.ndarray]: A dictionary containing the color for each line according to their label
        """
        # The global numpy generator is reseeded so that the colors are the same at every run
        np.random.seed(0)
        palette = np.random.randint(low=0, high=255, size=(len(lines), 3))

        colors = {0: np.array([0, 0, 0]), 1: np.array([255, 0, 0])}
        for segment, row in zip(lines, palette):
            colors[segment.label] = row.astype(np.uint8)
        return colors

    rgb_input: np.ndarray = get_rgb_input_img(img_input)
    colors_by_label: Dict[int, np.ndarray] = generate_line_colors(lines)

    vector_outputs: Tuple[np.ndarray, np.ndarray, Path, dict] = generate_vector_output(rgb_input, lines, colors_by_label)
    pixel_outputs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict] = generate_pixel_output(rgb_input, img_label, superpositions, colors_by_label)

    # Both groups are flattened into a single tuple
    return (*vector_outputs, *pixel_outputs)
def app_function(
    greyscale_input_img,
    min_len,
    preprocess,
    tracker,
    traversal_mode,
    extraction_type,
    negate_image,
    dyn,
    size_mask,
    double_exponential_alpha,
    simple_moving_average_memory,
    exponential_moving_average_memory,
    one_euro_beta,
    one_euro_mincutoff,
    one_euro_dcutoff,
    bucket_size,
    nb_values_to_keep,
    discontinuity_relative,
    discontinuity_absolute,
    minimum_for_fusion,
    default_sigma_position,
    default_sigma_thickness,
    default_sigma_luminosity,
    min_nb_values_sigma,
    sigma_pos_min,
    sigma_thickness_min,
    sigma_luminosity_min,
    gradient_threshold,
    llumi,
    blumi,
    ratio_lum,
    max_thickness,
    threshold_intersection,
    remove_duplicates):
    """Run the line detector on an image and build every output of the application

    Every parameter following ``greyscale_input_img`` is converted (to ``int``, ``float``,
    ``bool`` or to the enumeration member having the given name) and then forwarded to
    ``line_detector`` under the same name.

    Args:
        greyscale_input_img: The image to process, given as is to ``line_detector``
        preprocess: Name of a member of ``e_segdet_preprocess``
        tracker: Name of a member of ``e_segdet_process_tracking``
        traversal_mode: Name of a member of ``e_segdet_process_traversal_mode``
        extraction_type: Name of a member of ``e_segdet_process_extraction``
        negate_image, remove_duplicates: Values converted with ``bool``
        dyn, double_exponential_alpha, one_euro_beta, one_euro_mincutoff, one_euro_dcutoff, sigma_pos_min,
        sigma_thickness_min, sigma_luminosity_min, ratio_lum, threshold_intersection: Values converted with ``float``
        All the other parameters: Values converted with ``int``

    Returns:
        The duration of the detection, in seconds, followed by every value returned by ``generate_output``

    Raises:
        ValueError: If no input image is given
        KeyError: If a name does not match any member of its enumeration
    """
    # Fail with an explicit message instead of an obscure error raised by the detector
    if greyscale_input_img is None:
        raise ValueError("An input image is required to run the line detection")

    img_label: np.ndarray
    superpositions: List[LSuperposition]
    lines: List[VSegment]

    def get_enum_value(enum, value):
        """Return the member of ``enum`` designated by ``value``

        The exact name is tried first. Otherwise the value is converted to the
        UPPER_SNAKE_CASE form, so that a choice such as "Black top hat" is looked up
        as "BLACK_TOP_HAT".
        NOTE(review): assumes such a member exists in the enumeration - TODO confirm
        """
        members = enum.__members__
        if value in members:
            return members[value]
        return members[value.strip().upper().replace(" ", "_")]

    # perf_counter is monotonic: the measure cannot be altered by a system clock update
    t0 = time.perf_counter()
    img_label, superpositions, lines = line_detector(
        greyscale_input_img, "full",
        min_len=int(min_len),
        preprocess=get_enum_value(e_segdet_preprocess, preprocess),
        tracker=get_enum_value(e_segdet_process_tracking, tracker),
        traversal_mode=get_enum_value(e_segdet_process_traversal_mode, traversal_mode),
        extraction_type=get_enum_value(e_segdet_process_extraction, extraction_type),
        negate_image=bool(negate_image),
        dyn=float(dyn),
        size_mask=int(size_mask),
        double_exponential_alpha=float(double_exponential_alpha),
        simple_moving_average_memory=int(simple_moving_average_memory),
        exponential_moving_average_memory=int(exponential_moving_average_memory),
        one_euro_beta=float(one_euro_beta),
        one_euro_mincutoff=float(one_euro_mincutoff),
        one_euro_dcutoff=float(one_euro_dcutoff),
        bucket_size=int(bucket_size),
        nb_values_to_keep=int(nb_values_to_keep),
        discontinuity_relative=int(discontinuity_relative),
        discontinuity_absolute=int(discontinuity_absolute),
        minimum_for_fusion=int(minimum_for_fusion),
        default_sigma_position=int(default_sigma_position),
        default_sigma_thickness=int(default_sigma_thickness),
        default_sigma_luminosity=int(default_sigma_luminosity),
        min_nb_values_sigma=int(min_nb_values_sigma),
        sigma_pos_min=float(sigma_pos_min),
        sigma_thickness_min=float(sigma_thickness_min),
        sigma_luminosity_min=float(sigma_luminosity_min),
        gradient_threshold=int(gradient_threshold),
        llumi=int(llumi),
        blumi=int(blumi),
        ratio_lum=float(ratio_lum),
        max_thickness=int(max_thickness),
        threshold_intersection=float(threshold_intersection),
        remove_duplicates=bool(remove_duplicates)
    )
    t1 = time.perf_counter()

    # Only the detection is timed, not the generation of the outputs
    duration = t1 - t0

    outputs = generate_output(greyscale_input_img, img_label, superpositions, lines)

    return duration, *outputs
# Description of the gradio interface: the inputs (image and parameters), the outputs,
# the run button binding them to app_function, and a few ready to use examples.
with gr.Blocks() as app:
    gr.Markdown("""
# Pylena line detection demonstration
This is a demonstration of the line detector described in the article *Linear Object Detection in Document Images using Multiple Object Tracking*
accepted at ICDAR 2023. The article is available at: https://arxiv.org/abs/2305.16968.
## How to use this demonstration ?
You can either upload your own (greyscale/8bit image) image or use one of the examples, then change the parameters and click on the run button.
The complete documentation is available at: http://olena.pages.lre.epita.fr/pylena/
""")

    with gr.Row():
        # Left column: the input image and every parameter of the detector
        with gr.Column():
            gr.Markdown("## Input")
            img_input = gr.Image(type="numpy", image_mode="L", label="Greyscale input image")

            with gr.Tab("Parameters"):
                with gr.Tab("Tracking"):
                    min_len = gr.Number(label="min_len", value=default_min_len)
                    tracker = gr.Radio(label="tracker", choices=["KALMAN", "ONE_EURO", "DOUBLE_EXPONENTIAL", "LAST_INTEGRATION", "SIMPLE_MOVING_AVERAGE", "EXPONENTIAL_MOVING_AVERAGE"], value=default_tracker)
                    traversal_mode = gr.Radio(label="traversal_mode", choices=["HORIZONTAL_VERTICAL", "HORIZONTAL", "VERTICAL"], value=default_traversal_mode)
                with gr.Tab("Observation extraction"):
                    blumi = gr.Number(label="blumi", value=default_blumi)
                    llumi = gr.Number(label="llumi", value=default_llumi)
                    max_thickness = gr.Number(label="max_thickness", value=default_max_thickness)
                with gr.Tab("Discontinuity"):
                    discontinuity_relative = gr.Number(label="discontinuity_relative", value=default_discontinuity_relative)
                    discontinuity_absolute = gr.Number(label="discontinuity_absolute", value=default_discontinuity_absolute)

            with gr.Tab("Advanced parameters"):
                with gr.Tab("Preprocessing"):
                    preprocess = gr.Radio(label="preprocess", choices=["NONE", "Black top hat"], value=default_preprocess)
                    negate_image = gr.Checkbox(label="negate_image", value=default_negate_image)
                    dyn = gr.Number(label="dyn", value=default_dyn)
                    size_mask = gr.Number(label="size_mask", value=default_size_mask)
                with gr.Tab("Tracker specific parameters"):
                    double_exponential_alpha = gr.Number(label="double_exponential_alpha", value=default_double_exponential_alpha)
                    simple_moving_average_memory = gr.Number(label="simple_moving_average_memory", value=default_simple_moving_average_memory)
                    exponential_moving_average_memory = gr.Number(label="exponential_moving_average_memory", value=default_exponential_moving_average_memory)
                    one_euro_beta = gr.Number(label="one_euro_beta", value=default_one_euro_beta)
                    one_euro_mincutoff = gr.Number(label="one_euro_mincutoff", value=default_one_euro_mincutoff)
                    one_euro_dcutoff = gr.Number(label="one_euro_dcutoff", value=default_one_euro_dcutoff)
                with gr.Tab("Tracker parameters"):
                    nb_values_to_keep = gr.Number(label="nb_values_to_keep", value=default_nb_values_to_keep)
                    minimum_for_fusion = gr.Number(label="minimum_for_fusion", value=default_minimum_for_fusion)
                with gr.Tab("Observation extraction"):
                    # The initial value comes from the constant, like every other parameter
                    extraction_type = gr.Radio(label="extraction_type", choices=["BINARY", "GRADIENT"], value=default_extraction_type)
                    gradient_threshold = gr.Number(label="gradient_threshold", value=default_gradient_threshold)
                with gr.Tab("Observation matching"):
                    default_sigma_position = gr.Number(label="default_sigma_position", value=default_default_sigma_position)
                    default_sigma_thickness = gr.Number(label="default_sigma_thickness", value=default_default_sigma_thickness)
                    default_sigma_luminosity = gr.Number(label="default_sigma_luminosity", value=default_default_sigma_luminosity)
                    min_nb_values_sigma = gr.Number(label="min_nb_values_sigma", value=default_min_nb_values_sigma)
                    sigma_pos_min = gr.Number(label="sigma_pos_min", value=default_sigma_pos_min)
                    sigma_thickness_min = gr.Number(label="sigma_thickness_min", value=default_sigma_thickness_min)
                    sigma_luminosity_min = gr.Number(label="sigma_luminosity_min", value=default_sigma_luminosity_min)
                with gr.Tab("Extraction"):
                    ratio_lum = gr.Number(label="ratio_lum", value=default_ratio_lum)
                with gr.Tab("Post Processing"):
                    threshold_intersection = gr.Number(label="threshold_intersection", value=default_threshold_intersection)
                    remove_duplicates = gr.Checkbox(label="remove_duplicates", value=default_remove_duplicates)
                with gr.Tab("Optimisation"):
                    bucket_size = gr.Number(label="bucket_size", value=default_bucket_size)

        # Right column: the duration of the detection and its vector / pixel results
        with gr.Column():
            gr.Markdown("## Output")

            out_duration = gr.Number(label="Line detection duration (in seconds)", value=-1, interactive=False)

            with gr.Tab("Output Vector"):
                with gr.Tab("Over input"):
                    out_vector_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_vector_label_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("File"):
                    out_vector_file = gr.File(label="Vector output full", interactive=False)
                    out_vector_file_extract = gr.Json(label="Vector sample")

            with gr.Tab("Output Pixel"):
                with gr.Tab("Line and Superposition over input"):
                    out_pixel_full_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line over input"):
                    out_pixel_line_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition over input"):
                    out_pixel_superposition_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line and Superposition"):
                    out_pixel_full_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_pixel_line_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition only"):
                    # Declared like the other result images: read only and without a specific label
                    out_pixel_superposition_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("File"):
                    # Distinct labels, so that the two downloadable files can be told apart
                    out_pixel_file_label = gr.File(label="Pixel output label", interactive=False)
                    out_pixel_file_superposition = gr.File(label="Pixel output superposition", interactive=False)
                    out_pixel_file_superposition_extract = gr.Json(label="Superposition sample")

    run_button = gr.Button("Run")
    run_button.click(
        app_function,
        # The inputs follow the order of the positional parameters of app_function
        inputs=[
            img_input,
            min_len,
            preprocess,
            tracker,
            traversal_mode,
            extraction_type,
            negate_image,
            dyn,
            size_mask,
            double_exponential_alpha,
            simple_moving_average_memory,
            exponential_moving_average_memory,
            one_euro_beta,
            one_euro_mincutoff,
            one_euro_dcutoff,
            bucket_size,
            nb_values_to_keep,
            discontinuity_relative,
            discontinuity_absolute,
            minimum_for_fusion,
            default_sigma_position,
            default_sigma_thickness,
            default_sigma_luminosity,
            min_nb_values_sigma,
            sigma_pos_min,
            sigma_thickness_min,
            sigma_luminosity_min,
            gradient_threshold,
            llumi,
            blumi,
            ratio_lum,
            max_thickness,
            threshold_intersection,
            remove_duplicates
        ],
        # The outputs follow the order of the values returned by app_function:
        # the duration, the 4 vector outputs, then the 9 pixel outputs
        outputs=[
            out_duration,

            out_vector_over_img, out_vector_label_img,
            out_vector_file, out_vector_file_extract,

            out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img,
            out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img,
            out_pixel_file_label,
            out_pixel_file_superposition, out_pixel_file_superposition_extract
        ])

    gr.Markdown("""
## Examples
Be aware that parameters are not reset when you change example.
""")

    # The example images are read from the "image" directory located next to this file
    current_dir = os.path.dirname(__file__)

    # Each example sets the input image together with a few parameters only
    with gr.Tab("trade_directory"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "trade_directories.png"), 200, 200, 200]],
            inputs=[img_input, blumi, llumi, min_len]
        )
    with gr.Tab("music_sheet"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "music_sheet.png"), 30, 5, 20, "HORIZONTAL"]],
            inputs=[img_input, discontinuity_relative, max_thickness, min_len, traversal_mode]
        )
    with gr.Tab("map"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "map.png"), 4, 180, 180, 20, 6]],
            inputs=[img_input, discontinuity_relative, blumi, llumi, min_len, max_thickness]
        )

    gr.Markdown("""
## A question ?
If you have any question, please contact us at: <philippe.bernet@epita.fr>
""")

# fmt: on

app.launch()