# NOTE: The six lines that originally followed here ("Sefray's picture", "v1",
# "1bae6d9", "raw history blame", "No virus", "24.8 kB") are Hugging Face
# file-viewer chrome captured when the file was exported, not program source.
# They are preserved as this comment so the module parses.
import gradio as gr
import cv2
import json
import os
import numpy as np
from pathlib import Path
from pylena.scribo import line_detector
from pylena.scribo import VSegment, LSuperposition
from pylena.scribo import e_segdet_preprocess, e_segdet_process_extraction, e_segdet_process_tracking, e_segdet_process_traversal_mode
import time
from typing import List, Tuple, Dict
# Define all the default values.
# These constants seed the Gradio widgets below; group headers mirror the
# parameter tabs of the UI.

# -- Tracking tab --
default_min_len = 10
default_preprocess = "NONE"
default_tracker = "KALMAN"
default_traversal_mode = "HORIZONTAL_VERTICAL"
default_extraction_type = "BINARY"
# -- Preprocessing tab --
default_negate_image = False
default_dyn = 0.6
default_size_mask = 11
# -- Tracker specific parameters tab --
default_double_exponential_alpha = 0.6
default_simple_moving_average_memory = 30.0
default_exponential_moving_average_memory = 16.0
default_one_euro_beta = 0.007
default_one_euro_mincutoff = 1.0
default_one_euro_dcutoff = 1.0
# -- Optimisation tab --
default_bucket_size = 32
# -- Tracker parameters tab --
default_nb_values_to_keep = 30
# -- Discontinuity tab --
default_discontinuity_relative = 0
default_discontinuity_absolute = 0
default_minimum_for_fusion = 15
# -- Observation matching tab --
default_default_sigma_position = 2
default_default_sigma_thickness = 2
default_default_sigma_luminosity = 57
default_min_nb_values_sigma = 10
default_sigma_pos_min = 1.0
default_sigma_thickness_min = 0.64
default_sigma_luminosity_min = 13.0
# -- Observation extraction tabs --
default_gradient_threshold = 30
default_llumi = 225
default_blumi = 225
default_ratio_lum = 1.0
default_max_thickness = 100
# -- Post Processing tab --
default_threshold_intersection = 0.8
default_remove_duplicates = True
def get_json_extract(full_json: dict, count: int = 5) -> dict:
    """Extract a small sample from a json dictionary.

    Generalised from the original hard-coded 5-item sample: the sample size is
    now a parameter with the same default, so existing callers are unchanged.

    Args:
        full_json (dict): The full json dictionary
        count (int): Maximum number of items to keep (default 5)

    Returns:
        dict: A sub-sample of the full json dictionary containing at most
        *count* items, in insertion order.
    """
    # dicts preserve insertion order, so slicing the item view keeps the
    # "first N entries" semantics of the original counting loop.
    return dict(list(full_json.items())[:count])
def save_json(data: dict, path: Path) -> None:
    """Serialise *data* as JSON and write it to *path*.

    Args:
        data (dict): The json dictionary to save
        path (Path): The path to the file
    """
    path.write_text(json.dumps(data))
def get_new_white(height: int, width: int) -> np.ndarray:
    """Build an all-white 3-channel image of the requested size.

    Args:
        height (int): The height of the image
        width (int): The width of the image

    Returns:
        np.ndarray: uint8 array of shape (height, width, 3) filled with 255
    """
    return np.full((height, width, 3), 255, dtype=np.uint8)
# fmt: off
def generate_vector_output(img_rgb_input: np.ndarray, lines: List[VSegment], lines_colors: Dict[int, np.ndarray]):
    """Generate the vector output using the VSegment list

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels
        lines (List[VSegment]): The identified lines in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict]: The vector output
    """
    def render_segments(canvas: np.ndarray) -> np.ndarray:
        """Rasterise every detected segment onto *canvas* (mutated in place)."""
        for segment in lines:
            color = lines_colors[segment.label].tolist()
            cv2.line(canvas, (segment.x0, segment.y0), (segment.x1, segment.y1), color, 2)
        return canvas

    # JSON payload: one endpoint record per segment, keyed by stringified label.
    vector_json = {
        str(segment.label): {"x0": segment.x0, "y0": segment.y0, "x1": segment.x1, "y1": segment.y1}
        for segment in lines
    }

    overlay_img = render_segments(img_rgb_input.copy())
    standalone_img = render_segments(get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1]))

    json_path = Path("vector_output_full.json")
    save_json(vector_json, json_path)

    return overlay_img, standalone_img, json_path, get_json_extract(vector_json)
def generate_pixel_output(img_rgb_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
    """Generate the pixel output using the LSuperposition list and the img_label

    Args:
        img_rgb_input (np.ndarray): Input image with 3 channels
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]: The pixel output
    """
    def draw_pixels(img: np.ndarray, img_label: np.ndarray, lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Color every labelized pixel of *img* with its line color.

        Labels 0 and 1 are skipped, as in the original per-pixel loop.

        Args:
            img (np.ndarray): The image to draw on (mutated in place)
            img_label (np.ndarray): The labelized image
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the pixels drawn on it
        """
        # Perf fix: vectorised per-label mask assignment instead of the original
        # O(H*W) per-pixel Python double loop. Same pixels, same colors.
        for label in np.unique(img_label):
            if label == 0 or label == 1:
                continue
            img[img_label == label] = lines_colors[label]
        return img

    def draw_superposition(img: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
        """Color every superposition location with the label-1 color.

        Args:
            img (np.ndarray): The image to draw on (mutated in place)
            superpositions (List[LSuperposition]): The superpositions to draw
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the superpositions drawn on it
        """
        for superposition in superpositions:
            img[superposition.y, superposition.x, :] = lines_colors[1]
        return img

    def get_superposition_json(superpositions: List[LSuperposition]) -> dict:
        """Build the superposition json: "x_y" -> list of labels at that pixel.

        Args:
            superpositions (List[LSuperposition]): The superpositions

        Returns:
            dict: The json dictionary containing the superposition output
        """
        ret = {}
        for superposition in superpositions:
            key = f"{superposition.x}_{superposition.y}"
            # setdefault replaces the original `if not key in ret` idiom.
            ret.setdefault(key, []).append(superposition.label)
        return ret

    def draw_full(img: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
        """Draw the full output (pixels and superpositions) on the image.

        Args:
            img (np.ndarray): The image to draw on (mutated in place)
            img_label (np.ndarray): The labelized image
            superpositions (List[LSuperposition]): The superpositions
            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

        Returns:
            np.ndarray: The image with the full output drawn on it
        """
        img = draw_pixels(img, img_label, lines_colors)
        img = draw_superposition(img, superpositions, lines_colors)
        return img

    out_pixel_full_over_img = draw_full(img_rgb_input.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_over_img = draw_pixels(img_rgb_input.copy(), img_label, lines_colors)
    out_pixel_superposition_over_img = draw_superposition(img_rgb_input.copy(), superpositions, lines_colors)

    img_empty = get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1])
    out_pixel_full_img = draw_full(img_empty.copy(), img_label, superpositions, lines_colors)
    out_pixel_line_img = draw_pixels(img_empty.copy(), img_label, lines_colors)
    out_pixel_superposition_img = draw_superposition(img_empty.copy(), superpositions, lines_colors)

    out_pixel_file_label = Path("pixel_output_label.npy")
    # NOTE(review): ndarray.dump() pickles the array; despite the .npy suffix
    # this is NOT np.save format -- confirm downstream consumers expect pickle.
    img_label.dump(out_pixel_file_label)

    out_pixel_file_superposition = Path("pixel_output_superposition.json")
    out_pixel_file_superposition_full = get_superposition_json(superpositions)
    save_json(out_pixel_file_superposition_full, out_pixel_file_superposition)
    out_pixel_file_superposition_extract = get_json_extract(out_pixel_file_superposition_full)

    return out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img, out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img, out_pixel_file_label, out_pixel_file_superposition, out_pixel_file_superposition_extract
def generate_output(img_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines: List[VSegment]):
    """Generate the output using the LSuperposition list and the img_label

    Args:
        img_input (np.ndarray): Input image with 1 channel
        img_label (np.ndarray): The labelized image
        superpositions (List[LSuperposition]): The identified superpositions in the image
        lines (List[VSegment]): The identified lines in the image

    Returns:
        Tuple[np.ndarray, np.ndarray, Path, dict, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]: The complete output for gradio application
    """
    def to_rgb(grey: np.ndarray) -> np.ndarray:
        """Replicate the single grey channel three times to get a uint8 RGB image."""
        return np.stack((grey, grey, grey), axis=-1).astype(np.uint8)

    def build_palette(segments: List[VSegment]) -> Dict[int, np.ndarray]:
        """Deterministic random color per segment label; 0 -> black, 1 -> red."""
        np.random.seed(0)
        random_colors = np.random.randint(low=0, high=255, size=(len(segments), 3))
        palette = {0: np.array([0, 0, 0]), 1: np.array([255, 0, 0])}
        for idx, segment in enumerate(segments):
            palette[segment.label] = random_colors[idx, :].astype(np.uint8)
        return palette

    rgb_img = to_rgb(img_input)
    palette = build_palette(lines)
    vector_part = generate_vector_output(rgb_img, lines, palette)
    pixel_part = generate_pixel_output(rgb_img, img_label, superpositions, palette)
    return *vector_part, *pixel_part
def app_function(
    greyscale_input_img,
    min_len,
    preprocess,
    tracker,
    traversal_mode,
    extraction_type,
    negate_image,
    dyn,
    size_mask,
    double_exponential_alpha,
    simple_moving_average_memory,
    exponential_moving_average_memory,
    one_euro_beta,
    one_euro_mincutoff,
    one_euro_dcutoff,
    bucket_size,
    nb_values_to_keep,
    discontinuity_relative,
    discontinuity_absolute,
    minimum_for_fusion,
    default_sigma_position,
    default_sigma_thickness,
    default_sigma_luminosity,
    min_nb_values_sigma,
    sigma_pos_min,
    sigma_thickness_min,
    sigma_luminosity_min,
    gradient_threshold,
    llumi,
    blumi,
    ratio_lum,
    max_thickness,
    threshold_intersection,
    remove_duplicates):
    """Gradio "Run" callback: run the pylena line detector and build all outputs.

    Every parameter after the first is a widget value forwarded verbatim from
    the UI (the order here must match the `inputs=` list of run_button.click).
    Widget values arrive as floats/strings, so each one is explicitly cast to
    the type expected by `line_detector` before the call.

    Args:
        greyscale_input_img: 1-channel image from the gr.Image widget.
        (remaining args): raw widget values; see the matching gr.Number /
            gr.Radio / gr.Checkbox definitions below for their meaning.

    Returns:
        Tuple: (detection duration in seconds, *outputs of generate_output).
    """
    img_label: np.ndarray
    superpositions: List[LSuperposition]
    lines: List[VSegment]

    def get_enum_value(enum, value):
        # Map a radio-button string (e.g. "KALMAN") to the pylena enum member
        # with exactly that name.
        return enum.__members__[value]

    # Time only the detector call itself, not the output rendering.
    t0 = time.time()
    img_label, superpositions, lines = line_detector(
        greyscale_input_img, "full",
        min_len=int(min_len),
        preprocess=get_enum_value(e_segdet_preprocess, preprocess),
        tracker=get_enum_value(e_segdet_process_tracking, tracker),
        traversal_mode=get_enum_value(e_segdet_process_traversal_mode, traversal_mode),
        extraction_type=get_enum_value(e_segdet_process_extraction, extraction_type),
        negate_image=bool(negate_image),
        dyn=float(dyn),
        size_mask=int(size_mask),
        double_exponential_alpha=float(double_exponential_alpha),
        # NOTE(review): the moving-average memories default to floats (30.0,
        # 16.0) but are truncated to int here -- confirm line_detector expects ints.
        simple_moving_average_memory=int(simple_moving_average_memory),
        exponential_moving_average_memory=int(exponential_moving_average_memory),
        one_euro_beta=float(one_euro_beta),
        one_euro_mincutoff=float(one_euro_mincutoff),
        one_euro_dcutoff=float(one_euro_dcutoff),
        bucket_size=int(bucket_size),
        nb_values_to_keep=int(nb_values_to_keep),
        discontinuity_relative=int(discontinuity_relative),
        discontinuity_absolute=int(discontinuity_absolute),
        minimum_for_fusion=int(minimum_for_fusion),
        default_sigma_position=int(default_sigma_position),
        default_sigma_thickness=int(default_sigma_thickness),
        default_sigma_luminosity=int(default_sigma_luminosity),
        min_nb_values_sigma=int(min_nb_values_sigma),
        sigma_pos_min=float(sigma_pos_min),
        sigma_thickness_min=float(sigma_thickness_min),
        sigma_luminosity_min=float(sigma_luminosity_min),
        gradient_threshold=int(gradient_threshold),
        llumi=int(llumi),
        blumi=int(blumi),
        ratio_lum=float(ratio_lum),
        max_thickness=int(max_thickness),
        threshold_intersection=float(threshold_intersection),
        remove_duplicates=bool(remove_duplicates)
    )
    t1 = time.time()
    duration = t1 - t0
    outputs = generate_output(greyscale_input_img, img_label, superpositions, lines)
    return duration, *outputs
# Build the Gradio UI: left column = input + parameters, right column = outputs.
with gr.Blocks() as app:
    gr.Markdown("""
# Pylena line detection demonstration
This is a demonstration of the line detector described in the article *Linear Object Detection in Document Images using Multiple Object Tracking*
accepted at ICDAR 2023. The article is available at: https://arxiv.org/abs/2305.16968.
## How to use this demonstration ?
You can either upload your own (greyscale/8bit image) image or use one of the examples, then change the parameters and click on the run button.
The complete documentation is available at: http://olena.pages.lre.epita.fr/pylena/
""")
    with gr.Row():
        # ---- Input image and detector parameters ----
        with gr.Column():
            gr.Markdown("## Input")
            img_input = gr.Image(type="numpy", image_mode="L", label="Greyscale input image")
            with gr.Tab("Parameters"):
                with gr.Tab("Tracking"):
                    min_len = gr.Number(label="min_len", value=default_min_len)
                    tracker = gr.Radio(label="tracker", choices=["KALMAN", "ONE_EURO", "DOUBLE_EXPONENTIAL", "LAST_INTEGRATION", "SIMPLE_MOVING_AVERAGE", "EXPONENTIAL_MOVING_AVERAGE"], value=default_tracker)
                    traversal_mode = gr.Radio(label="traversal_mode", choices=["HORIZONTAL_VERTICAL", "HORIZONTAL", "VERTICAL"], value=default_traversal_mode)
                with gr.Tab("Observation extraction"):
                    blumi = gr.Number(label="blumi", value=default_blumi)
                    llumi = gr.Number(label="llumi", value=default_llumi)
                    max_thickness = gr.Number(label="max_thickness", value=default_max_thickness)
                with gr.Tab("Discontinuity"):
                    discontinuity_relative = gr.Number(label="discontinuity_relative", value=default_discontinuity_relative)
                    discontinuity_absolute = gr.Number(label="discontinuity_absolute", value=default_discontinuity_absolute)
            with gr.Tab("Advanced parameters"):
                with gr.Tab("Preprocessing"):
                    # NOTE(review): app_function resolves this string via
                    # e_segdet_preprocess.__members__, so "Black top hat" must be
                    # an exact member name; it looks unlikely (member names are
                    # usually like BLACK_TOP_HAT) -- confirm selecting it does
                    # not raise KeyError.
                    preprocess = gr.Radio(label="preprocess", choices=["NONE", "Black top hat"], value=default_preprocess)
                    negate_image = gr.Checkbox(label="negate_image", value=default_negate_image)
                    dyn = gr.Number(label="dyn", value=default_dyn)
                    size_mask = gr.Number(label="size_mask", value=default_size_mask)
                with gr.Tab("Tracker specific parameters"):
                    double_exponential_alpha = gr.Number(label="double_exponential_alpha", value=default_double_exponential_alpha)
                    simple_moving_average_memory = gr.Number(label="simple_moving_average_memory", value=default_simple_moving_average_memory)
                    exponential_moving_average_memory = gr.Number(label="exponential_moving_average_memory", value=default_exponential_moving_average_memory)
                    one_euro_beta = gr.Number(label="one_euro_beta", value=default_one_euro_beta)
                    one_euro_mincutoff = gr.Number(label="one_euro_mincutoff", value=default_one_euro_mincutoff)
                    one_euro_dcutoff = gr.Number(label="one_euro_dcutoff", value=default_one_euro_dcutoff)
                with gr.Tab("Tracker parameters"):
                    nb_values_to_keep = gr.Number(label="nb_values_to_keep", value=default_nb_values_to_keep)
                    minimum_for_fusion = gr.Number(label="minimum_for_fusion", value=default_minimum_for_fusion)
                with gr.Tab("Observation extraction"):
                    # Consistency fix: initialise from the module-level constant
                    # like every other widget (default_extraction_type == "BINARY").
                    extraction_type = gr.Radio(label="extraction_type", choices=["BINARY", "GRADIENT"], value=default_extraction_type)
                    gradient_threshold = gr.Number(label="gradient_threshold", value=default_gradient_threshold)
                with gr.Tab("Observation matching"):
                    default_sigma_position = gr.Number(label="default_sigma_position", value=default_default_sigma_position)
                    default_sigma_thickness = gr.Number(label="default_sigma_thickness", value=default_default_sigma_thickness)
                    default_sigma_luminosity = gr.Number(label="default_sigma_luminosity", value=default_default_sigma_luminosity)
                    min_nb_values_sigma = gr.Number(label="min_nb_values_sigma", value=default_min_nb_values_sigma)
                    sigma_pos_min = gr.Number(label="sigma_pos_min", value=default_sigma_pos_min)
                    sigma_thickness_min = gr.Number(label="sigma_thickness_min", value=default_sigma_thickness_min)
                    sigma_luminosity_min = gr.Number(label="sigma_luminosity_min", value=default_sigma_luminosity_min)
                with gr.Tab("Extraction"):
                    ratio_lum = gr.Number(label="ratio_lum", value=default_ratio_lum)
                with gr.Tab("Post Processing"):
                    threshold_intersection = gr.Number(label="threshold_intersection", value=default_threshold_intersection)
                    remove_duplicates = gr.Checkbox(label="remove_duplicates", value=default_remove_duplicates)
                with gr.Tab("Optimisation"):
                    bucket_size = gr.Number(label="bucket_size", value=default_bucket_size)
        # ---- Detection outputs ----
        with gr.Column():
            gr.Markdown("## Output")
            out_duration = gr.Number(label="Line detection duration (in seconds)", value=-1, interactive=False)
            with gr.Tab("Output Vector"):
                with gr.Tab("Over input"):
                    out_vector_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_vector_label_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("File"):
                    out_vector_file = gr.File(label="Vector output full", interactive=False)
                    out_vector_file_extract = gr.Json(label="Vector sample")
            with gr.Tab("Output Pixel"):
                with gr.Tab("Line and Superposition over input"):
                    out_pixel_full_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line over input"):
                    out_pixel_line_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition over input"):
                    out_pixel_superposition_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line and Superposition"):
                    out_pixel_full_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Line only"):
                    out_pixel_line_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
                with gr.Tab("Superposition only"):
                    out_pixel_superposition_img = gr.Image(type="numpy", image_mode="RGB", label="Labelized image")
                with gr.Tab("File"):
                    out_pixel_file_label = gr.File(label="Pixel output full", interactive=False)
                    # Bug fix: this widget previously duplicated the
                    # "Pixel output full" label of the widget above, although it
                    # carries the superposition json file.
                    out_pixel_file_superposition = gr.File(label="Pixel output superposition", interactive=False)
                    out_pixel_file_superposition_extract = gr.Json(label="Superposition sample")
    # The inputs list order must match app_function's parameter order, and the
    # outputs list order must match (duration, *generate_output(...)).
    run_button = gr.Button("Run")
    run_button.click(
        app_function,
        inputs=[
            img_input,
            min_len,
            preprocess,
            tracker,
            traversal_mode,
            extraction_type,
            negate_image,
            dyn,
            size_mask,
            double_exponential_alpha,
            simple_moving_average_memory,
            exponential_moving_average_memory,
            one_euro_beta,
            one_euro_mincutoff,
            one_euro_dcutoff,
            bucket_size,
            nb_values_to_keep,
            discontinuity_relative,
            discontinuity_absolute,
            minimum_for_fusion,
            default_sigma_position,
            default_sigma_thickness,
            default_sigma_luminosity,
            min_nb_values_sigma,
            sigma_pos_min,
            sigma_thickness_min,
            sigma_luminosity_min,
            gradient_threshold,
            llumi,
            blumi,
            ratio_lum,
            max_thickness,
            threshold_intersection,
            remove_duplicates
        ],
        outputs=[
            out_duration,
            out_vector_over_img, out_vector_label_img,
            out_vector_file, out_vector_file_extract,
            out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img,
            out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img,
            out_pixel_file_label,
            out_pixel_file_superposition, out_pixel_file_superposition_extract
        ])
    gr.Markdown("""
## Examples
Be aware that parameters are not reset when you change example.
""")
    # Each example pre-fills the input image plus a few relevant parameters.
    current_dir = os.path.dirname(__file__)
    with gr.Tab("trade_directory"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "trade_directories.png"), 200, 200, 200]],
            inputs=[img_input, blumi, llumi, min_len]
        )
    with gr.Tab("music_sheet"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "music_sheet.png"), 30, 5, 20, "HORIZONTAL"]],
            inputs=[img_input, discontinuity_relative, max_thickness, min_len, traversal_mode]
        )
    with gr.Tab("map"):
        gr.Examples(
            examples=[[os.path.join(current_dir, "image", "map.png"), 4, 180, 180, 20, 6]],
            inputs=[img_input, discontinuity_relative, blumi, llumi, min_len, max_thickness]
        )
    gr.Markdown("""
## A question ?
If you have any question, please contact us at: <philippe.bernet@epita.fr>
""")
# fmt: on
app.launch()