Spaces:

Sefray
/

PylenaLineDetector_ICDAR2023

Sleeping

PylenaLineDetector_ICDAR2023 / app.py

1bae6d9 10 months ago

No virus

24.8 kB

	import gradio as gr

	import cv2
	import json
	import os
	import numpy as np
	from pathlib import Path
	from pylena.scribo import line_detector
	from pylena.scribo import VSegment, LSuperposition
	from pylena.scribo import e_segdet_preprocess, e_segdet_process_extraction, e_segdet_process_tracking, e_segdet_process_traversal_mode
	import time

	from typing import List, Tuple, Dict


	# Define all the default values
	#
	# Each constant below is the initial value shown by the Gradio widget that
	# carries the same name without the ``default_`` prefix (see the
	# ``gr.Number`` / ``gr.Radio`` / ``gr.Checkbox`` declarations further down).
	# The widget values are cast (int / float / bool / enum member) inside
	# ``app_function`` before being forwarded to ``line_detector``.

	# String defaults: they are resolved to enum members by name in ``app_function``.
	default_min_len = 10
	default_preprocess = "NONE"
	default_tracker = "KALMAN"
	default_traversal_mode = "HORIZONTAL_VERTICAL"
	default_extraction_type = "BINARY"
	default_negate_image = False
	default_dyn = 0.6
	default_size_mask = 11
	default_double_exponential_alpha = 0.6
	# The two "memory" values are written as floats here but are cast with
	# ``int(...)`` before reaching the detector.
	default_simple_moving_average_memory = 30.0
	default_exponential_moving_average_memory = 16.0
	default_one_euro_beta = 0.007
	default_one_euro_mincutoff = 1.0
	default_one_euro_dcutoff = 1.0
	default_bucket_size = 32
	default_nb_values_to_keep = 30
	default_discontinuity_relative = 0
	default_discontinuity_absolute = 0
	default_minimum_for_fusion = 15
	# The doubled ``default_default_`` prefix is intentional: the detector
	# parameters themselves are named ``default_sigma_*``.
	default_default_sigma_position = 2
	default_default_sigma_thickness = 2
	default_default_sigma_luminosity = 57
	default_min_nb_values_sigma = 10
	default_sigma_pos_min = 1.0
	default_sigma_thickness_min = 0.64
	default_sigma_luminosity_min = 13.0
	default_gradient_threshold = 30
	default_llumi = 225
	default_blumi = 225
	default_ratio_lum = 1.0
	default_max_thickness = 100
	default_threshold_intersection = 0.8
	default_remove_duplicates = True


	def get_json_extract(full_json: dict) -> dict:
	    """Build a short preview of a json dictionary

	    Args:
	        full_json (dict): The full json dictionary

	    Returns:
	        dict: A new dictionary holding at most the first 5 entries of the
	            full json dictionary, in its iteration order.
	    """
	    sample_size = 5
	    # zip() stops as soon as the range is exhausted, so no more than
	    # ``sample_size`` entries are ever copied.
	    return {key: value for _, (key, value) in zip(range(sample_size), full_json.items())}


	def save_json(data: dict, path: Path) -> None:
	    """Serialize a json dictionary to disk

	    The target file is created, or overwritten when it already exists.

	    Args:
	        data (dict): The json dictionary to write
	        path (Path): Where the json file is written
	    """
	    with open(path, mode="w") as json_file:
	        json.dump(obj=data, fp=json_file)


	def get_new_white(height: int, width: int) -> np.ndarray:
	    """Allocate a blank (fully white) 3-channel image

	    Args:
	        height (int): Number of rows of the image
	        width (int): Number of columns of the image

	    Returns:
	        np.ndarray: A ``(height, width, 3)`` uint8 array filled with 255
	    """
	    canvas_shape = (height, width, 3)
	    return np.full(canvas_shape, 255, dtype=np.uint8)

	# fmt: off

	def generate_vector_output(img_rgb_input: np.ndarray, lines: List[VSegment], lines_colors: Dict[int, np.ndarray]):
	    """Build every vector-based output from the VSegment list

	    The complete vector description is also written to
	    ``vector_output_full.json`` in the current working directory.

	    Args:
	        img_rgb_input (np.ndarray): Input image with 3 channels (it is copied, never modified)
	        lines (List[VSegment]): The identified lines in the image
	        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

	    Returns:
	        Tuple[np.ndarray, np.ndarray, Path, dict]: In order: the lines drawn
	            over a copy of the input, the lines drawn on a white image, the
	            path of the full json file and a preview of that json.
	    """

	    def draw_lines(img: np.ndarray, lines: List[VSegment]) -> np.ndarray:
	        """Stroke every segment on the image (in place)

	        Args:
	            img (np.ndarray): The image to draw on
	            lines (List[VSegment]): The segments to draw

	        Returns:
	            np.ndarray: The same image object, with the segments drawn on it
	        """
	        stroke_width = 2
	        for segment in lines:
	            start_point = (segment.x0, segment.y0)
	            end_point = (segment.x1, segment.y1)
	            # The color is picked from the enclosing function's palette.
	            stroke_color = lines_colors[segment.label].tolist()
	            cv2.line(img, start_point, end_point, stroke_color, stroke_width)
	        return img

	    def get_vector_json(lines: List[VSegment]) -> dict:
	        """Describe every segment as a json-serializable dictionary

	        Args:
	            lines (List[VSegment]): The segments to describe

	        Returns:
	            dict: The end points of each segment, keyed by the segment
	                label converted to a string
	        """
	        return {
	            str(segment.label): {"x0": segment.x0, "y0": segment.y0, "x1": segment.x1, "y1": segment.y1}
	            for segment in lines
	        }

	    height, width = img_rgb_input.shape[0], img_rgb_input.shape[1]
	    blank_canvas = get_new_white(height, width)

	    over_input = draw_lines(img_rgb_input.copy(), lines)
	    on_blank = draw_lines(blank_canvas, lines)

	    json_path = Path("vector_output_full.json")
	    json_content = get_vector_json(lines)
	    save_json(json_content, json_path)

	    json_preview = get_json_extract(json_content)

	    return (over_input, on_blank, json_path, json_preview)

	def generate_pixel_output(img_rgb_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
	    """Generate the pixel output using the LSuperposition list and the img_label

	    Three renderings (lines + superpositions, lines only, superpositions
	    only) are produced twice: over a copy of the input image and over a
	    white image. The label image and the superpositions are also written to
	    ``pixel_output_label.npy`` and ``pixel_output_superposition.json`` in
	    the current working directory.

	    Args:
	        img_rgb_input (np.ndarray): Input image with 3 channels (it is copied, never modified)
	        img_label (np.ndarray): The labelized image
	        superpositions (List[LSuperposition]): The identified superpositions in the image
	        lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

	    Returns:
	        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]:
	            In order: lines + superpositions over the input, lines over the
	            input, superpositions over the input, the same three renderings
	            on a white image, the path of the label file, the path of the
	            superposition json file and a preview of that json.
	    """

	    def draw_pixels(img: np.ndarray, img_label: np.ndarray, lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
	        """Paint every line pixel with the color of its label (in place)

	        Pixels labelled 0 or 1 are left untouched; every other label found
	        in the label image must be a key of ``lines_colors``.

	        Args:
	            img (np.ndarray): The image to draw on
	            img_label (np.ndarray): The labelized image
	            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

	        Returns:
	            np.ndarray: The same image object, with the pixels drawn on it
	        """
	        # Restrict the labels to the area covered by ``img``; the painting
	        # itself is vectorised instead of visiting pixels one by one.
	        labels = img_label[:img.shape[0], :img.shape[1]]
	        line_mask = (labels != 0) & (labels != 1)

	        present_labels, palette_index = np.unique(labels[line_mask], return_inverse=True)
	        if present_labels.size == 0:
	            return img

	        # One palette row per label actually present, then one fancy-indexed
	        # assignment colors all the selected pixels at once.
	        palette = np.array([lines_colors[label] for label in present_labels], dtype=img.dtype)
	        img[line_mask] = palette[palette_index]
	        return img

	    def draw_superposition(img: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]) -> np.ndarray:
	        """Paint every superposition pixel on the image (in place)

	        Args:
	            img (np.ndarray): The image to draw on
	            superpositions (List[LSuperposition]): The superpositions to draw
	            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

	        Returns:
	            np.ndarray: The same image object, with the superpositions drawn on it
	        """
	        # All superpositions share the color stored under key 1.
	        superposition_color = lines_colors[1]
	        for superposition in superpositions:
	            img[superposition.y, superposition.x, :] = superposition_color
	        return img

	    def get_superposition_json(superpositions: List[LSuperposition]) -> dict:
	        """Generate the json dictionary containing the superposition output

	        Args:
	            superpositions (List[LSuperposition]): The superpositions

	        Returns:
	            dict: For each ``"{x}_{y}"`` position, the list of the labels
	                superposed at that position
	        """
	        ret = {}
	        for superposition in superpositions:
	            key = f"{superposition.x}_{superposition.y}"
	            ret.setdefault(key, []).append(superposition.label)
	        return ret

	    def draw_full(img: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines_colors: Dict[int, np.ndarray]):
	        """Draw the full output (pixels then superpositions) on the image

	        Args:
	            img (np.ndarray): The image to draw on
	            img_label (np.ndarray): The labelized image
	            superpositions (List[LSuperposition]): The superpositions
	            lines_colors (Dict[int, np.ndarray]): Dictionary containing the color for each line according to their label

	        Returns:
	            np.ndarray: The image with the full output drawn on it
	        """
	        img = draw_pixels(img, img_label, lines_colors)
	        img = draw_superposition(img, superpositions, lines_colors)
	        return img

	    # Renderings over the input image (each one works on its own copy).
	    out_pixel_full_over_img = draw_full(img_rgb_input.copy(), img_label, superpositions, lines_colors)
	    out_pixel_line_over_img = draw_pixels(img_rgb_input.copy(), img_label, lines_colors)
	    out_pixel_superposition_over_img = draw_superposition(img_rgb_input.copy(), superpositions, lines_colors)

	    # Same renderings on a white image.
	    img_empty = get_new_white(img_rgb_input.shape[0], img_rgb_input.shape[1])
	    out_pixel_full_img = draw_full(img_empty.copy(), img_label, superpositions, lines_colors)
	    out_pixel_line_img = draw_pixels(img_empty.copy(), img_label, lines_colors)
	    out_pixel_superposition_img = draw_superposition(img_empty.copy(), superpositions, lines_colors)

	    # np.save writes the actual .npy format announced by the file name
	    # (ndarray.dump would store a pickle instead).
	    out_pixel_file_label = Path("pixel_output_label.npy")
	    np.save(out_pixel_file_label, img_label)

	    out_pixel_file_superposition = Path("pixel_output_superposition.json")
	    out_pixel_file_superposition_full = get_superposition_json(superpositions)
	    save_json(out_pixel_file_superposition_full, out_pixel_file_superposition)
	    out_pixel_file_superposition_extract = get_json_extract(out_pixel_file_superposition_full)

	    return (
	        out_pixel_full_over_img,
	        out_pixel_line_over_img,
	        out_pixel_superposition_over_img,
	        out_pixel_full_img,
	        out_pixel_line_img,
	        out_pixel_superposition_img,
	        out_pixel_file_label,
	        out_pixel_file_superposition,
	        out_pixel_file_superposition_extract,
	    )

	def generate_output(img_input: np.ndarray, img_label: np.ndarray, superpositions: List[LSuperposition], lines: List[VSegment]):
	    """Generate the complete output (vector then pixel) of the application

	    Args:
	        img_input (np.ndarray): Input image with 1 channel
	        img_label (np.ndarray): The labelized image
	        superpositions (List[LSuperposition]): The identified superpositions in the image
	        lines (List[VSegment]): The identified lines in the image

	    Returns:
	        Tuple[np.ndarray, np.ndarray, Path, dict, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]:
	            A single flat tuple: the 4 values of the vector output followed
	            by the 9 values of the pixel output, i.e. one value per output
	            component of the gradio application (duration excluded).
	    """
	    def get_rgb_input_img(greyscale_input_img: np.ndarray) -> np.ndarray:
	        """Convert a greyscale image to a rgb image

	        Args:
	            greyscale_input_img (np.ndarray): The greyscale / 1 channel image

	        Returns:
	            np.ndarray: The 3 channels (uint8) version of the input image
	        """
	        height, width = greyscale_input_img.shape[0], greyscale_input_img.shape[1]
	        rgb_input_img: np.ndarray = np.zeros((height, width, 3), dtype=np.uint8)
	        # The grey levels are replicated in each of the three channels.
	        for channel in range(3):
	            rgb_input_img[:, :, channel] = greyscale_input_img

	        return rgb_input_img

	    def generate_line_colors(lines: List[VSegment]) -> Dict[int, np.ndarray]:
	        """Generate a color for each line

	        Args:
	            lines (List[VSegment]): The lines

	        Returns:
	            Dict[int, np.ndarray]: A dictionary containing the color for each line according to their label.
	                Keys 0 and 1 are always present (black and red) unless a
	                line carries one of those labels, in which case the line
	                color replaces it.
	        """
	        # A private generator seeded with 0 keeps the colors reproducible
	        # from one run to the next without reseeding numpy's global
	        # generator for the rest of the process.
	        rng = np.random.RandomState(0)
	        # NOTE: ``high`` is exclusive, so channel values are in [0, 254].
	        color = rng.randint(low=0, high=255, size=(len(lines), 3))

	        ret = {}
	        ret[0] = np.array([0, 0, 0])
	        ret[1] = np.array([255, 0, 0])
	        for i, line in enumerate(lines):
	            ret[line.label] = color[i, :].astype(np.uint8)
	        return ret

	    rgb_input_img: np.ndarray = get_rgb_input_img(img_input)
	    lines_colors: Dict[int, np.ndarray] = generate_line_colors(lines)

	    out_vector: Tuple[np.ndarray, np.ndarray, Path, dict]
	    out_vector = generate_vector_output(rgb_input_img, lines, lines_colors)

	    out_pixel: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Path, Path, dict]
	    out_pixel = generate_pixel_output(rgb_input_img, img_label, superpositions, lines_colors)

	    # Both groups are flattened into one tuple: returning
	    # ``out_vector, out_pixel`` would hand over two nested tuples instead of
	    # the 13 individual values documented above.
	    return (*out_vector, *out_pixel)

	def app_function(
	    greyscale_input_img,
	    min_len,
	    preprocess,
	    tracker,
	    traversal_mode,
	    extraction_type,
	    negate_image,
	    dyn,
	    size_mask,
	    double_exponential_alpha,
	    simple_moving_average_memory,
	    exponential_moving_average_memory,
	    one_euro_beta,
	    one_euro_mincutoff,
	    one_euro_dcutoff,
	    bucket_size,
	    nb_values_to_keep,
	    discontinuity_relative,
	    discontinuity_absolute,
	    minimum_for_fusion,
	    default_sigma_position,
	    default_sigma_thickness,
	    default_sigma_luminosity,
	    min_nb_values_sigma,
	    sigma_pos_min,
	    sigma_thickness_min,
	    sigma_luminosity_min,
	    gradient_threshold,
	    llumi,
	    blumi,
	    ratio_lum,
	    max_thickness,
	    threshold_intersection,
	    remove_duplicates):
	    """Run the line detector on the input image and build the application outputs

	    Args:
	        greyscale_input_img: The image given to ``line_detector`` (the
	            gradio input component provides a 1 channel numpy image, or
	            None when no image was selected).
	        preprocess, tracker, traversal_mode, extraction_type: Names of the
	            enum members to use, as selected in the interface.
	        Every other parameter is the raw value of the widget of the same
	        name; it is cast to int, float or bool and forwarded to
	        ``line_detector`` as the keyword argument of the same name.

	    Returns:
	        tuple: The duration of the detection in seconds, followed by every
	            element returned by ``generate_output``.

	    Raises:
	        gr.Error: If no input image was provided.
	        KeyError: If one of the enum names does not match any member.
	    """
	    # Fail early with a message the interface can display instead of
	    # letting the detector crash on a missing image.
	    if greyscale_input_img is None:
	        raise gr.Error("Please provide an input image before running the detection.")

	    img_label: np.ndarray
	    superpositions: List[LSuperposition]
	    lines: List[VSegment]

	    def get_enum_value(enum, value):
	        """Return the member of ``enum`` designated by ``value``

	        The exact name is tried first. Otherwise the value is normalised to
	        upper snake case so that a human readable choice such as
	        "Black top hat" resolves to BLACK_TOP_HAT.
	        NOTE(review): presumably the member names are upper snake case -
	        confirm against the pylena enum definitions.
	        """
	        members = enum.__members__
	        if value in members:
	            return members[value]
	        return members[value.upper().replace(" ", "_")]

	    # perf_counter is monotonic, which makes it the appropriate clock to
	    # measure an elapsed time.
	    t0 = time.perf_counter()
	    img_label, superpositions, lines = line_detector(
	        greyscale_input_img, "full",
	        min_len=int(min_len),
	        preprocess=get_enum_value(e_segdet_preprocess, preprocess),
	        tracker=get_enum_value(e_segdet_process_tracking, tracker),
	        traversal_mode=get_enum_value(e_segdet_process_traversal_mode, traversal_mode),
	        extraction_type=get_enum_value(e_segdet_process_extraction, extraction_type),
	        negate_image=bool(negate_image),
	        dyn=float(dyn),
	        size_mask=int(size_mask),
	        double_exponential_alpha=float(double_exponential_alpha),
	        simple_moving_average_memory=int(simple_moving_average_memory),
	        exponential_moving_average_memory=int(exponential_moving_average_memory),
	        one_euro_beta=float(one_euro_beta),
	        one_euro_mincutoff=float(one_euro_mincutoff),
	        one_euro_dcutoff=float(one_euro_dcutoff),
	        bucket_size=int(bucket_size),
	        nb_values_to_keep=int(nb_values_to_keep),
	        discontinuity_relative=int(discontinuity_relative),
	        discontinuity_absolute=int(discontinuity_absolute),
	        minimum_for_fusion=int(minimum_for_fusion),
	        default_sigma_position=int(default_sigma_position),
	        default_sigma_thickness=int(default_sigma_thickness),
	        default_sigma_luminosity=int(default_sigma_luminosity),
	        min_nb_values_sigma=int(min_nb_values_sigma),
	        sigma_pos_min=float(sigma_pos_min),
	        sigma_thickness_min=float(sigma_thickness_min),
	        sigma_luminosity_min=float(sigma_luminosity_min),
	        gradient_threshold=int(gradient_threshold),
	        llumi=int(llumi),
	        blumi=int(blumi),
	        ratio_lum=float(ratio_lum),
	        max_thickness=int(max_thickness),
	        threshold_intersection=float(threshold_intersection),
	        remove_duplicates=bool(remove_duplicates)
	    )
	    t1 = time.perf_counter()

	    # Only the detection itself is timed, not the rendering of the outputs.
	    duration = t1 - t0

	    outputs = generate_output(greyscale_input_img, img_label, superpositions, lines)

	    return (duration, *outputs)



	# Declaration of the gradio interface: inputs and parameters in the first
	# column, outputs in the second one, then the run button and the examples.
	# NOTE(review): the nesting of the layout below was rebuilt from the
	# statement order - confirm it against the deployed application.
	with gr.Blocks() as app:
	    gr.Markdown("""
	# Pylena line detection demonstration

	This is a demonstration of the line detector described in the article Linear Object Detection in Document Images using Multiple Object Tracking
	accepted at ICDAR 2023. The article is available at: https://arxiv.org/abs/2305.16968.

	## How to use this demonstration ?

	You can either upload your own (greyscale/8bit image) image or use one of the examples, then change the parameters and click on the run button.

	The complete documentation is available at: http://olena.pages.lre.epita.fr/pylena/
	""")


	    with gr.Row():
	        with gr.Column():
	            gr.Markdown("## Input")

	            img_input = gr.Image(type="numpy", image_mode="L", label="Greyscale input image")

	            with gr.Tab("Parameters"):
	                with gr.Tab("Tracking"):
	                    min_len = gr.Number(label="min_len", value=default_min_len)
	                    tracker = gr.Radio(label="tracker", choices=["KALMAN", "ONE_EURO", "DOUBLE_EXPONENTIAL", "LAST_INTEGRATION", "SIMPLE_MOVING_AVERAGE", "EXPONENTIAL_MOVING_AVERAGE"], value=default_tracker)
	                    traversal_mode = gr.Radio(label="traversal_mode", choices=["HORIZONTAL_VERTICAL", "HORIZONTAL", "VERTICAL"], value=default_traversal_mode)

	                with gr.Tab("Observation extraction"):
	                    blumi = gr.Number(label="blumi", value=default_blumi)
	                    llumi = gr.Number(label="llumi", value=default_llumi)
	                    max_thickness = gr.Number(label="max_thickness", value=default_max_thickness)

	                with gr.Tab("Discontinuity"):
	                    discontinuity_relative = gr.Number(label="discontinuity_relative", value=default_discontinuity_relative)
	                    discontinuity_absolute = gr.Number(label="discontinuity_absolute", value=default_discontinuity_absolute)

	            with gr.Tab("Advanced parameters"):
	                with gr.Tab("Preprocessing"):
	                    preprocess = gr.Radio(label="preprocess", choices=["NONE", "Black top hat"], value=default_preprocess)
	                    negate_image = gr.Checkbox(label="negate_image", value=default_negate_image)
	                    dyn = gr.Number(label="dyn", value=default_dyn)
	                    size_mask = gr.Number(label="size_mask", value=default_size_mask)

	                with gr.Tab("Tracker specific parameters"):
	                    double_exponential_alpha = gr.Number(label="double_exponential_alpha", value=default_double_exponential_alpha)
	                    simple_moving_average_memory = gr.Number(label="simple_moving_average_memory", value=default_simple_moving_average_memory)
	                    exponential_moving_average_memory = gr.Number(label="exponential_moving_average_memory", value=default_exponential_moving_average_memory)
	                    one_euro_beta = gr.Number(label="one_euro_beta", value=default_one_euro_beta)
	                    one_euro_mincutoff = gr.Number(label="one_euro_mincutoff", value=default_one_euro_mincutoff)
	                    one_euro_dcutoff = gr.Number(label="one_euro_dcutoff", value=default_one_euro_dcutoff)

	                with gr.Tab("Tracker parameters"):
	                    nb_values_to_keep = gr.Number(label="nb_values_to_keep", value=default_nb_values_to_keep)
	                    minimum_for_fusion = gr.Number(label="minimum_for_fusion", value=default_minimum_for_fusion)

	                with gr.Tab("Observation extraction"):
	                    # Uses the shared default constant like every other widget
	                    # instead of repeating the literal value.
	                    extraction_type = gr.Radio(label="extraction_type", choices=["BINARY", "GRADIENT"], value=default_extraction_type)
	                    gradient_threshold = gr.Number(label="gradient_threshold", value=default_gradient_threshold)

	                with gr.Tab("Observation matching"):
	                    default_sigma_position = gr.Number(label="default_sigma_position", value=default_default_sigma_position)
	                    default_sigma_thickness = gr.Number(label="default_sigma_thickness", value=default_default_sigma_thickness)
	                    default_sigma_luminosity = gr.Number(label="default_sigma_luminosity", value=default_default_sigma_luminosity)
	                    min_nb_values_sigma = gr.Number(label="min_nb_values_sigma", value=default_min_nb_values_sigma)
	                    sigma_pos_min = gr.Number(label="sigma_pos_min", value=default_sigma_pos_min)
	                    sigma_thickness_min = gr.Number(label="sigma_thickness_min", value=default_sigma_thickness_min)
	                    sigma_luminosity_min = gr.Number(label="sigma_luminosity_min", value=default_sigma_luminosity_min)

	                with gr.Tab("Extraction"):
	                    ratio_lum = gr.Number(label="ratio_lum", value=default_ratio_lum)

	                with gr.Tab("Post Processing"):
	                    threshold_intersection = gr.Number(label="threshold_intersection", value=default_threshold_intersection)
	                    remove_duplicates = gr.Checkbox(label="remove_duplicates", value=default_remove_duplicates)

	                with gr.Tab("Optimisation"):
	                    bucket_size = gr.Number(label="bucket_size", value=default_bucket_size)

	        with gr.Column():
	            gr.Markdown("## Output")

	            out_duration = gr.Number(label="Line detection duration (in seconds)", value=-1, interactive=False)

	            with gr.Tab("Output Vector"):
	                with gr.Tab("Over input"):
	                    out_vector_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("Line only"):
	                    out_vector_label_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("File"):
	                    out_vector_file = gr.File(label="Vector output full", interactive=False)
	                    out_vector_file_extract = gr.Json(label="Vector sample")

	            with gr.Tab("Output Pixel"):
	                with gr.Tab("Line and Superposition over input"):
	                    out_pixel_full_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("Line over input"):
	                    out_pixel_line_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("Superposition over input"):
	                    out_pixel_superposition_over_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("Line and Superposition"):
	                    out_pixel_full_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("Line only"):
	                    out_pixel_line_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("Superposition only"):
	                    # Output only, declared like the five other pixel images.
	                    out_pixel_superposition_img = gr.Image(type="numpy", image_mode="RGB", interactive=False)
	                with gr.Tab("File"):
	                    out_pixel_file_label = gr.File(label="Pixel output full", interactive=False)
	                    # Distinct label so that the two downloadable files can be told apart.
	                    out_pixel_file_superposition = gr.File(label="Superposition output full", interactive=False)
	                    out_pixel_file_superposition_extract = gr.Json(label="Superposition sample")


	    run_button = gr.Button("Run")
	    # The order of ``inputs`` must follow the parameters of ``app_function``
	    # and the order of ``outputs`` must follow the values it returns.
	    run_button.click(
	        app_function,
	        inputs=[
	            img_input,
	            min_len,
	            preprocess,
	            tracker,
	            traversal_mode,
	            extraction_type,
	            negate_image,
	            dyn,
	            size_mask,
	            double_exponential_alpha,
	            simple_moving_average_memory,
	            exponential_moving_average_memory,
	            one_euro_beta,
	            one_euro_mincutoff,
	            one_euro_dcutoff,
	            bucket_size,
	            nb_values_to_keep,
	            discontinuity_relative,
	            discontinuity_absolute,
	            minimum_for_fusion,
	            default_sigma_position,
	            default_sigma_thickness,
	            default_sigma_luminosity,
	            min_nb_values_sigma,
	            sigma_pos_min,
	            sigma_thickness_min,
	            sigma_luminosity_min,
	            gradient_threshold,
	            llumi,
	            blumi,
	            ratio_lum,
	            max_thickness,
	            threshold_intersection,
	            remove_duplicates
	        ],
	        outputs=[
	            out_duration,

	            out_vector_over_img, out_vector_label_img,
	            out_vector_file, out_vector_file_extract,

	            out_pixel_full_over_img, out_pixel_line_over_img, out_pixel_superposition_over_img,
	            out_pixel_full_img, out_pixel_line_img, out_pixel_superposition_img,
	            out_pixel_file_label,
	            out_pixel_file_superposition, out_pixel_file_superposition_extract
	        ])


	    gr.Markdown("""
	## Examples

	Be aware that parameters are not reset when you change example.
	""")

	    # Each example only sets the widgets listed in its ``inputs``; the
	    # images are looked up in the "image" directory next to this file.
	    current_dir = os.path.dirname(__file__)
	    with gr.Tab("trade_directory"):
	        gr.Examples(
	            examples=[[os.path.join(current_dir, "image", "trade_directories.png"), 200, 200, 200]],
	            inputs=[img_input, blumi, llumi, min_len]
	        )
	    with gr.Tab("music_sheet"):
	        gr.Examples(
	            examples=[[os.path.join(current_dir, "image", "music_sheet.png"), 30, 5, 20, "HORIZONTAL"]],
	            inputs=[img_input, discontinuity_relative, max_thickness, min_len, traversal_mode]
	        )
	    with gr.Tab("map"):
	        gr.Examples(
	            examples=[[os.path.join(current_dir, "image", "map.png"), 4, 180, 180, 20, 6]],
	            inputs=[img_input, discontinuity_relative, blumi, llumi, min_len, max_thickness]
	        )

	    gr.Markdown("""
	## A question ?

	If you have any question, please contact us at: <philippe.bernet@epita.fr>
	""")

	# fmt: on

	# Start the web server as soon as the module is executed.
	app.launch()