ScreenCoder

Paused

App Files Files Community

ScreenCoder / screencoder /UIED /run_single.py

Jimmyzheng-10

Update

0246ff9 4 months ago

raw

history blame

5.7 kB

	import cv2
	import os
	import numpy as np
	import multiprocessing
	import argparse
	from os.path import join as pjoin

	def get_args():
	    """Parse the command-line options of this script.

	    Returns:
	        argparse.Namespace: Holds ``run_id`` (str), the mandatory
	        ``--run_id`` option read from ``sys.argv``.
	    """
	    cli = argparse.ArgumentParser(
	        description="Processes a single image for UI element detection.",
	    )
	    cli.add_argument(
	        '--run_id',
	        required=True,
	        type=str,
	        help='A unique identifier for the processing run.',
	    )
	    namespace = cli.parse_args()
	    return namespace

	def resize_height_by_longest_edge(img_path, resize_length=800):
	    """Return the target height that scales the image's longest edge to ``resize_length``.

	    Args:
	        img_path: Path of the image to measure; it is loaded with ``cv2.imread``.
	        resize_length: Desired length of the longest edge after resizing.

	    Returns:
	        ``resize_length`` unchanged when the image is taller than it is wide;
	        otherwise ``resize_length`` multiplied by the height/width ratio and
	        truncated to an ``int``.

	    Raises:
	        ValueError: If the image at ``img_path`` cannot be loaded.
	    """
	    org = cv2.imread(img_path)
	    # cv2.imread reports failure by returning None instead of raising, so
	    # fail here with a clear message rather than an AttributeError on `.shape`.
	    if org is None:
	        raise ValueError(f"Could not read image: {img_path}")

	    height, width = org.shape[:2]
	    if height > width:
	        # Portrait: the height itself is the longest edge.
	        return resize_length
	    # Landscape or square: the width becomes resize_length, so the height
	    # shrinks by the same factor.
	    return int(resize_length * (height / width))


	def color_tips():
	    """Show a legend window mapping each annotation colour to its element type.

	    Builds a 200x200 board made of four 50-pixel-high stripes, one per
	    category, writes the category name in black on its stripe, and hands the
	    board to ``cv2.imshow`` under the window name ``'colors'``.

	    This function does not call ``cv2.waitKey`` itself.
	    NOTE(review): the window presumably only renders once the caller pumps
	    the HighGUI event loop (e.g. via ``cv2.waitKey``) -- confirm.
	    """
	    # One (label, colour) row per stripe, top to bottom. The colour tuples
	    # are passed to OpenCV unchanged.
	    legend = (
	        ('Text', (0, 0, 255)),
	        ('Non-text Compo', (0, 255, 0)),
	        ("Compo's Text Content", (255, 0, 255)),
	        ('Block', (0, 255, 255)),
	    )
	    stripe_height = 50
	    board = np.zeros((stripe_height * len(legend), 200, 3), dtype=np.uint8)

	    for row, (label, color) in enumerate(legend):
	        top = row * stripe_height
	        board[top:top + stripe_height, :, :] = color
	        # Text baseline sits 20 px below the top of the stripe.
	        cv2.putText(board, label, (10, top + 20),
	                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

	    cv2.imshow('colors', board)


	if __name__ == '__main__':
	    # Script entry point: run the UIED pipeline stages on the single image
	    # <script dir>/../data/tmp/<run_id>/<run_id>.png and write every result
	    # into that same per-run directory.
	    import sys

	    # Set multiprocessing start method to 'spawn' for macOS compatibility.
	    # This must be done at the very beginning of the main block.
	    try:
	        multiprocessing.set_start_method('spawn', force=True)
	    except RuntimeError:
	        pass  # It's OK if it's already set.

	    # Disable multiprocessing for PaddleOCR to avoid segmentation fault on macOS
	    os.environ['PADDLE_USE_MULTIPROCESSING'] = '0'

	    args = get_args()

	    # --- Dynamic Path Construction ---
	    # Construct paths based on the provided run_id
	    base_dir = os.path.dirname(os.path.abspath(__file__))
	    run_id = args.run_id

	    # The temporary directory for this specific run
	    tmp_dir = pjoin(base_dir, '..', 'data', 'tmp', run_id)

	    # Input image path
	    input_path_img = pjoin(tmp_dir, f"{run_id}.png")

	    # Output directory for this script's results.
	    # All results (ip, ocr, etc.) will go into the run's tmp subdir.
	    output_root = tmp_dir

	    if not os.path.isfile(input_path_img):
	        print(f"Error: Input image not found at {input_path_img}")
	        # sys.exit instead of the `exit` helper, which is only injected by
	        # the `site` module and is not guaranteed to exist.
	        sys.exit(1)

	    print(f"--- Starting UIED processing for run_id: {run_id} ---")
	    print(f"Input image: {input_path_img}")
	    print(f"Output root: {output_root}")

	    # Parameter notes:
	    #   ele:min-grad: gradient threshold to produce binary map
	    #   ele:ffl-block: fill-flood threshold
	    #   ele:min-ele-area: minimum area for selected elements
	    #   ele:merge-contained-ele: if True, merge elements contained in others
	    #   text:max-word-inline-gap: words with smaller distance than the gap are counted as a line
	    #   text:max-line-gap: lines with smaller distance than the gap are counted as a paragraph
	    #
	    # Tips:
	    #   1. Larger min-grad produces fine-grained binary-map while prone to over-segment element to small pieces
	    #   2. Smaller min-ele-area leaves tiny elements while prone to produce noises
	    #   3. If not merge-contained-ele, the elements inside others will be recognized, while prone to produce noises
	    #   4. The max-word-inline-gap and max-line-gap should be dependent on the input image size and resolution
	    #
	    # Suggested presets:
	    #   mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':50, 'max-word-inline-gap':6, 'max-line-gap':1}
	    #   web   : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4}
	    key_params = {'min-grad': 10, 'ffl-block': 5, 'min-ele-area': 50,
	                  'merge-contained-ele': True, 'merge-line-to-paragraph': False, 'remove-bar': True}

	    resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
	    # color_tips() # This shows a window, which is not suitable for a script.

	    # Stage switches: only component detection (ip) is enabled.
	    is_ip = True
	    is_clf = False
	    is_ocr = False
	    is_merge = False

	    if is_ocr:
	        import detect_text.text_detection as text
	        os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True)
	        text.text_detection(input_path_img, output_root, show=True, method='paddle')

	    if is_ip:
	        import detect_compo.ip_region_proposal as ip
	        os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
	        # switch of the classification func
	        classifier = None
	        if is_clf:
	            classifier = {}
	            from cnn.CNN import CNN
	            # classifier['Image'] = CNN('Image')
	            classifier['Elements'] = CNN('Elements')
	            # classifier['Noise'] = CNN('Noise')
	        ip.compo_detection(input_path_img, output_root, key_params,
	                           classifier=classifier, resize_by_height=resized_height, show=False)

	    if is_merge:
	        import detect_merge.merge as merge
	        os.makedirs(pjoin(output_root, 'merge'), exist_ok=True)
	        # Derive the file stem with os.path so it also works where the path
	        # separator is not '/'.
	        name = os.path.splitext(os.path.basename(input_path_img))[0]
	        compo_path = pjoin(output_root, 'ip', f"{name}.json")
	        ocr_path = pjoin(output_root, 'ocr', f"{name}.json")
	        merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
	                    is_remove_bar=key_params['remove-bar'],
	                    is_paragraph=key_params['merge-line-to-paragraph'], show=False)

	    print(f"--- UIED processing complete for run_id: {run_id} ---")