Spaces:

LinhKL2002
/

App

Build error

App Files Files Community

App / demo_code.py

LinhKL2002

Upload folder using huggingface_hub

4dbe5d1 verified 8 months ago

raw

history blame contribute delete

4.55 kB

	import os
	import cv2
	import numpy as np
	from pdf2image import convert_from_path

	from main import RapidOCR
	# Single OCR engine instance, created at import time and reused for every
	# page processed by the loop further down in this script.
	ocr_engine = RapidOCR()

	# Root directory that is walked recursively (os.walk) to find input PDFs.
	# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
	dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'

	from image_enhancement import enhance_image

	def crop_dynamic(image_rgb, *, white_thresh=240, black_thresh=10):
	    """
	    Crop away the blank (near-white or near-black) border around the content.

	    A pixel counts as content when its grayscale intensity lies strictly
	    between ``black_thresh`` and ``white_thresh``. The crop is the tightest
	    axis-aligned rectangle containing every content pixel.

	    Parameters:
	    image_rgb (numpy.ndarray): Input image, either RGB with shape (H, W, 3)
	        or already single-channel grayscale with shape (H, W).
	    white_thresh (int): Intensities at or above this value are treated as
	        blank white background.
	    black_thresh (int): Intensities at or below this value are treated as
	        blank black background.

	    Returns:
	    cropped_rgb (numpy.ndarray): The input restricted to the bounding box
	        (a slice of the input, not a copy). If no content pixel exists the
	        input is returned unchanged.
	    bbox (tuple): Bounding box of the cropped region as (x, y, w, h), where
	        w and h are the actual width and height of ``cropped_rgb``.
	    """
	    # A 2-D array is already grayscale; only colour input needs converting.
	    if image_rgb.ndim == 2:
	        gray = image_rgb
	    else:
	        gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

	    # Content must be BOTH darker than white and brighter than black.
	    # OR-ing the two tests would mark every pixel as content and make the
	    # crop a no-op.
	    content = (gray > black_thresh) & (gray < white_thresh)
	    rows = np.where(content.any(axis=1))[0]
	    cols = np.where(content.any(axis=0))[0]

	    # Entirely blank image: nothing to anchor a crop on, keep the full frame.
	    if rows.size == 0:
	        height, width = gray.shape[:2]
	        return image_rgb, (0, 0, width, height)

	    y_min, y_max = int(rows[0]), int(rows[-1])
	    x_min, x_max = int(cols[0]), int(cols[-1])

	    # The max indices are inclusive, hence the +1 in both slice and size.
	    cropped_rgb = image_rgb[y_min:y_max + 1, x_min:x_max + 1]
	    return cropped_rgb, (x_min, y_min, x_max - x_min + 1, y_max - y_min + 1)

	# Collect the full path of every PDF found anywhere below dataPath.
	list_pdf = []
	for root, dirs, files in os.walk(dataPath):
	    for file in files:
	        # Case-insensitive match so that '.PDF' files are picked up too.
	        if file.lower().endswith('.pdf'):
	            pdf_f = os.path.join(root, file)
	            assert os.path.exists(pdf_f)
	            list_pdf.append(pdf_f)
	# Sort in place so processing order is deterministic. A bare sorted(list_pdf)
	# would build a sorted copy and leave list_pdf itself in os.walk order.
	list_pdf.sort()

	# Settings handed to enhance_image for every page.
	ENHANCE_PARAMETERS = {
	    'local_contrast': 1.5,      # 1.5x increase in details
	    'mid_tones': 0.5,
	    'tonal_width': 0.5,
	    'areas_dark': 0.7,          # 70% improvement in dark areas
	    'areas_bright': 0.5,        # 50% improvement in bright areas
	    'saturation_degree': 1.2,   # 1.2x increase in color saturation
	    'brightness': 0.1,          # slight increase in brightness
	    'preserve_tones': True,
	    'color_correction': False,
	}

	# Four quarter-turns cover every orientation (0°, 90°, 180°, 270°).
	MAX_ROTATION_ATTEMPTS = 4


	def _first_marker_row_pair(texts, marker="<", threshold=2):
	    """Find the first two adjacent rows that both contain many markers.

	    Parameters:
	    texts (list[str]): Recognised text rows in OCR output order.
	    marker (str): Substring to count in each row.
	    threshold (int): A row qualifies when it has strictly more than this
	        many occurrences of ``marker``.

	    Returns:
	    ((row, count), (next_row, next_count)) for the first qualifying
	    adjacent pair, or None when there is no such pair.
	    """
	    for upper, lower in zip(texts, texts[1:]):
	        upper_hits = upper.count(marker)
	        lower_hits = lower.count(marker)
	        if upper_hits > threshold and lower_hits > threshold:
	            return (upper, upper_hits), (lower, lower_hits)
	    return None


	# For every PDF: render its first pages, enhance and save each page as a
	# JPEG, then run OCR while rotating by 90° until two consecutive text rows
	# rich in '<' are found (presumably the filler character of a
	# machine-readable zone; TODO confirm) or every orientation has been tried.
	for pdf_f in list_pdf:
	    bs_name_0 = os.path.splitext(os.path.basename(pdf_f))[0]

	    # Only pages 1-3 are rendered, at 500 dpi.
	    images = convert_from_path(pdf_f, dpi=500, first_page=1, last_page=3)
	    for i, image in enumerate(images):
	        # A fresh dict copy per call keeps the shared settings intact even if
	        # enhance_image modifies its argument.
	        # NOTE(review): the page image from convert_from_path is passed
	        # directly (not a numpy array); confirm this is what enhance_image
	        # expects. Its result is used as an RGB ndarray below.
	        img = enhance_image(image, dict(ENHANCE_PARAMETERS), verbose=False)

	        print(img.shape)
	        # cv2.imwrite expects BGR channel order.
	        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
	        cv2.imwrite(f'{i + 1}_{bs_name_0}.jpg', enhanced_img_bgr)
	        print(bs_name_0, i)

	        for _attempt in range(MAX_ROTATION_ATTEMPTS):
	            result, _ = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)

	            # Each result entry carries the recognised text at index 1.
	            match = _first_marker_row_pair([r[1] for r in result]) if result else None
	            if match is not None:
	                (row_1, count_1), (row_2, count_2) = match
	                print(bs_name_0)
	                print("Consecutive rows with '<' more than 2 times each:")
	                print(f"Row 1: {row_1} (Occurrences: {count_1})")
	                print(f"Row 2: {row_2} (Occurrences: {count_2})")
	                break  # Orientation found; no further rotation needed

	            # Nothing detected in this orientation: turn 90° and retry.
	            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)