Spaces:

pantatwiai
/

Newspapers-OCR-Demo

Runtime error

Devesh Pant

1b870f4 about 1 year ago

2.44 kB

	import cv2
	from run_yolo import get_layout_results
	from order_text_blocks import get_ordered_data
	from run_ocr import OCR
	from tqdm import tqdm
	import time


	def driver(img, language_name, st, onnx_path="./best.onnx"):
	    """Run layout detection and then OCR every text region, grouped by article.

	    Args:
	        img: Page image. Must support ``.copy()``, ``.shape[:2]`` and 2-D
	            slicing (presumably a NumPy array as returned by OpenCV --
	            confirm against the caller).
	        language_name: Forwarded unchanged to ``OCR`` as ``lang``.
	        st: Object exposing ``markdown`` and ``spinner`` (presumably the
	            ``streamlit`` module); used only for progress messages.
	        onnx_path: Path of the layout model handed to
	            ``get_layout_results``. Defaults to the previously hard-coded
	            ``"./best.onnx"``.

	    Returns:
	        A ``(output_dict, article_wise_ocr)`` tuple. ``output_dict`` is the
	        value returned by ``get_ordered_data``. ``article_wise_ocr`` maps
	        each article's ``"x1_y1_x2_y2"`` box key to a dict of
	        ``{section_name: [{box_key: ocr_text}, ...]}`` for the sections
	        ``'Headlines'``, ``'Sub-headlines'`` and ``'Text Block'``.
	    """
	    # The detector receives a copy; crops below are taken from `img` itself.
	    layout_labels = get_layout_results(img.copy(), onnx_path)
	    output_dict = get_ordered_data(layout_labels, img)
	    st.markdown("<p style='text-align: center; color: red'>Layout Analysis Completed!</p>", unsafe_allow_html=True)

	    ocr_sections = ('Headlines', 'Sub-headlines', 'Text Block')
	    article_wise_ocr = {}
	    h, w = img.shape[:2]

	    with st.spinner('Performing OCR...'):
	        # Wrap the sequence itself (not an enumerate object) so tqdm can
	        # read its length and display a total.
	        for article in tqdm(output_dict['Articles']):
	            ocr_dict = {}
	            # NOTE(review): articles that have no 'Articles' box all keep
	            # the key "" and overwrite one another in the result -- confirm
	            # whether downstream code relies on that.
	            article_key = ""
	            for key in article:
	                boxes = article[key]
	                if boxes == []:
	                    continue

	                if key == 'Articles':
	                    # Only the first box identifies the article.
	                    x1, y1, x2, y2 = (int(boxes[0][i]) for i in range(4))
	                    article_key = '_'.join([str(x1), str(y1), str(x2), str(y2)])
	                elif key in ocr_sections:
	                    for coord in boxes:
	                        x1, y1, x2, y2 = (int(coord[i]) for i in range(4))

	                        # Skip boxes that fall outside the image.
	                        if min(x1, y1, x2, y2) < 0 or max(x1, x2) > w or max(y1, y2) > h:
	                            continue
	                        # Skip zero-area or inverted boxes: slicing them
	                        # yields an empty crop, which must not reach OCR.
	                        if x2 <= x1 or y2 <= y1:
	                            continue

	                        output_text = OCR(img[y1:y2, x1:x2], lang=language_name)
	                        box_key = "_".join([str(x1), str(y1), str(x2), str(y2)])
	                        ocr_dict.setdefault(key, []).append({box_key: output_text})

	            article_wise_ocr[article_key] = ocr_dict

	    st.markdown("<p style='text-align: center; color: red'>OCR Completed!</p>", unsafe_allow_html=True)
	    return output_dict, article_wise_ocr