Spaces:

GiantAnalytics
/

ArabicOCRExtractor

Running

App Files Community

ArabicOCRExtractor / main.py

GiantAnalytics

Rename app.py to main.py

a7b5b52 verified about 1 year ago

raw

history blame contribute delete

3.12 kB

	import gradio as gr
	import easyocr
	import cv2
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	import os
	import requests
	from pathlib import Path

	# Download and cache the font file
	def get_font():
	    """Return the path of the annotation font, downloading it on first use.

	    The font is stored as ``Roboto-Regular.ttf`` relative to the current
	    working directory. If that file already exists it is reused and no
	    network request is made.

	    Returns:
	        str: Path to the font file.

	    Raises:
	        requests.RequestException: If the download times out, fails, or the
	            server answers with an HTTP error status.
	        OSError: If the font file cannot be written.
	    """
	    font_path = Path("Roboto-Regular.ttf")
	    if not font_path.exists():
	        font_url = "https://github.com/google/fonts/raw/main/apache/roboto/Roboto-Regular.ttf"
	        # A timeout keeps a stalled connection from hanging the request forever.
	        response = requests.get(font_url, timeout=30)
	        # Without this check an HTML error page would be saved as the "font"
	        # and, because the file then exists, reused on every later call.
	        response.raise_for_status()
	        # Write to a temporary name first so an interrupted write never
	        # leaves a truncated file under the final name.
	        partial_path = font_path.with_name(font_path.name + ".part")
	        partial_path.write_bytes(response.content)
	        partial_path.replace(font_path)
	    return str(font_path)

	# Module-level EasyOCR reader for Arabic and English, created once when the
	# module is loaded and reused by ocr_extract_text for every request.
	# Set gpu=False if no GPU available.
	reader = easyocr.Reader(
	    ['ar', 'en'],
	    gpu=True,
	)

	def ocr_extract_text(image):
	    """Run OCR on an image and return the detected text plus an annotated copy.

	    Args:
	        image: Image as a NumPy array (the Gradio input in this file is
	            declared with ``type="numpy"``), or ``None`` when nothing was
	            provided.

	    Returns:
	        tuple: ``(text, annotated_image)``.

	        * ``("No image provided", None)`` when ``image`` is ``None``.
	        * ``("No text detected in the image", image)`` when the reader
	          returns no results (``image`` has its alpha channel removed if it
	          had one).
	        * Otherwise ``text`` holds one line per detection, formatted as
	          ``"<text> (Confidence: <score>)"``, and ``annotated_image`` is a
	          NumPy array with a red box and a blue label drawn per detection.
	    """
	    if image is None:
	        return "No image provided", None

	    # Drop the alpha channel of 4-channel input before running OCR.
	    if image.ndim == 3 and image.shape[2] == 4:  # RGBA
	        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

	    # Each result is unpacked below as (bbox, text, confidence).
	    results = reader.readtext(image)
	    if not results:
	        return "No text detected in the image", image

	    # Draw on a PIL copy; the input array itself is left untouched.
	    pil_image = Image.fromarray(image)
	    draw = ImageDraw.Draw(pil_image)

	    # Best effort: fall back to PIL's built-in font if the download or the
	    # font parsing fails, so OCR output is still returned.
	    try:
	        font = ImageFont.truetype(get_font(), size=20)
	    except Exception as e:
	        print(f"Error loading font: {e}")
	        font = ImageFont.load_default()
	    # NOTE(review): Roboto presumably has no Arabic glyphs, so Arabic labels
	    # may not render legibly on the annotated image - confirm.

	    detected_text = []
	    for bbox, text, confidence in results:
	        detected_text.append(f"{text} (Confidence: {confidence:.2f})")

	        # Use the extremes of all corner points rather than only bbox[0] and
	        # bbox[2]: for rotated or skewed detections those two corners are
	        # not the axis-aligned top-left/bottom-right, which yields a wrong
	        # (possibly inverted) rectangle. For axis-aligned boxes the result
	        # is identical to the previous behaviour.
	        xs = [int(point[0]) for point in bbox]
	        ys = [int(point[1]) for point in bbox]
	        top_left = (min(xs), min(ys))
	        bottom_right = (max(xs), max(ys))

	        draw.rectangle([top_left, bottom_right], outline="red", width=3)
	        draw.text(top_left, f"{text} ({confidence:.2f})", fill="blue", font=font)

	    return "\n".join(detected_text), np.array(pil_image)

	# Custom CSS for RTL support and better text display.
	# The rule targets the "output-text" class, which the output Textbox below
	# receives through elem_classes; the whole string is handed to gr.Interface
	# via its css argument. It sets right-to-left direction, right alignment,
	# and pre-wrap whitespace handling for that element.
	css = """
	.output-text {
	direction: rtl;
	text-align: right;
	font-family: Arial, sans-serif;
	white-space: pre-wrap;
	}
	"""

	# Build the Gradio UI: one image input wired to ocr_extract_text, whose two
	# return values feed the text box and the annotated-image output in order.
	iface = gr.Interface(
	    fn=ocr_extract_text,
	    inputs=gr.Image(type="numpy", label="Upload Image"),
	    outputs=[
	        # Carries the "output-text" class so the custom CSS applies to it.
	        gr.Textbox(label="Extracted Text (Arabic & English)", elem_classes=["output-text"]),
	        gr.Image(label="Annotated Image"),
	    ],
	    title="Arabic & English OCR Extractor",
	    description=(
	        "Upload an image containing Arabic and/or English text for OCR processing. "
	        "The system will detect and extract text in both languages."
	    ),
	    css=css,
	    # You can add example images here
	    examples=[],
	    cache_examples=True,
	)

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    iface.launch(
	        debug=True,
	        share=True,
	    )