Spaces:

Gladiator-byte
/

OCR

Paused

App Files Files Community

OCR / app.py

Gladiator-byte

Update app.py

826f8aa verified 2 months ago

raw

history blame contribute delete

4.99 kB

	import gradio as gr
	import easyocr
	import numpy as np
	import cv2
	from PIL import Image
	import io
	import base64

	# Initialize the shared EasyOCR reader once at import time so every request
	# reuses the same loaded models.
	# EasyOCR identifies Simplified Chinese as 'ch_sim'; 'zh-cn' is not one of its
	# language codes and makes Reader() reject the language list.
	reader = easyocr.Reader(['en', 'ch_sim'])  # English and Simplified Chinese

	def process_image(image):
	    """
	    Run EasyOCR on an image and return an annotated copy plus the results.

	    Args:
	        image: Image as a NumPy array, or None when no image was supplied.

	    Returns:
	        Tuple of (annotated image, extracted text, JSON results):
	        - annotated image: copy of the input with a green polygon and a
	          numbered label drawn for every detection (None if no image).
	        - extracted text: one "<text> (Confidence: <p>)" line per detection.
	        - JSON results: list of dicts with "text", "confidence" and "bbox"
	          keys, holding only plain Python values.
	    """
	    if image is None:
	        return None, "No image provided", []

	    # Run EasyOCR; each result is unpacked below as (bbox, text, confidence).
	    results = reader.readtext(image)

	    # Draw on a copy so the caller's array is never modified.
	    image_with_boxes = image.copy()

	    green = (0, 255, 0)
	    all_text = []
	    extracted_data = []

	    for idx, (bbox, text, prob) in enumerate(results, start=1):
	        # tolist() converts any NumPy scalars in the box into plain Python
	        # numbers; NumPy scalars are rejected by the standard JSON encoder.
	        points = np.asarray(bbox).tolist()
	        confidence = float(prob)

	        # Draw the bounding polygon.
	        pts = np.array(points, np.int32).reshape((-1, 1, 2))
	        cv2.polylines(image_with_boxes, [pts], True, green, 2)

	        # Put the label just above the first corner, but never above the top
	        # edge of the image, where it would be clipped away.
	        # NOTE: OpenCV's Hershey fonts only cover ASCII, so non-Latin text
	        # in the label is not rendered faithfully on the preview image.
	        label = f"{idx}: {text} ({confidence:.2f})"
	        label_origin = (int(points[0][0]), max(int(points[0][1]) - 10, 12))
	        cv2.putText(image_with_boxes, label, label_origin,
	                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

	        all_text.append(f"{text} (Confidence: {confidence:.2f})")
	        extracted_data.append({
	            "text": text,
	            "confidence": confidence,
	            "bbox": points,
	        })

	    return image_with_boxes, "\n".join(all_text), extracted_data

	def process_base64_image(base64_image):
	    """
	    Run EasyOCR on a base64-encoded image.

	    Args:
	        base64_image: Base64-encoded image string, either bare or as a data
	            URL ("data:image/png;base64,....").

	    Returns:
	        Dict with "status": "success" and "data" (list of dicts with "text",
	        "confidence" and "bbox" keys) when OCR ran, or "status": "error" and
	        a "message" string when the input could not be processed. This
	        function does not raise; failures are reported in the returned dict.
	    """
	    if not isinstance(base64_image, str) or not base64_image.strip():
	        return {
	            "status": "error",
	            "message": "No base64 image data provided"
	        }

	    try:
	        # Drop the "data:<mime>;base64," header of a data URL, if present.
	        _header, comma, payload = base64_image.partition(',')
	        if not comma:
	            payload = base64_image

	        image_bytes = base64.b64decode(payload)

	        # Normalise to RGB before handing the pixels to OCR: palette, 1-bit
	        # and 16-bit images would otherwise become index or boolean arrays
	        # instead of real pixel values.
	        with Image.open(io.BytesIO(image_bytes)) as image:
	            image_np = np.array(image.convert("RGB"))

	        # Run EasyOCR
	        results = reader.readtext(image_np)

	        # tolist() converts any NumPy scalars in the boxes into plain Python
	        # numbers so the response can always be JSON-encoded.
	        extracted_data = [
	            {
	                "text": text,
	                "confidence": float(prob),
	                "bbox": np.asarray(bbox).tolist()
	            }
	            for bbox, text, prob in results
	        ]

	        return {
	            "status": "success",
	            "data": extracted_data
	        }
	    except Exception as e:
	        # API boundary: report every failure to the client, do not crash.
	        return {
	            "status": "error",
	            "message": str(e)
	        }

	# Build the Gradio UI: one tab for trying the OCR interactively and one tab
	# holding the usage documentation for API clients.
	with gr.Blocks(title="SizeWize OCR API") as demo:
	    gr.Markdown("# SizeWize OCR API")
	    gr.Markdown("This API extracts text from size chart images for the SizeWize Chrome extension.")

	    with gr.Tab("Test Interface"):
	        with gr.Row():
	            # Left column: image upload and the trigger button.
	            with gr.Column():
	                input_image = gr.Image(type="numpy", label="Upload Image")
	                process_btn = gr.Button("Extract Text")

	            # Right column: annotated image, plain text and structured output.
	            with gr.Column():
	                output_image = gr.Image(type="numpy", label="Detected Text")
	                output_text = gr.Textbox(label="Extracted Text")
	                output_json = gr.JSON(label="JSON Output")

	        # Clicking the button runs OCR and fills all three output widgets.
	        process_btn.click(
	            process_image,
	            inputs=[input_image],
	            outputs=[output_image, output_text, output_json],
	        )

	    with gr.Tab("API Documentation"):
	        gr.Markdown("""
	## API Usage

	Send a POST request to this Gradio app with a base64-encoded image.

	### Endpoint

	```
	https://gladiator-byte-ocr.hf.space/api/predict
	```

	### Request Format

	```json
	{
	"data": [
	"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
	]
	}
	```

	### Response Format

	```json
	{
	"data": [
	{
	"status": "success",
	"data": [
	{
	"text": "Extracted text",
	"confidence": 0.95,
	"bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
	},
	...
	]
	}
	]
	}
	```

	### Example using fetch in JavaScript

	```javascript
	const response = await fetch('https://gladiator-byte-ocr.hf.space/api/predict', {
	method: 'POST',
	headers: {
	'Content-Type': 'application/json'
	},
	body: JSON.stringify({
	data: [base64EncodedImage]
	})
	});
	const result = await response.json();
	```
	""")

	# Custom API endpoint for direct base64 image processing.
	def api_predict(base64_image):
	    """Run OCR on a base64-encoded image string and return the result dict."""
	    return process_base64_image(base64_image)


	# Register the endpoint on the existing Blocks app BEFORE launching it:
	# anything placed after launch() is not part of the served app. The hidden
	# components exist only to give the event an input and an output; the
	# api_name exposes it to clients as the "predict" endpoint.
	with demo:
	    api_input = gr.Textbox(visible=False)
	    api_output = gr.JSON(visible=False)
	    api_trigger = gr.Button(visible=False)
	    api_trigger.click(
	        fn=api_predict,
	        inputs=api_input,
	        outputs=api_output,
	        api_name="predict",
	    )

	# Enable request queuing, then start the server.
	demo.queue()
	demo.launch()