import gradio as gr
import easyocr
import numpy as np
import cv2
from PIL import Image
import io
import base64

# Initialize the EasyOCR reader.
# EasyOCR uses 'ch_sim' (not 'zh-cn') as the code for Simplified Chinese.
reader = easyocr.Reader(['en', 'ch_sim'])  # English and Simplified Chinese


def process_image(image):
    """
    Process an image with EasyOCR and return the results.

    Args:
        image: Image file (numpy array)

    Returns:
        Tuple of (annotated image, extracted text, JSON results)
    """
    if image is None:
        return None, "No image provided", []

    # Run EasyOCR
    results = reader.readtext(image)

    # Create a copy of the image to draw bounding boxes on
    image_with_boxes = image.copy()

    # Extract text
    all_text = []
    for idx, (bbox, text, prob) in enumerate(results):
        # Draw bounding box
        pts = np.array(bbox, np.int32)
        pts = pts.reshape((-1, 1, 2))
        cv2.polylines(image_with_boxes, [pts], True, (0, 255, 0), 2)

        # Add text label with confidence
        label = f"{idx + 1}: {text} ({prob:.2f})"
        cv2.putText(image_with_boxes, label,
                    (int(bbox[0][0]), int(bbox[0][1]) - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Add to text list
        all_text.append(f"{text} (Confidence: {prob:.2f})")

    # Format as JSON for the API; cast coordinates to plain floats so the
    # result is JSON-serializable (EasyOCR returns numpy number types)
    extracted_data = [
        {
            "text": text,
            "confidence": float(prob),
            "bbox": [[float(x), float(y)] for x, y in bbox]
        }
        for bbox, text, prob in results
    ]

    return image_with_boxes, "\n".join(all_text), extracted_data


def process_base64_image(base64_image):
    """
    Process a base64-encoded image with EasyOCR.

    Args:
        base64_image: Base64-encoded image string

    Returns:
        JSON results
    """
    try:
        # Decode the base64 image (strip an optional data-URL prefix)
        if ',' in base64_image:
            base64_image = base64_image.split(',')[1]
        image_bytes = base64.b64decode(base64_image)
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # Convert the PIL Image to a numpy array
        image_np = np.array(image)

        # Run EasyOCR
        results = reader.readtext(image_np)

        # Format as JSON, casting numpy values to plain Python types
        extracted_data = [
            {
                "text": text,
                "confidence": float(prob),
                "bbox": [[float(x), float(y)] for x, y in bbox]
            }
            for bbox, text, prob in results
        ]

        return {
            "status": "success",
            "data": extracted_data
        }
    except Exception as e:
        return {
            "status": "error",
            "message": str(e)
        }


# Create the Gradio interface
with gr.Blocks(title="SizeWize OCR API") as demo:
    gr.Markdown("# SizeWize OCR API")
    gr.Markdown("This API extracts text from size chart images for the SizeWize Chrome extension.")

    with gr.Tab("Test Interface"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="numpy", label="Upload Image")
                process_btn = gr.Button("Extract Text")
            with gr.Column():
                output_image = gr.Image(type="numpy", label="Detected Text")
                output_text = gr.Textbox(label="Extracted Text")
                output_json = gr.JSON(label="JSON Output")

        process_btn.click(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, output_text, output_json]
        )

    with gr.Tab("API Documentation"):
        gr.Markdown("""
## API Usage

Send a POST request to this Gradio app with a base64-encoded image.

### Endpoint
```
https://gladiator-byte-ocr.hf.space/api/predict
```

### Request Format
```json
{
  "data": [
    "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
  ]
}
```

### Response Format
```json
{
  "data": [
    {
      "status": "success",
      "data": [
        {
          "text": "Extracted text",
          "confidence": 0.95,
          "bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
        },
        ...
      ]
    }
  ]
}
```

### Example using fetch in JavaScript
```javascript
const response = await fetch('https://gladiator-byte-ocr.hf.space/api/predict', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ data: [base64EncodedImage] })
});
const result = await response.json();
```
""")

    # API-only endpoint for direct base64 image processing.
    # (Gradio Blocks has no `load_from_checkpoint` decorator; the supported way
    # to expose an extra function is an event listener with an explicit
    # `api_name`. The name "predict" is chosen to match the /api/predict
    # endpoint described in the documentation tab above.)
    api_input = gr.Textbox(visible=False)
    api_output = gr.JSON(visible=False)
    api_btn = gr.Button(visible=False)
    api_btn.click(
        fn=process_base64_image,
        inputs=api_input,
        outputs=api_output,
        api_name="predict"
    )

# Enable request queuing and launch the app
demo.queue()
demo.launch()
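
# ---------------------------------------------------------------------------
# Example client call (illustrative sketch only; not used by the app itself).
# It mirrors the request/response format documented in the "API Documentation"
# tab above. The Space URL and the /api/predict route are assumptions carried
# over from that documentation; adjust them if the Space name or the Gradio
# version changes. Requires `pip install requests`.
# ---------------------------------------------------------------------------
def example_ocr_request(image_path: str) -> dict:
    """Encode a local image as a data URL and send it to the OCR endpoint."""
    import requests

    with open(image_path, "rb") as f:
        payload = "data:image/png;base64," + base64.b64encode(f.read()).decode("ascii")

    response = requests.post(
        "https://gladiator-byte-ocr.hf.space/api/predict",
        json={"data": [payload]},
        timeout=60,
    )
    response.raise_for_status()
    # The OCR result is the first (and only) element of the "data" list
    return response.json()["data"][0]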