# Hugging Face Space: Gladiator-byte / OCR (status: Paused)
# File: OCR
# File size: 4,987 Bytes

import gradio as gr
import easyocr
import numpy as np
import cv2
from PIL import Image
import io
import base64

# Initialize the shared EasyOCR reader once at import time; both OCR entry
# points below reuse it.
# EasyOCR identifies Simplified Chinese as 'ch_sim' ('ch_tra' is Traditional);
# 'zh-cn' is not one of its language codes and is rejected by Reader().
reader = easyocr.Reader(['ch_sim', 'en'])  # Simplified Chinese and English

def _to_builtin_number(value):
    """Return a numpy scalar as the equivalent builtin int/float; pass others through."""
    return value.item() if isinstance(value, np.generic) else value


def process_image(image):
    """
    Run EasyOCR on an image and return an annotated copy plus the results.

    Args:
        image: Image as a numpy array, or None when no image was supplied.

    Returns:
        Tuple of (annotated image, extracted text, JSON results):
          * annotated image: copy of ``image`` with a green polygon and an
            "index: text (confidence)" label drawn for every detection, or
            None when ``image`` is None.
          * extracted text: one "text (Confidence: x.xx)" line per detection,
            joined with newlines.
          * JSON results: list of dicts with keys "text", "confidence" and
            "bbox"; all numbers are builtin Python types.
    """
    if image is None:
        return None, "No image provided", []

    # Run EasyOCR; each result is unpacked below as (bbox, text, confidence).
    results = reader.readtext(image)

    # Draw on a copy so the caller's array is left unmodified.
    image_with_boxes = image.copy()

    all_text = []
    extracted_data = []

    for idx, (bbox, text, prob) in enumerate(results, start=1):
        # Normalise numpy scalars to builtin numbers so the returned payload
        # can be serialised by any JSON encoder.
        points = [[_to_builtin_number(coord) for coord in point] for point in bbox]
        confidence = float(prob)

        # Draw bounding box
        pts = np.array(points, np.int32).reshape((-1, 1, 2))
        cv2.polylines(image_with_boxes, [pts], True, (0, 255, 0), 2)

        # Add text label with confidence just above the first corner. Clamp
        # the anchor so labels for boxes touching the top/left edge are still
        # drawn inside the image instead of at negative coordinates.
        # NOTE(review): OpenCV's Hershey fonts only cover ASCII, so non-Latin
        # text will presumably not render correctly in the label.
        label = f"{idx}: {text} ({confidence:.2f})"
        label_x = max(int(points[0][0]), 0)
        label_y = max(int(points[0][1]) - 10, 12)
        cv2.putText(image_with_boxes, label, (label_x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Human-readable line for the textbox output
        all_text.append(f"{text} (Confidence: {confidence:.2f})")

        # Structured record for the JSON output
        extracted_data.append({
            "text": text,
            "confidence": confidence,
            "bbox": points,
        })

    return image_with_boxes, "\n".join(all_text), extracted_data

def process_base64_image(base64_image):
    """
    Process a base64-encoded image with EasyOCR

    Args:
        base64_image: Base64-encoded image string, optionally prefixed with a
            data-URL header such as "data:image/png;base64,".

    Returns:
        JSON results: ``{"status": "success", "data": [...]}`` where each
        entry has "text", "confidence" and "bbox" (builtin Python numbers
        only), or ``{"status": "error", "message": "..."}`` on any failure.
        This function never raises; errors are reported in the payload.
    """
    # Reject missing/blank input up front with a clear message instead of
    # relying on an incidental TypeError further down.
    if not isinstance(base64_image, str) or not base64_image.strip():
        return {
            "status": "error",
            "message": "No base64 image data provided"
        }

    try:
        # Drop a leading data-URL header ("data:...;base64,") if present
        if ',' in base64_image:
            base64_image = base64_image.split(',')[1]

        image_bytes = base64.b64decode(base64_image)
        image = Image.open(io.BytesIO(image_bytes))

        # Palette, bilevel, CMYK, etc. images do not convert to a plain
        # grayscale/RGB pixel array, so normalise them to RGB first. The
        # common L/RGB/RGBA modes are passed through unchanged.
        if image.mode not in ("L", "RGB", "RGBA"):
            image = image.convert("RGB")

        # Convert PIL Image to numpy array
        image_np = np.array(image)

        # Run EasyOCR
        results = reader.readtext(image_np)

        # Format as JSON, turning numpy scalars into builtin numbers so the
        # response can be serialised by any JSON encoder.
        extracted_data = [
            {
                "text": text,
                "confidence": float(prob),
                "bbox": [
                    [c.item() if isinstance(c, np.generic) else c for c in point]
                    for point in bbox
                ]
            }
            for bbox, text, prob in results
        ]

        return {
            "status": "success",
            "data": extracted_data
        }
    except Exception as e:
        # API boundary: report every failure (bad base64, unreadable image,
        # OCR error) to the client rather than raising.
        return {
            "status": "error",
            "message": str(e)
        }

# Create Gradio interface
# Builds the module-level `demo` Blocks app with two tabs: an interactive
# OCR test page wired to process_image, and a static Markdown page that
# describes how to call the API.
with gr.Blocks(title="SizeWize OCR API") as demo:
    gr.Markdown("# SizeWize OCR API")
    gr.Markdown("This API extracts text from size chart images for the SizeWize Chrome extension.")

    # Tab 1: upload an image on the left, see OCR output on the right.
    with gr.Tab("Test Interface"):
        with gr.Row():
            with gr.Column():
                # type="numpy" so process_image receives a numpy array
                input_image = gr.Image(type="numpy", label="Upload Image")
                process_btn = gr.Button("Extract Text")

            with gr.Column():
                # These three components receive process_image's 3-tuple, in order.
                output_image = gr.Image(type="numpy", label="Detected Text")
                output_text = gr.Textbox(label="Extracted Text")
                output_json = gr.JSON(label="JSON Output")

        # Clicking the button runs process_image on the uploaded image.
        process_btn.click(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, output_text, output_json]
        )

    # Tab 2: static usage documentation (Markdown only, no event handlers).
    # NOTE(review): the text below documents a base64 POST endpoint at
    # /api/predict, but nothing inside this `with` block registers an event
    # that takes a base64 string — confirm the endpoint is wired up elsewhere.
    with gr.Tab("API Documentation"):
        gr.Markdown("""
        ## API Usage

        Send a POST request to this Gradio app with a base64-encoded image.

        ### Endpoint

        ```
        https://gladiator-byte-ocr.hf.space/api/predict
        ```

        ### Request Format

        ```json
        {
            "data": [
                "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
            ]
        }
        ```

        ### Response Format

        ```json
        {
            "data": [
                {
                    "status": "success",
                    "data": [
                        {
                            "text": "Extracted text",
                            "confidence": 0.95,
                            "bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
                        },
                        ...
                    ]
                }
            ]
        }
        ```

        ### Example using fetch in JavaScript

        ```javascript
        const response = await fetch('https://gladiator-byte-ocr.hf.space/api/predict', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                data: [base64EncodedImage]
            })
        });
        const result = await response.json();
        ```
        """)

# Define API endpoint
def api_predict(base64_image):
    """
    API handler: run OCR on a base64-encoded image.

    Args:
        base64_image: Base64-encoded image string (a data-URL prefix is allowed).

    Returns:
        The status/data dict produced by process_base64_image.
    """
    return process_base64_image(base64_image)


# Register the handler as a named endpoint BEFORE launching. Previously it was
# declared after demo.launch() using `demo.load_from_checkpoint`, which is not
# a gr.Blocks attribute, so the endpoint was never registered.
# Re-entering the Blocks context lets us attach extra (hidden) components and
# an event to the existing app; the components exist only to give the event
# an input and an output, and api_name exposes it to API clients as "predict".
# NOTE(review): the exact URL path for named endpoints depends on the
# installed Gradio version — confirm it matches the documented /api/predict.
with demo:
    api_input = gr.Textbox(label="Base64 Image", visible=False)
    api_output = gr.JSON(label="OCR Result", visible=False)
    api_trigger = gr.Button("Run OCR API", visible=False)
    api_trigger.click(
        fn=api_predict,
        inputs=api_input,
        outputs=api_output,
        api_name="predict"
    )

# Start the app only after every endpoint has been registered.
demo.queue()
demo.launch()