Spaces:
Paused
Paused
import gradio as gr | |
import easyocr | |
import numpy as np | |
import cv2 | |
from PIL import Image | |
import io | |
import base64 | |
# Initialize the shared EasyOCR reader once at import time; it is reused by
# every request handler below.
# EasyOCR identifies Simplified Chinese as 'ch_sim' (not 'zh-cn'); an
# unsupported code makes Reader() raise ValueError before the app can start.
reader = easyocr.Reader(['en', 'ch_sim'])  # English and Simplified Chinese
def process_image(image):
    """
    Run EasyOCR on an image and return an annotated copy plus the results.

    Args:
        image: Image as a numpy array, or None when no image was supplied.

    Returns:
        Tuple of (annotated image, extracted text, JSON results):
        - annotated image: copy of the input with a green polygon and an
          "index: text (confidence)" label drawn for every detection
          (None when no image was supplied);
        - extracted text: one "text (Confidence: x.xx)" line per detection;
        - JSON results: list of {"text", "confidence", "bbox"} dicts holding
          only native Python values so they can be JSON-serialized.
    """
    if image is None:
        return None, "No image provided", []

    # Run EasyOCR
    results = reader.readtext(image)

    # Draw on a copy so the caller's array is left untouched.
    image_with_boxes = image.copy()

    all_text = []
    extracted_data = []
    for idx, (bbox, text, prob) in enumerate(results, start=1):
        confidence = float(prob)

        # Draw bounding box
        pts = np.array(bbox, np.int32).reshape((-1, 1, 2))
        cv2.polylines(image_with_boxes, [pts], True, (0, 255, 0), 2)

        # Add text label with confidence just above the first corner; clamp
        # the baseline so a box touching the top edge still gets a visible
        # label instead of one drawn off-canvas.
        label = f"{idx}: {text} ({confidence:.2f})"
        label_origin = (int(bbox[0][0]), max(int(bbox[0][1]) - 10, 12))
        cv2.putText(image_with_boxes, label, label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Add to text list
        all_text.append(f"{text} (Confidence: {confidence:.2f})")

        # Box coordinates may be numpy scalars, which the standard JSON
        # encoder rejects; .tolist() turns them into plain Python numbers.
        extracted_data.append({
            "text": text,
            "confidence": confidence,
            "bbox": np.asarray(bbox).tolist(),
        })

    return image_with_boxes, "\n".join(all_text), extracted_data
def process_base64_image(base64_image):
    """
    Process a base64-encoded image with EasyOCR.

    Args:
        base64_image: Base64-encoded image string, optionally prefixed with a
            data-URL header such as "data:image/png;base64,".

    Returns:
        Dict with "status": "success" and "data" (a list of
        {"text", "confidence", "bbox"} entries made of native Python values),
        or "status": "error" and a "message" describing the failure.
        This function never raises; all failures are reported in the dict.
    """
    # Reject missing/blank payloads up front with a clear message rather than
    # leaking an internal TypeError text to the API client.
    if not isinstance(base64_image, str) or not base64_image.strip():
        return {
            "status": "error",
            "message": "No image provided"
        }

    try:
        # Drop the data-URL header if present; base64 itself never contains
        # a comma, so everything after the first one is the payload.
        if ',' in base64_image:
            base64_image = base64_image.split(',', 1)[1]
        image_bytes = base64.b64decode(base64_image)

        # Normalize to 3-channel RGB: palette, 1-bit and similar modes would
        # otherwise become index/boolean arrays instead of pixel data.
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        image_np = np.array(image)

        # Run EasyOCR
        results = reader.readtext(image_np)

        # Format as JSON; .tolist() converts numpy scalars in the box
        # coordinates to plain Python numbers so the result serializes.
        extracted_data = [
            {
                "text": text,
                "confidence": float(prob),
                "bbox": np.asarray(bbox).tolist()
            }
            for bbox, text, prob in results
        ]
        return {
            "status": "success",
            "data": extracted_data
        }
    except Exception as e:
        # API boundary: report any decoding/OCR failure to the client.
        return {
            "status": "error",
            "message": str(e)
        }
# Create Gradio interface
with gr.Blocks(title="SizeWize OCR API") as demo:
    gr.Markdown("# SizeWize OCR API")
    gr.Markdown("This API extracts text from size chart images for the SizeWize Chrome extension.")

    # Interactive tab: upload an image and inspect the OCR output.
    with gr.Tab("Test Interface"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="numpy", label="Upload Image")
                process_btn = gr.Button("Extract Text")
            with gr.Column():
                output_image = gr.Image(type="numpy", label="Detected Text")
                output_text = gr.Textbox(label="Extracted Text")
                output_json = gr.JSON(label="JSON Output")
        process_btn.click(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, output_text, output_json]
        )

    # Static usage notes for API clients.
    with gr.Tab("API Documentation"):
        gr.Markdown("""
        ## API Usage
        Send a POST request to this Gradio app with a base64-encoded image.
        ### Endpoint
        ```
        https://gladiator-byte-ocr.hf.space/api/predict
        ```
        ### Request Format
        ```json
        {
        "data": [
        "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA..."
        ]
        }
        ```
        ### Response Format
        ```json
        {
        "data": [
        {
        "status": "success",
        "data": [
        {
        "text": "Extracted text",
        "confidence": 0.95,
        "bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
        },
        ...
        ]
        }
        ]
        }
        ```
        ### Example using fetch in JavaScript
        ```javascript
        const response = await fetch('https://gladiator-byte-ocr.hf.space/api/predict', {
        method: 'POST',
        headers: {
        'Content-Type': 'application/json'
        },
        body: JSON.stringify({
        data: [base64EncodedImage]
        })
        });
        const result = await response.json();
        ```
        """)

    # Register the base64 endpoint described in the documentation tab.
    # Without a named event taking a single base64 string, the documented
    # request shape has no matching handler. The components are hidden
    # because they exist only to give the API call an input and an output.
    # NOTE(review): the exact URL path for a named endpoint depends on the
    # installed Gradio version - confirm against the deployed version.
    api_input = gr.Textbox(visible=False)
    api_output = gr.JSON(visible=False)
    api_trigger = gr.Button(visible=False)
    api_trigger.click(
        fn=process_base64_image,
        inputs=api_input,
        outputs=api_output,
        api_name="predict"
    )
# Add a custom API endpoint for direct base64 image processing.
# Defined before the server starts: launch() normally blocks when the file is
# run as a script, so anything placed after it would not exist while the app
# is serving requests.
def api_predict(base64_image):
    """
    Thin wrapper that forwards a base64-encoded image to process_base64_image.

    Args:
        base64_image: Base64-encoded image string.

    Returns:
        The result dict produced by process_base64_image.
    """
    return process_base64_image(base64_image)


# Enable request queuing, then start the app.
demo.queue()
demo.launch()