# OCR / app.py
# Gladiator-byte's picture
# Update app.py
# 826f8aa verified
import gradio as gr
import easyocr
import numpy as np
import cv2
from PIL import Image
import io
import base64
# Initialize the shared EasyOCR reader once at import time; both OCR entry
# points in this module reuse it.
# EasyOCR identifies Simplified Chinese as 'ch_sim' ('zh-cn' is not one of its
# language codes and makes Reader() raise ValueError at startup).
reader = easyocr.Reader(['ch_sim', 'en'])  # Simplified Chinese and English
def process_image(image):
    """
    Run EasyOCR on an image and return an annotated copy plus the detections.

    Args:
        image: Image as a numpy array, or None when nothing was supplied.

    Returns:
        Tuple of (annotated image, extracted text, JSON results):
        - annotated image: copy of the input with a green polygon and an
          "<index>: <text> (<confidence>)" label drawn for each detection,
          or None when no image was given;
        - extracted text: one "<text> (Confidence: <p>)" line per detection;
        - JSON results: list of dicts with "text", "confidence" and "bbox"
          keys holding only plain Python values.
    """
    if image is None:
        return None, "No image provided", []

    # Run EasyOCR with the module-level reader.
    results = reader.readtext(image)

    # Draw on a copy so the caller's array is left untouched.
    image_with_boxes = image.copy()

    all_text = []
    extracted_data = []
    for idx, (bbox, text, prob) in enumerate(results, start=1):
        # tolist() yields nested lists of plain Python numbers, so the box can
        # be serialized as JSON even if the reader hands back numpy scalars.
        corners = np.asarray(bbox).tolist()
        confidence = float(prob)

        # Outline the detected region.
        pts = np.array(corners, np.int32).reshape((-1, 1, 2))
        cv2.polylines(image_with_boxes, [pts], True, (0, 255, 0), 2)

        # Label just above the first corner; keep the baseline at least 12 px
        # below the top edge so labels of boxes touching the top stay visible.
        # NOTE: OpenCV's Hershey fonts only cover ASCII, so non-ASCII text
        # (e.g. Chinese) shows up as question marks in the drawn label; the
        # text and JSON outputs are unaffected.
        label = f"{idx}: {text} ({confidence:.2f})"
        label_x = int(corners[0][0])
        label_y = max(int(corners[0][1]) - 10, 12)
        cv2.putText(image_with_boxes, label, (label_x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        all_text.append(f"{text} (Confidence: {confidence:.2f})")
        extracted_data.append(
            {
                "text": text,
                "confidence": confidence,
                "bbox": corners,
            }
        )

    return image_with_boxes, "\n".join(all_text), extracted_data
def process_base64_image(base64_image):
    """
    Process a base64-encoded image with EasyOCR.

    Args:
        base64_image: Base64-encoded image string. A data-URL style prefix
            (everything up to and including the first comma) is ignored.

    Returns:
        Dict with "status" set to "success" and a "data" list of detections
        (each with "text", "confidence" and "bbox" as plain Python values),
        or "status" set to "error" and a "message" describing the failure.
        This function does not raise; every failure is reported in the dict.
    """
    try:
        if not isinstance(base64_image, str):
            raise TypeError("base64_image must be a base64-encoded string")

        # Drop an optional "data:image/...;base64," prefix.
        payload = base64_image.split(',', 1)[-1]
        image_bytes = base64.b64decode(payload)

        with Image.open(io.BytesIO(image_bytes)) as image:
            # Normalize palette, 1-bit, CMYK, ... images to 8-bit RGB so the
            # array holds real pixel values rather than palette indices or
            # booleans.
            image_np = np.array(image.convert("RGB"))

        # Run EasyOCR with the module-level reader.
        results = reader.readtext(image_np)

        # tolist() turns any numpy scalars in the box coordinates into plain
        # Python numbers so the response can be serialized as JSON.
        extracted_data = [
            {
                "text": text,
                "confidence": float(prob),
                "bbox": np.asarray(bbox).tolist(),
            }
            for bbox, text, prob in results
        ]
        return {
            "status": "success",
            "data": extracted_data
        }
    except Exception as e:
        # API boundary: report the failure to the caller instead of raising.
        return {
            "status": "error",
            "message": str(e)
        }
# Markdown shown on the "API Documentation" tab.
_API_DOCS_MARKDOWN = """
## API Usage
Send a POST request to this Gradio app with a base64-encoded image.
### Endpoint
```
https://gladiator-byte-ocr.hf.space/api/predict
```
### Request Format
```json
{
"data": [
"..."
]
}
```
### Response Format
```json
{
"data": [
{
"status": "success",
"data": [
{
"text": "Extracted text",
"confidence": 0.95,
"bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
},
...
]
}
]
}
```
### Example using fetch in JavaScript
```javascript
const response = await fetch('https://gladiator-byte-ocr.hf.space/api/predict', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
data: [base64EncodedImage]
})
});
const result = await response.json();
```
"""

# Build the Gradio UI: an interactive test tab and a static documentation tab.
with gr.Blocks(title="SizeWize OCR API") as demo:
    gr.Markdown(value="# SizeWize OCR API")
    gr.Markdown(
        value="This API extracts text from size chart images for the SizeWize Chrome extension."
    )

    with gr.Tab("Test Interface"):
        with gr.Row():
            # Left column: image upload and trigger button.
            with gr.Column():
                input_image = gr.Image(label="Upload Image", type="numpy")
                process_btn = gr.Button(value="Extract Text")
            # Right column: annotated image, plain text and JSON results.
            with gr.Column():
                output_image = gr.Image(label="Detected Text", type="numpy")
                output_text = gr.Textbox(label="Extracted Text")
                output_json = gr.JSON(label="JSON Output")

        # process_image returns (annotated image, text, JSON) in this order.
        process_btn.click(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, output_text, output_json],
        )

    with gr.Tab("API Documentation"):
        gr.Markdown(_API_DOCS_MARKDOWN)
# Define API endpoint
def api_predict(base64_image):
    """
    API entry point for direct base64 image processing.

    Args:
        base64_image: Base64-encoded image string.

    Returns:
        The status/data (or status/message) dict produced by
        process_base64_image.
    """
    return process_base64_image(base64_image)


# gr.Blocks has no `load_from_checkpoint` attribute, and statements placed
# after the blocking demo.launch() call never run while the app is serving.
# Register the endpoint on the existing Blocks app *before* launching, using
# hidden components so the visible UI is unchanged. api_name="predict" exposes
# it under the name used in the API documentation tab.
with demo:
    api_input = gr.Textbox(visible=False, label="Base64 image")
    api_output = gr.JSON(visible=False, label="OCR result")
    api_trigger = gr.Button(visible=False)
    api_trigger.click(
        fn=api_predict,
        inputs=api_input,
        outputs=api_output,
        api_name="predict",
    )

demo.queue()
demo.launch()