File size: 4,987 Bytes
826f8aa
336c194
 
 
 
 
 
b828e09
09b29b5
826f8aa
09b29b5
826f8aa
 
 
336c194
826f8aa
 
 
 
 
 
 
 
 
 
 
 
 
 
09b29b5
826f8aa
 
09b29b5
826f8aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b828e09
826f8aa
 
336c194
826f8aa
b828e09
336c194
b828e09
 
336c194
b828e09
 
336c194
826f8aa
b828e09
 
 
 
 
336c194
b828e09
 
 
 
 
 
 
336c194
5f9e409
 
 
 
e2570ef
826f8aa
 
 
 
5f9e409
826f8aa
 
 
 
 
b828e09
826f8aa
 
 
 
b828e09
826f8aa
 
 
 
 
336c194
826f8aa
 
 
 
 
 
 
b828e09
826f8aa
 
b828e09
 
826f8aa
b828e09
 
09b29b5
b828e09
826f8aa
b828e09
09b29b5
b828e09
 
826f8aa
b828e09
826f8aa
 
 
 
 
 
b828e09
826f8aa
 
 
 
 
b828e09
826f8aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b828e09
826f8aa
 
 
e2570ef
826f8aa
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import gradio as gr
import easyocr
import numpy as np
import cv2
from PIL import Image
import io
import base64

# Initialize the shared EasyOCR reader once at import time; every request
# handler in this module reuses it instead of reloading the models.
# EasyOCR identifies Simplified Chinese as 'ch_sim' ('zh-cn' is not one of its
# language codes and makes Reader() raise on startup).
reader = easyocr.Reader(['en', 'ch_sim'])  # English and Simplified Chinese

def process_image(image):
    """
    Run EasyOCR on an image and return an annotated copy plus the results.

    Args:
        image: Image as a numpy array, or None when no image was supplied.

    Returns:
        Tuple of (annotated image, extracted text, JSON results):
        - annotated image: copy of the input with each detection outlined
          and labelled "<index>: <text> (<confidence>)"; None if no image.
        - extracted text: one "<text> (Confidence: <p>)" line per detection.
        - JSON results: list of {"text", "confidence", "bbox"} dicts built
          from native Python values so they are JSON-serializable.
    """
    if image is None:
        return None, "No image provided", []

    # Run EasyOCR; each result is a (bbox, text, confidence) triple
    results = reader.readtext(image)

    # Draw on a copy so the caller's array is left untouched
    image_with_boxes = image.copy()

    all_text = []
    extracted_data = []

    for idx, (bbox, text, prob) in enumerate(results, start=1):
        confidence = float(prob)

        # Draw bounding box
        pts = np.array(bbox, np.int32).reshape((-1, 1, 2))
        cv2.polylines(image_with_boxes, [pts], True, (0, 255, 0), 2)

        # Add text label with confidence just above the first corner; clamp
        # the baseline so labels of boxes touching the top edge stay visible.
        label = f"{idx}: {text} ({confidence:.2f})"
        label_x = int(bbox[0][0])
        label_y = max(int(bbox[0][1]) - 10, 15)
        cv2.putText(image_with_boxes, label, (label_x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Add to text list
        all_text.append(f"{text} (Confidence: {confidence:.2f})")

        # Format as JSON for API. The bbox may hold numpy scalars, which the
        # standard JSON encoder rejects, so convert it to plain Python numbers.
        extracted_data.append({
            "text": text,
            "confidence": confidence,
            "bbox": np.asarray(bbox).tolist(),
        })

    return image_with_boxes, "\n".join(all_text), extracted_data

def process_base64_image(base64_image):
    """
    Process a base64-encoded image with EasyOCR

    Args:
        base64_image: Base64-encoded image string. A data-URL style prefix
            ending in a comma (e.g. "data:image/png;base64,") is stripped.

    Returns:
        JSON results: {"status": "success", "data": [...]} where each entry
        has "text", "confidence" and "bbox", or
        {"status": "error", "message": "..."} if anything goes wrong. This
        function never raises; failures are reported in the returned dict.
    """
    try:
        if not isinstance(base64_image, str) or not base64_image.strip():
            raise ValueError("Expected a non-empty base64-encoded image string")

        # Drop an optional "<header>," prefix; base64 itself contains no commas
        if ',' in base64_image:
            base64_image = base64_image.split(',', 1)[1]

        image_bytes = base64.b64decode(base64_image)

        # Normalize to RGB so palette, CMYK, 1-bit, etc. images all reach the
        # OCR engine as a regular 3-channel array; the context manager
        # releases the decoder as soon as the pixels are copied out.
        with Image.open(io.BytesIO(image_bytes)) as image:
            image_np = np.array(image.convert("RGB"))

        # Run EasyOCR
        results = reader.readtext(image_np)

        # Format as JSON. The bbox may hold numpy scalars, which the standard
        # JSON encoder rejects, so convert it to plain Python numbers.
        extracted_data = [
            {
                "text": text,
                "confidence": float(prob),
                "bbox": np.asarray(bbox).tolist(),
            }
            for bbox, text, prob in results
        ]

        return {
            "status": "success",
            "data": extracted_data
        }
    except Exception as e:
        # API boundary: report any failure to the client instead of raising
        return {
            "status": "error",
            "message": str(e)
        }

# Create Gradio interface
# Two tabs: an interactive tester wired to process_image, and a static
# Markdown page describing how to call the service over HTTP.
with gr.Blocks(title="SizeWize OCR API") as demo:
    gr.Markdown("# SizeWize OCR API")
    gr.Markdown("This API extracts text from size chart images for the SizeWize Chrome extension.")

    with gr.Tab("Test Interface"):
        with gr.Row():
            # Left column: image upload and the trigger button
            with gr.Column():
                input_image = gr.Image(type="numpy", label="Upload Image")
                process_btn = gr.Button("Extract Text")

            # Right column: one output component per element of the tuple
            # returned by process_image
            with gr.Column():
                output_image = gr.Image(type="numpy", label="Detected Text")
                output_text = gr.Textbox(label="Extracted Text")
                output_json = gr.JSON(label="JSON Output")

        # Outputs are listed in the same order as process_image's return tuple
        process_btn.click(
            fn=process_image,
            inputs=input_image,
            outputs=[output_image, output_text, output_json]
        )

    # NOTE(review): the only event wired in this block passes an uploaded
    # image to process_image, while the text below documents a request that
    # carries a base64 string and a {"status", "data"} response — confirm
    # which handler actually serves the documented endpoint.
    with gr.Tab("API Documentation"):
        gr.Markdown("""
        ## API Usage

        Send a POST request to this Gradio app with a base64-encoded image.

        ### Endpoint

        ```
        https://gladiator-byte-ocr.hf.space/api/predict
        ```

        ### Request Format

        ```json
        {
            "data": [
                "..."
            ]
        }
        ```

        ### Response Format

        ```json
        {
            "data": [
                {
                    "status": "success",
                    "data": [
                        {
                            "text": "Extracted text",
                            "confidence": 0.95,
                            "bbox": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
                        },
                        ...
                    ]
                }
            ]
        }
        ```

        ### Example using fetch in JavaScript

        ```javascript
        const response = await fetch('https://gladiator-byte-ocr.hf.space/api/predict', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                data: [base64EncodedImage]
            })
        });
        const result = await response.json();
        ```
        """)

# Define API endpoint
def api_predict(base64_image):
    """
    API handler for direct base64 image processing.

    Args:
        base64_image: Base64-encoded image string

    Returns:
        The status/data dict produced by process_base64_image
    """
    return process_base64_image(base64_image)


# Register the handler before launching: demo.launch() blocks when the file
# is run as a script, so nothing placed after it would ever be set up, and
# Blocks has no `load_from_checkpoint` decorator to register functions with.
# Re-entering the Blocks context lets us attach hidden components whose click
# event exposes api_predict under the API name "predict".
with demo:
    api_input = gr.Textbox(visible=False, label="Base64 Image")
    api_output = gr.JSON(visible=False, label="OCR Result")
    api_trigger = gr.Button("Run OCR", visible=False)
    api_trigger.click(
        fn=api_predict,
        inputs=api_input,
        outputs=api_output,
        api_name="predict",
    )

demo.queue()
demo.launch()