Charlie Li
init
8011742
raw
history blame
No virus
1.05 kB
import gradio as gr
import pytesseract
from PIL import Image, ImageDraw
def extract_text_and_boxes(image):
    """Run Tesseract OCR on *image* and outline every recognised word.

    Args:
        image: A PIL image to analyse, or ``None`` when no image was
            supplied (e.g. the UI form was submitted empty).

    Returns:
        A ``(image, boxes_and_words)`` tuple.  ``image`` is the same object
        that was passed in, with a red rectangle drawn around each word
        (the input is modified in place).  ``boxes_and_words`` is a list of
        ``{'box': (x, y, w, h), 'word': str}`` dicts, one per non-blank
        OCR result, in the order pytesseract reported them.  When *image*
        is ``None`` the result is ``(None, [])``.
    """
    # Without this guard a missing image would fail inside pytesseract;
    # returning an empty result keeps the UI responsive instead.
    if image is None:
        return None, []

    data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
    draw = ImageDraw.Draw(image)
    boxes_and_words = []

    # The DICT output holds parallel lists; walk them in lockstep.
    columns = zip(
        data['text'], data['left'], data['top'], data['width'], data['height']
    )
    for word, x, y, w, h in columns:
        # Skip entries whose text is empty or whitespace only.
        if not word.strip():
            continue
        boxes_and_words.append({'box': (x, y, w, h), 'word': word})
        draw.rectangle([x, y, x + w, y + h], outline='red')

    return image, boxes_and_words
# Build the UI components first so the Interface call below stays readable.
_input_image = gr.Image(type='pil')
_annotated_image = gr.Image(type='pil', label="Image with Bounding Boxes")
_words_json = gr.JSON(label="Extracted Words and Boxes")

# One image in; the annotated image plus the word/box list out.
iface = gr.Interface(
    fn=extract_text_and_boxes,
    inputs=_input_image,
    outputs=[_annotated_image, _words_json],
    title="Test Tesseract",
    description="Test PyTesseract.",
)

# Start the Gradio app.
iface.launch()