"""Gradio demo that runs Tesseract OCR on an uploaded image."""
from typing import List

import pytesseract
from PIL import Image
import gradio as gr


def tesseract_ocr(filepath: str, languages: List[str]) -> str:
    """Extract text from the image at *filepath* using Tesseract.

    Args:
        filepath: Path of the image file to read.
        languages: Tesseract language codes to recognise with. When the
            list is empty, no language is passed and pytesseract falls
            back to its own default.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    # Tesseract expects multiple languages joined with "+", e.g. "jpn+eng";
    # a ", " separator would be read as one (non-existent) language name.
    lang = '+'.join(languages) if languages else None
    # The context manager closes the underlying file once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)


title = "Tesseract OCR"
description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
# NOTE(review): this literal used to span several physical lines inside
# ordinary double quotes, which is a syntax error. The visible text is kept
# as-is with explicit newlines; it looks like link markup may have been lost
# from it -- confirm against the intended article footer.
article = "\n\nTesseract documentation | Github Repo\n\n"

# Each example row is [image path, list of language codes].
examples = [
    ['examples/eurotext.png', ['eng']],
    ['examples/tesseract_sample.png', ['jpn', 'eng']],
    ['examples/chi.jpg', ['HanS', 'HanT']],
]

# Offer whatever languages pytesseract reports for this installation.
language_choices = pytesseract.get_languages()

demo = gr.Interface(
    fn=tesseract_ocr,
    inputs=[
        gr.Image(type="filepath", label="Input"),
        gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='language'),
    ],
    outputs='text',
    title=title,
    description=description,
    article=article,
    examples=examples,
)

if __name__ == '__main__':
    demo.launch()
    # Printed only after launch() returns.
    print("Finished running")