sotirios-slv's picture
Added a download button; testing that the event listener is attached correctly
raw history blame
No virus
1.85 kB
import os

import gradio as gr
import pytesseract
from flair.data import Sentence
from flair.models import SequenceTagger
from PIL import Image
from segtok.segmenter import split_single
# Load the Flair sequence tagger identified by "ner-ontonotes" once, at import
# time, so every request reuses the same model instance.
tagger = SequenceTagger.load("ner-ontonotes")
langs = []
# Ask the tesseract CLI which languages are installed. The context manager
# closes the pipe as soon as the output has been read. The first line is
# skipped (presumably tesseract's header line -- confirm against your tesseract
# version) and blank lines are ignored, so the result no longer depends on
# whether the output ends with a trailing newline.
with os.popen("tesseract --list-langs") as _lang_pipe:
    choices = [line for line in _lang_pipe.read().splitlines()[1:] if line.strip()]
blocks = gr.Blocks()
def get_named_entities(ocr_text: str) -> str:
    """Return the named entities found in *ocr_text*, one per line.

    The text is split into sentences with ``split_single``, each non-blank
    sentence is wrapped in a Flair ``Sentence``, and the module-level
    ``tagger`` is run over them. Every span labelled ``"ner"`` is converted
    with ``str()`` and the results are joined with newlines.

    Args:
        ocr_text: Raw text, e.g. the output of the OCR step.

    Returns:
        A newline-separated string of entity descriptions; ``""`` when no
        entities are found or the input contains no text.
    """
    # Skip blank segments so the tagger is never handed empty sentences.
    sentences = [
        Sentence(sent, use_tokenizer=True)
        for sent in split_single(ocr_text)
        if sent.strip()
    ]

    # The spans only exist after the tagger has annotated the sentences.
    if sentences:
        tagger.predict(sentences)

    entities = "\n".join(
        str(span)
        for sentence in sentences
        for span in sentence.get_spans("ner")
    )
    print("ENTITIES ", entities)
    return entities
def run(image, lang="eng"):
    """OCR *image* and extract named entities from the recognised text.

    ``lang`` is forwarded to ``pytesseract.image_to_string``; an empty list
    is replaced by ``None``. Returns a ``(ocr_text, entities)`` tuple, where
    ``entities`` is whatever ``get_named_entities`` returns for the OCR text.
    """
    if lang == []:
        ocr_lang = None
    else:
        ocr_lang = lang

    recognised = pytesseract.image_to_string(image, lang=ocr_lang)
    return recognised, get_named_entities(recognised)
def download_output(ocr_text: str, named_entities: str):
    """Log the sizes of the OCR text and entity text, then return ``True``.

    Nothing is written or downloaded here: the function only prints a marker
    line followed by the length of each argument.
    """
    print("Download output!")
    labelled = (("OCR text: ", ocr_text), ("Named Entities: ", named_entities))
    for label, text in labelled:
        print(label, len(text))
    return True
# Build the UI: an input column (image, language, Run button), two output
# columns (OCR text and named entities), and a row holding the download button.
with gr.Blocks() as demo:
    gr.Markdown("## Theatre Programmer")
    with gr.Row():
        with gr.Column():
            image_in = gr.Image(type="pil")
            lang = gr.Dropdown(choices, value="eng")
            btn = gr.Button("Run")
        with gr.Column():
            ocr_text = gr.TextArea(label="OCR output")
        with gr.Column():
            ner = gr.TextArea(label="Named entities")
        # with gr.Column():
        # gr.CheckboxGroup(ner, label="Named entities")
    with gr.Row():
        download_btn = gr.Button("Download output")

    # "Run" performs OCR + NER on the uploaded image and fills both text areas.
    btn.click(fn=run, inputs=[image_in, lang], outputs=[ocr_text, ner])
    # The download button currently only reports on the two text areas; it
    # updates no component, hence the empty outputs list.
    download_btn.click(fn=download_output, inputs=[ocr_text, ner], outputs=[])
    # NOTE(review): no demo.launch() call is visible in this section -- confirm
    # the app is launched elsewhere in the file.