"""Gradio demo that runs docTR OCR on an uploaded image.

The app returns the recognised text together with two downloadable files:
a plain-text transcript and a PDF produced by ``HocrParser.export_pdfa``.
"""
import os

# NOTE(review): docTR appears to read USE_TORCH at import time to select its
# backend, so this assignment must stay above the doctr imports — confirm
# against the installed docTR version before reordering.
os.environ['USE_TORCH'] = '1'

from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import gradio as gr
from PIL import Image
import base64
from utils import HocrParser

# Loaded once at start-up and reused for every request.
predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', reco_arch='crnn_vgg16_bn', pretrained=True)

title = "DocTR OCR (PDL Demo)"
description = "Upload an image to get the OCR results !"

# Fixed working files in the current directory. Every request reuses the same
# paths, so a later request overwrites the files of an earlier one.
_INPUT_IMAGE_PATH = "out.jpg"
_TEXT_OUTPUT_PATH = "RESULT_OCR.txt"
_PDF_OUTPUT_PATH = "RESULT_OCR.pdf"


def _extract_text(output):
    """Flatten an OCR result into text.

    Each word is emitted with a leading space, each line is terminated by a
    newline, and each block is followed by one extra newline.
    """
    parts = []
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                parts.extend(" " + word.value for word in line.words)
                parts.append("\n")
            parts.append("\n")
    # Join once instead of growing a string with repeated concatenation.
    return "".join(parts)


def greet(img):
    """Run OCR on ``img`` and write the text and PDF result files.

    Args:
        img: The uploaded picture as a PIL image (the interface below uses
            ``gr.Image(type="pil")``).

    Returns:
        A ``(text, text_file_path, pdf_file_path)`` tuple matching the
        ``["text", "file", "file"]`` outputs of the interface.

    Raises:
        gr.Error: If no image was supplied.
    """
    if img is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an image before submitting.")

    try:
        img.save(_INPUT_IMAGE_PATH)
    except OSError:
        # Pillow refuses to write modes JPEG cannot store (e.g. RGBA, P);
        # retry with an RGB copy of the picture.
        img.convert("RGB").save(_INPUT_IMAGE_PATH)

    doc = DocumentFile.from_images(_INPUT_IMAGE_PATH)
    output = predictor(doc)
    xml_outputs = output.export_as_xml()
    parser = HocrParser()

    res = _extract_text(output)

    # Mode "w" truncates any previous transcript, so no separate clearing
    # step is needed.
    with open(_TEXT_OUTPUT_PATH, "w", encoding="utf-8", errors="ignore") as f:
        f.write(res)
    print("Writing into file")

    # Each page is exported to the same PDF path; the document built above
    # comes from a single image file.
    for xml, page_image in zip(xml_outputs, doc):
        # The hOCR element tree is the second item of each exported entry.
        xml_element_tree = xml[1]
        parser.export_pdfa(_PDF_OUTPUT_PATH, hocr=xml_element_tree, image=page_image)

    return res, _TEXT_OUTPUT_PATH, _PDF_OUTPUT_PATH


demo = gr.Interface(
    fn=greet,
    inputs=gr.Image(type="pil"),
    outputs=["text", "file", "file"],
    title=title,
    description=description,
    examples=[
        ["Examples/Book.png"],
        ["Examples/News.png"],
        ["Examples/Manuscript.jpg"],
        ["Examples/Files.jpg"],
    ],
)

demo.launch(debug=True)