"""Gradio demo that sends an image to the Hive text-recognition endpoint."""

import io
from codecs import encode, decode

import requests
import gradio as gr

HIVE_OCR_URL = "https://ajax.thehive.ai/api/demo/classify?endpoint=text_recognition"

# Upper bound (seconds) on the upstream call so a stalled request cannot
# block a Gradio worker forever.
REQUEST_TIMEOUT_SECONDS = 60


def _unescape(text):
    """Turn literal backslash escapes (e.g. ``\\u4e2d``) in *text* into characters.

    Characters outside Latin-1 are first rewritten as backslash escapes so the
    ``unicode-escape`` decoder restores them unchanged. If the text contains a
    malformed escape (for example a trailing backslash), the input is returned
    as-is instead of raising.
    """
    try:
        return decode(encode(text, "latin-1", "backslashreplace"), "unicode-escape")
    except UnicodeDecodeError:
        # A stray backslash in the recognised text is not a real escape
        # sequence; keep the raw text rather than failing the whole request.
        return text


def infer(im):
    """Run text recognition on an image and return the text plus its blocks.

    Args:
        im: The image to analyse. It must provide a PIL-style
            ``save(fp, format=...)`` method (the interface below requests
            ``type="pil"`` images from Gradio).

    Returns:
        A ``(text, blocks)`` tuple. ``text`` is the concatenation of every
        output's ``block_text`` with backslash escapes decoded. ``blocks`` is
        a list of ``{"text": ..., "rect": ...}`` dicts, one per entry in each
        output's ``bounding_poly``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    # Encode to PNG in memory: avoids a shared "converted.png" on disk that
    # concurrent requests would overwrite, and leaves no file handle open.
    buffer = io.BytesIO()
    im.save(buffer, format="PNG")

    files = {
        "image": ("converted.png", buffer.getvalue(), "image/png"),
        "model_type": (None, "detection"),
        "media_type": (None, "photo"),
    }
    headers = {"referer": "https://thehive.ai/"}

    res = requests.post(
        HIVE_OCR_URL,
        headers=headers,
        files=files,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    res.raise_for_status()

    outputs = res.json()["response"]["output"]

    blocks = []
    for output in outputs:
        for poly in output["bounding_poly"]:
            blocks.append(
                {
                    "text": "".join(c["class"] for c in poly["classes"]),
                    "rect": poly["dimensions"],
                }
            )

    text = _unescape("".join(output["block_text"] for output in outputs))
    return text, blocks


# Build the UI and start serving immediately when the script is run.
iface = gr.Interface(
    fn=infer,
    title="OCR_Hive",
    description="Demo for OCR_Hive. Transcribe and analyze media depicting typed, written, or graphic text",
    inputs=[gr.Image(type="pil")],
    outputs=["text", "json"],
    examples=["202306.jpg"],
    article='Sample OCR',
).launch()