import gradio as gr # import cv2 from transformers import pipeline # from PIL import Image # from craft_text_detector import Craft import os model_ckpt = "razhan/trocr-base-ckb" ocr = pipeline("image-to-text", model=model_ckpt) # craft = Craft( # output_dir=None, # crop_type="poly", # export_extra=False, # text_threshold=0.7, # link_threshold=0.4, # low_text=0.4, # long_size=1280, # cuda=False, # ) # def recoginition(img, prediction_result, ocr): # text = [] # for i, j in enumerate(prediction_result["boxes"]): # roi = img[ # int(prediction_result["boxes"][i][0][1]) : int( # prediction_result["boxes"][i][2][1] # ), # int(prediction_result["boxes"][i][0][0]) : int( # prediction_result["boxes"][i][2][0] # ), # ] # image = Image.fromarray(roi).convert("RGB") # generated_text = ocr(image)[0]["generated_text"] # text.append(generated_text) # return "\n".join(text) # def visualize(img, prediction_result): # for i, j in enumerate(prediction_result["boxes"]): # y1 = int(prediction_result["boxes"][i][0][1]) # y2 = int(prediction_result["boxes"][i][2][1]) # x1 = int(prediction_result["boxes"][i][0][0]) # x2 = int(prediction_result["boxes"][i][2][0]) # cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2) # return Image.fromarray(img) # def multi_line(img): # detection = craft.detect_text(img) # viz = visualize(img, detection) # text = recoginition(img, detection, ocr) # return viz, text def single_line(image): generated_text = ocr(image)[0]["generated_text"] return generated_text txt_output = gr.Textbox() image_output = gr.Image(type="filepath") # mode_input = gr.Radio(["single-line", "multi-line"], label="Mode", info="Wether to use the OCR model alone or with a text detection model (CRAFT)"), article = "

Made with ❤️ by Razhan Hameed

" # examples =[["1.jpg"], ["2.jpg"]] examples = [] # get the path of all the files inside the folder data/examples put them in the format [["1.jpg"], ["2.jpg"]] for file in os.listdir("examples"): examples.append([os.path.join("examples", file)]) with gr.Blocks() as demo: gr.HTML( """

🚀 Kurdish OCR

Demo for Kurdish OCR encoder-decoder vision model on single-text line images.

""" ) with gr.Tab("Signle line"): with gr.Row(): with gr.Column(scale=1): image = gr.Image(type="pil", label="Image") button = gr.Button("Submit") with gr.Column(scale=1): txt_output = gr.Textbox(label="Extracted text") gr.Markdown("## Single Line Examples") gr.Examples( examples=examples, inputs=image, outputs=txt_output, fn=single_line, examples_per_page=20, cache_examples=False, run_on_click=True, ) button.click(single_line, inputs=[image], outputs=[txt_output]) # with gr.Tab("Multi line"): # with gr.Row(): # with gr.Column(scale=1): # image = gr.Image(label="Image") # button = gr.Button("Submit") # with gr.Column(scale=1): # txt_output = gr.Textbox(label="Extracted text") # image_output = gr.Image(type="filepath") # button.click(multi_line, inputs=[image], outputs=[image_output, txt_output]) # at the bottom write its made by Razhan gr.Markdown(article) demo.launch()