import gradio as gr # import cv2 from transformers import pipeline # from PIL import Image # from craft_text_detector import Craft import os model_ckpt = "razhan/trocr-base-ckb" ocr = pipeline("image-to-text", model=model_ckpt) # craft = Craft( # output_dir=None, # crop_type="poly", # export_extra=False, # text_threshold=0.7, # link_threshold=0.4, # low_text=0.4, # long_size=1280, # cuda=False, # ) # def recoginition(img, prediction_result, ocr): # text = [] # for i, j in enumerate(prediction_result["boxes"]): # roi = img[ # int(prediction_result["boxes"][i][0][1]) : int( # prediction_result["boxes"][i][2][1] # ), # int(prediction_result["boxes"][i][0][0]) : int( # prediction_result["boxes"][i][2][0] # ), # ] # image = Image.fromarray(roi).convert("RGB") # generated_text = ocr(image)[0]["generated_text"] # text.append(generated_text) # return "\n".join(text) # def visualize(img, prediction_result): # for i, j in enumerate(prediction_result["boxes"]): # y1 = int(prediction_result["boxes"][i][0][1]) # y2 = int(prediction_result["boxes"][i][2][1]) # x1 = int(prediction_result["boxes"][i][0][0]) # x2 = int(prediction_result["boxes"][i][2][0]) # cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2) # return Image.fromarray(img) # def multi_line(img): # detection = craft.detect_text(img) # viz = visualize(img, detection) # text = recoginition(img, detection, ocr) # return viz, text def single_line(image): generated_text = ocr(image)[0]["generated_text"] return generated_text txt_output = gr.Textbox() image_output = gr.Image(type="filepath") # mode_input = gr.Radio(["single-line", "multi-line"], label="Mode", info="Wether to use the OCR model alone or with a text detection model (CRAFT)"), article = "
Made with ❤️ by Razhan Hameed
" # examples =[["1.jpg"], ["2.jpg"]] examples = [] # get the path of all the files inside the folder data/examples put them in the format [["1.jpg"], ["2.jpg"]] for file in os.listdir("examples"): examples.append([os.path.join("examples", file)]) with gr.Blocks() as demo: gr.HTML( """Demo for Kurdish OCR encoder-decoder vision model on single-text line images.