# kurdish-ocr / app.py
# razhan's picture
# update
# 3fb4fb1
# raw
# history blame
# 4.8 kB
import gradio as gr
# import cv2
from transformers import pipeline
# from PIL import Image
# from craft_text_detector import Craft
import os
# Checkpoint identifier handed to the transformers pipeline as its model.
model_ckpt = "razhan/trocr-base-ckb"
# Image-to-text pipeline built once at import time; single_line() calls it.
ocr = pipeline(task="image-to-text", model=model_ckpt)
# craft = Craft(
# output_dir=None,
# crop_type="poly",
# export_extra=False,
# text_threshold=0.7,
# link_threshold=0.4,
# low_text=0.4,
# long_size=1280,
# cuda=False,
# )
# def recoginition(img, prediction_result, ocr):
# text = []
# for i, j in enumerate(prediction_result["boxes"]):
# roi = img[
# int(prediction_result["boxes"][i][0][1]) : int(
# prediction_result["boxes"][i][2][1]
# ),
# int(prediction_result["boxes"][i][0][0]) : int(
# prediction_result["boxes"][i][2][0]
# ),
# ]
# image = Image.fromarray(roi).convert("RGB")
# generated_text = ocr(image)[0]["generated_text"]
# text.append(generated_text)
# return "\n".join(text)
# def visualize(img, prediction_result):
# for i, j in enumerate(prediction_result["boxes"]):
# y1 = int(prediction_result["boxes"][i][0][1])
# y2 = int(prediction_result["boxes"][i][2][1])
# x1 = int(prediction_result["boxes"][i][0][0])
# x2 = int(prediction_result["boxes"][i][2][0])
# cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
# return Image.fromarray(img)
# def multi_line(img):
# detection = craft.detect_text(img)
# viz = visualize(img, detection)
# text = recoginition(img, detection, ocr)
# return viz, text
def single_line(image):
    """Recognize the text of a single-line image with the ``ocr`` pipeline.

    Args:
        image: The image to read, passed straight to ``ocr``. The UI wires
            this to ``gr.Image(type="pil")``. ``None`` means no image was
            provided.

    Returns:
        The ``"generated_text"`` field of the first pipeline result, or an
        empty string when ``image`` is ``None``.
    """
    if image is None:
        # Submit pressed with an empty image input: nothing to recognize, so
        # return an empty result instead of letting the pipeline raise.
        return ""
    predictions = ocr(image)
    return predictions[0]["generated_text"]
# Footer credit; rendered at the bottom of the page with gr.Markdown(article).
article = (
    "<p style='text-align: center'> Made with ❤️ by "
    "<a href='https://razhan.ai'>Razhan Hameed</a></p>"
)

# Module-level output components. `txt_output` is rebound to a labelled
# Textbox inside the Blocks layout further down; `image_output` is only
# referenced by the disabled multi-line tab.
txt_output = gr.Textbox()
image_output = gr.Image(type="filepath")
# mode_input = gr.Radio(["single-line", "multi-line"], label="Mode", info="Whether to use the OCR model alone or with a text detection model (CRAFT)"),
# Rows for gr.Examples: one single-element row per file in the "examples"
# folder, in the shape [["examples/1.jpg"], ["examples/2.jpg"]].
# examples =[["1.jpg"], ["2.jpg"]]
_EXAMPLES_DIR = "examples"
examples = []
# A missing folder simply means "no examples" rather than a crash at import.
if os.path.isdir(_EXAMPLES_DIR):
    # os.listdir() returns names in arbitrary order; sort them so the example
    # gallery is stable between runs.
    _candidates = (
        os.path.join(_EXAMPLES_DIR, name)
        for name in sorted(os.listdir(_EXAMPLES_DIR))
    )
    # Keep regular files only: a sub-directory cannot be loaded as an image.
    examples = [[path] for path in _candidates if os.path.isfile(path)]
# Page layout: HTML header, one tab with the image input / text output pair,
# an example gallery, and the footer credit.
with gr.Blocks() as demo:
    # Header with the title, a short description and the usage hint.
    gr.HTML(
        """
<div style="text-align: center; max-width: 1200px; margin: 20px auto;">
<h1 style="font-weight: 900; font-size: 3rem; margin: 0rem"> 🚀 Kurdish OCR </h1>
<p style="font-weight: 450; font-size: 1rem; margin: 0rem"> Demo for Kurdish OCR encoder-decoder vision model on single-text line images.</p>
<h2 style="text-align: left; font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
<ul style="list-style-type:disc;">
<li>The model's original training focuses on recognizing text in single lines. Once you upload the image, use the pen icon to crop the image into a single line format</li>
<!-- <li>For images containing multiple lines of text, you can utilize the multi-line tab. Please be aware that the CRAFT text detection used in the pipeline may encounter difficulties with Arabic letters, resulting in potential inaccuracies in detecting the boundaries and angles of the text. The OCR model will receive the identified regions, but it might not provide accurate results if certain parts of the letters are excluded in the captured regions. </li> -->
</ul>
</h2>
</div>
"""
    )
    # Tab label fixed: it previously read "Signle line".
    with gr.Tab("Single line"):
        with gr.Row():
            # Left column: image upload and the submit button.
            with gr.Column(scale=1):
                image = gr.Image(type="pil", label="Image")
                button = gr.Button("Submit")
            # Right column: recognized text.
            with gr.Column(scale=1):
                txt_output = gr.Textbox(label="Extracted text")
        gr.Markdown("## Single Line Examples")
        # Examples are not cached; with run_on_click=True, selecting one runs
        # single_line on it and fills the text box.
        gr.Examples(
            examples=examples,
            inputs=image,
            outputs=txt_output,
            fn=single_line,
            examples_per_page=20,
            cache_examples=False,
            run_on_click=True,
        )
        # Manual submission uses the same recognition function.
        button.click(single_line, inputs=[image], outputs=[txt_output])
    # Disabled multi-line tab (needs the commented-out CRAFT helpers):
    # with gr.Tab("Multi line"):
    #     with gr.Row():
    #         with gr.Column(scale=1):
    #             image = gr.Image(label="Image")
    #             button = gr.Button("Submit")
    #         with gr.Column(scale=1):
    #             txt_output = gr.Textbox(label="Extracted text")
    #             image_output = gr.Image(type="filepath")
    #     button.click(multi_line, inputs=[image], outputs=[image_output, txt_output])
    # Footer: credit line at the bottom of the page.
    gr.Markdown(article)
# Start the web server for the demo.
demo.launch()