import gradio as gr
# import cv2
from transformers import pipeline
# from PIL import Image
# from craft_text_detector import Craft
import os
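
# cv2, PIL, and craft_text_detector are only needed by the disabled CRAFT
# multi-line pipeline further down, so their imports stay commented out with it.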

# Load the Kurdish TrOCR checkpoint as a Hugging Face image-to-text pipeline;
# the weights are downloaded from the Hub on first run and cached locally.
model_ckpt = "razhan/trocr-base-ckb"
ocr = pipeline("image-to-text", model=model_ckpt)
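# ocr(image) returns a list of dicts such as [{"generated_text": "..."}];
# single_line() below reads the first entry.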


# craft = Craft(
#     output_dir=None,
#     crop_type="poly",
#     export_extra=False,
#     text_threshold=0.7,
#     link_threshold=0.4,
#     low_text=0.4,
#     long_size=1280,
#     cuda=False,
# )


# def recognition(img, prediction_result, ocr):
#     text = []
#     for i, j in enumerate(prediction_result["boxes"]):
#         roi = img[
#             int(prediction_result["boxes"][i][0][1]) : int(
#                 prediction_result["boxes"][i][2][1]
#             ),
#             int(prediction_result["boxes"][i][0][0]) : int(
#                 prediction_result["boxes"][i][2][0]
#             ),
#         ]
#         image = Image.fromarray(roi).convert("RGB")
#         generated_text = ocr(image)[0]["generated_text"]
#         text.append(generated_text)
#     return "\n".join(text)


# def visualize(img, prediction_result):
#     for i, j in enumerate(prediction_result["boxes"]):
#         y1 = int(prediction_result["boxes"][i][0][1])
#         y2 = int(prediction_result["boxes"][i][2][1])

#         x1 = int(prediction_result["boxes"][i][0][0])
#         x2 = int(prediction_result["boxes"][i][2][0])

#         cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)

#     return Image.fromarray(img)


# def multi_line(img):
#     detection = craft.detect_text(img)
#     viz = visualize(img, detection)
#     text = recognition(img, detection, ocr)

#     return viz, text


def single_line(image):
    """Run the OCR pipeline on a single-line image and return the decoded text."""
    generated_text = ocr(image)[0]["generated_text"]
    return generated_text


txt_output = gr.Textbox()
image_output = gr.Image(type="filepath")
# mode_input = gr.Radio(["single-line", "multi-line"], label="Mode", info="Whether to use the OCR model alone or with a text detection model (CRAFT)")


article = "<p style='text-align: center'> Made with ❤️ by <a href='https://razhan.ai'>Razhan Hameed</a></p>"
# examples =[["1.jpg"], ["2.jpg"]]
examples = []

# Collect every file in the examples/ folder in the [["1.jpg"], ["2.jpg"]] format
# expected by gr.Examples.
for file in os.listdir("examples"):
    examples.append([os.path.join("examples", file)])
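
# os.listdir() picks up every entry in examples/, so the folder is assumed to
# contain only image files; sorting (e.g. sorted(os.listdir("examples"))) would
# make the example order deterministic if that matters.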


with gr.Blocks() as demo:
    gr.HTML(
        """
    <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
        <h1 style="font-weight: 900; font-size: 3rem; margin: 0rem"> 🚀 Kurdish OCR </h1>

        <p style="font-weight: 450; font-size: 1rem; margin: 0rem"> Demo of a Kurdish OCR encoder-decoder vision model on single-line text images.</p>
        <h2 style="text-align: left; font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
            <ul style="list-style-type:disc;">
                <li>The model was trained to recognize single lines of text. After uploading an image, use the pen icon to crop it down to a single line.</li>
                <!-- <li>For images containing multiple lines of text, you can utilize the multi-line tab. Please be aware that the CRAFT text detection used in the pipeline may encounter difficulties with Arabic letters, resulting in potential inaccuracies in detecting the boundaries and angles of the text. The OCR model will receive the identified regions, but it might not provide accurate results if certain parts of the letters are excluded in the captured regions. </li> -->
            </ul>
        </h2>
    </div>
    """
    )

    with gr.Tab("Single line"):
        with gr.Row():
            with gr.Column(scale=1):
                image = gr.Image(type="pil", label="Image")
                button = gr.Button("Submit")

            with gr.Column(scale=1):
                txt_output = gr.Textbox(label="Extracted text")

        gr.Markdown("## Single Line Examples")
        gr.Examples(
            examples=examples,
            inputs=image,
            outputs=txt_output,
            fn=single_line,
            examples_per_page=20,
            cache_examples=False,
            run_on_click=True,
        )
        button.click(single_line, inputs=[image], outputs=[txt_output])

    # with gr.Tab("Multi line"):
    #     with gr.Row():
    #         with gr.Column(scale=1):
    #             image = gr.Image(label="Image")
    #             button = gr.Button("Submit")

    #         with gr.Column(scale=1):
    #             txt_output = gr.Textbox(label="Extracted text")
    #             image_output = gr.Image(type="filepath")

    #     button.click(multi_line, inputs=[image], outputs=[image_output, txt_output])
    # Author credit at the bottom of the page.
    gr.Markdown(article)

demo.launch()