Onur Savas committed on
Commit
ceeb3e5
1 Parent(s): f99a712
Files changed (3) hide show
  1. Dockerfile +32 -0
  2. main.py +54 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
# You will also find guides there on how best to write your Dockerfile.

FROM ubuntu:22.04

# Keep apt from prompting during the image build.
ARG DEBIAN_FRONTEND=noninteractive

# Unprivileged account (UID 1000) that the container switches to further down.
RUN useradd -m -u 1000 user

# System tooling plus the Python 3.10 interpreter and headers.
# The purge runs BEFORE the package lists are deleted so apt can still resolve
# the package name; the lists are removed last to keep the layer small.
# NOTE(review): python3-blinker is presumably purged so that pip can manage its
# own copy without clashing with the distro-installed one -- confirm.
RUN apt-get update && apt-get install -y \
    git \
    curl \
    software-properties-common \
    python3.10 \
    python3.10-dev \
    && apt-get remove -y --purge python3-blinker \
    && rm -rf /var/lib/apt/lists/*

# Distro build of OpenCV; clean the package lists here as well so the index
# downloaded by this layer's `apt-get update` is not baked into the image.
RUN apt-get update && apt-get install -y python3-opencv \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /code

# Copy the requirements on their own first so the dependency layer stays cached
# when only application code changes.
COPY --chown=user ./requirements.txt /code/requirements.txt

# Bootstrap pip for python3.10, then install the Python dependencies.
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
    && python3.10 -m pip install --no-cache-dir -r /code/requirements.txt

COPY --chown=user . .

USER user

CMD ["python3.10", "main.py"]
main.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from paddleocr import PaddleOCR, draw_ocr
3
+ import json
4
+ import os
5
+ import cv2
6
+ import numpy as np
7
+ from PIL import Image
8
+
9
def _make_engine(lang_code):
    """Build a PaddleOCR engine for *lang_code* with angle classification on."""
    return PaddleOCR(use_angle_cls=True, lang=lang_code)


# One engine per script family, created once at import time and reused by
# every request.
ocr_en = _make_engine("en")
ocr_ch = _make_engine('ch')
ocr_ru = _make_engine('cyrillic')
ocr_ar = _make_engine('arabic')
# Alternative kept for reference: load the Chinese models from local directories.
#ocr_ch = PaddleOCR(det_model_dir="models/det/ch/ch_PP-OCRv4_det_infer", rec_model_dir="models/rec/ch/ch_PP-OCRv4_rec_infer", cls_model_dir="models/cls/ch_ppocr_mobile_v2.0_cls_infer", rec_char_dict_path="models/dict/ppocr_keys_v1.txt", lang="ch")
14
+
15
+
16
def perform_ocr(img, lang="Russian"):
    """Run OCR on an uploaded image and return the annotated image and text.

    Args:
        img: Filesystem path of the image to read (the upload widget below is
            configured with ``type="filepath"``).
        lang: Label of the language chosen in the UI. Defaults to
            ``"Russian"``, the value this handler used to hard-code, so
            existing single-argument callers keep their behavior.

    Returns:
        A two-item list ``[annotated_image, text]``: a PIL image with the
        detections drawn on it, and the recognized lines joined by newlines.
        When nothing is detected the unmodified image and an empty string are
        returned.

    Raises:
        gr.Error: If no image was supplied or ``lang`` is not a known label.
    """
    if not img:
        raise gr.Error("Please upload an image first.")

    # Several UI labels share one engine (same script family).
    engines = {
        "English": ocr_en,
        "Chinese (Simplified)": ocr_ch,
        "Russian": ocr_ru,
        "Ukrainian": ocr_ru,
        "Arabic": ocr_ar,
        "Persian": ocr_ar,
    }
    ocr = engines.get(lang)
    if ocr is None:
        raise gr.Error(f"Unsupported language: {lang!r}")

    result = ocr.ocr(img, cls=True)
    # ocr() returns one entry per input image; each entry is a list of
    # [box, (text, score)] detections, or empty/None when nothing was found.
    page = result[0] if result else None

    image = Image.open(img).convert('RGB')
    if not page:
        return [image, ""]

    boxes = [line[0] for line in page]
    txts = [line[1][0] for line in page]
    scores = [line[1][1] for line in page]
    final_result = "\n".join(txts)

    im_show = draw_ocr(image, boxes, txts, scores, font_path='fonts/simfang.ttf')
    im_show = Image.fromarray(im_show)
    return [im_show, final_result]


demo = gr.Blocks()

with demo:
    gr.Markdown("# Multilingual OCR")
    with gr.Row():
        with gr.Column():
            # NOTE(review): `source=` is the Gradio 3.x spelling; requirements.txt
            # does not pin gradio -- confirm the installed version accepts it.
            input_image = gr.Image(source="upload", type="filepath")
            # No trailing comma here: it would turn the component into a tuple.
            # "Russian" is preselected to match the handler's previous default.
            input_radio = gr.Radio(
                ["English", "Chinese (Simplified)", "Russian", "Ukrainian", "Arabic", "Persian"],
                value="Russian",
                label="Languages",
            )
            input_button = gr.Button("Run!")
        with gr.Column():
            output_image = gr.Image()
            output_text = gr.Textbox(label="Results")

    # Pass the selected language along with the image so the radio takes effect.
    input_button.click(
        fn=perform_ocr,
        inputs=[input_image, input_radio],
        outputs=[output_image, output_text],
    )

demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ gradio
4
+ paddleocr==2.7.0.2
5
+ paddlepaddle==2.4.2