enandhag
commited on
Commit
•
4f8f6ef
1
Parent(s):
f912b4b
pushed gradio app
Browse files- Dockerfile +74 -0
- README.md +11 -12
- app.py +65 -0
- data/1012.jpg +0 -0
- data/103.jpg +0 -0
- data/1031.jpg +0 -0
- data/1038.jpg +0 -0
- data/1046.jpg +0 -0
- docker_build.sh +5 -0
- poetry.lock +0 -0
- pyproject.toml +20 -0
- scripts/__pycache__/predict.cpython-38.pyc +0 -0
- scripts/predict.py +60 -0
- utils/__pycache__/donut_utils.cpython-38.pyc +0 -0
- utils/donut_utils.py +31 -0
Dockerfile
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ==================================================================
|
2 |
+
# Base image
|
3 |
+
# ------------------------------------------------------------------
|
4 |
+
FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04
|
5 |
+
|
6 |
+
# ==================================================================
|
7 |
+
# git, text editors, cmake
|
8 |
+
# ------------------------------------------------------------------
|
9 |
+
|
10 |
+
RUN apt-get update -y && \
|
11 |
+
apt-get upgrade -y && \
|
12 |
+
APT_INSTALL="apt-get install -y" && \
|
13 |
+
APT_INSTALL_NIR="apt-get install -y --no-install-recommends" && \
|
14 |
+
PIP_INSTALL="python -m pip --no-cache-dir install" && \
|
15 |
+
GIT_CLONE="git clone" && \
|
16 |
+
DEBIAN_FRONTEND=noninteractive $APT_INSTALL_NIR \
|
17 |
+
apt && \
|
18 |
+
DEBIAN_FRONTEND=noninteractive $APT_INSTALL \
|
19 |
+
git-core \
|
20 |
+
ca-certificates \
|
21 |
+
cmake \
|
22 |
+
wget \
|
23 |
+
vim \
|
24 |
+
nano \
|
25 |
+
unzip \
|
26 |
+
ffmpeg \
|
27 |
+
libsm6 libxext6 libxrender-dev \
|
28 |
+
libgstreamer1.0-0 gstreamer1.0-plugins-base gstreamer1.0-plugins-good gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly gstreamer1.0-libav gstreamer1.0-tools \
|
29 |
+
build-essential && \
|
30 |
+
# ==================================================================
|
31 |
+
# python, pip
|
32 |
+
# ------------------------------------------------------------------
|
33 |
+
# rm -rf /var/lib/apt/lists/* \
|
34 |
+
# /etc/apt/sources.list.d/cuda.list \
|
35 |
+
# /etc/apt/sources.list.d/nvidia-ml.list && \
|
36 |
+
apt-get update -y && \
|
37 |
+
apt-get upgrade -y && \
|
38 |
+
DEBIAN_FRONTEND=noninteractive $APT_INSTALL \
|
39 |
+
software-properties-common && \
|
40 |
+
apt-get update && \
|
41 |
+
DEBIAN_FRONTEND=noninteractive $APT_INSTALL \
|
42 |
+
python3.7 \
|
43 |
+
python3.7-dev \
|
44 |
+
python-tk \
|
45 |
+
python3-tk \
|
46 |
+
python3.7-tk \
|
47 |
+
python3-pip && \
|
48 |
+
ln -s /usr/bin/python3.7 /usr/local/bin/python3 && \
|
49 |
+
ln -s /usr/bin/python3.7 /usr/local/bin/python && \
|
50 |
+
python3.7 -m pip install pip --upgrade
|
51 |
+
|
52 |
+
# ==================================================================
|
53 |
+
# Tools and dependencies
|
54 |
+
# ------------------------------------------------------------------
|
55 |
+
RUN python -m pip install \
|
56 |
+
setuptools==41.0.0 \
|
57 |
+
transformers[sentencepiece] \
|
58 |
+
numpy\
|
59 |
+
h5py \
|
60 |
+
scipy \
|
61 |
+
pandas \
|
62 |
+
matplotlib \
|
63 |
+
datasets \
|
64 |
+
pillow \
|
65 |
+
jupyter \
|
66 |
+
scikit-learn \
|
67 |
+
tqdm \
|
68 |
+
torch \
|
69 |
+
torchvision \
|
70 |
+
pytesseract \
|
71 |
+
pdf2img \
|
72 |
+
img2pdf \
|
73 |
+
jupyterlab \
|
74 |
+
timm
|
README.md
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# ChequeEasy
|
2 |
+
|
3 |
+
ChequeEasy is a project that aims to simplify the cheque approval process, making it easier for both bank officials and customers.
|
4 |
+
|
5 |
+
This project leverages the Donut model proposed in the paper <a href="https://arxiv.org/abs/2111.15664/"> OCR-free Document Understanding Transformer </a> for the parsing of the required data from cheques.
|
6 |
+
|
7 |
+
Donut is based on a very simple transformer encoder and decoder architecture. Its main USP is that it is an OCR-free approach to Visual Document Understanding (VDU) and can perform tasks like document classification, information extraction as well as VQA.
|
8 |
+
|
9 |
+
|
10 |
+
OCR-based techniques come with several limitations, such as requiring the use of additional downstream models, a lack of understanding of document structure, and requiring hand-crafted rules for information extraction, etc.
|
11 |
+
Donut helps you get rid of all of these OCR specific limitations. The model for the project has been trained using a subset of this <a href="https://www.kaggle.com/datasets/medali1992/cheque-images/"> kaggle dataset </a>. The original dataset contains images of cheques of 10 different banks.
|
|
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import glob
|
3 |
+
import gradio as gr
|
4 |
+
from scripts.predict import parse_cheque_with_donut
|
5 |
+
|
6 |
+
# Build the rows for the examples widget: one row per file found under
# data/, each row holding a single value (the image path) for the image input.
example_list = [[example_path] for example_path in glob.glob("data/*")]

demo = gr.Blocks()

# NOTE(review): the nesting of the layout blocks below was reconstructed from
# the statement order; confirm it against the intended page layout.
with demo:

    gr.Markdown("# **<p align='center'>ChequeEasy: Banking made easy </p>**")
    # Adjacent string literals are concatenated by Python, so every piece
    # except the last ends with an explicit space to keep sentences apart.
    gr.Markdown(
        'ChequeEasy is a project that aims to simplify the process of approval of cheques and making it easier for both bank officials and customers. '
        'This project leverages Donut model proposed in the paper <a href="https://arxiv.org/abs/2111.15664/"> OCR-free Document Understanding Transformer </a> for the parsing of the required data from cheques. '
        'Donut is based on a very simple transformer encoder and decoder architecture. It\'s main USP is that it is an OCR-free approach to Visual Document Understanding (VDU) and can perform tasks like document classification, information extraction as well as VQA. '
        'OCR based techniques come with several limitations such as requiring use of additional downstream models, lack of understanding about document structure, requiring use of hand crafted rules for information extraction,etc. '
        'Donut helps you get rid of all of these OCR specific limitations. The model for the project has been trained using a subset of this <a href="https://www.kaggle.com/datasets/medali1992/cheque-images/"> kaggle dataset </a>. The original dataset contains images of cheques of 10 different banks.'
    )

    with gr.Tabs():

        with gr.TabItem("Cheque Parser"):
            gr.Markdown(
                "This module is used to extract details filled by a bank customer from cheques. At present the model is trained to extract details like - Payee Name, Amount in words, Amount in Figures, Bank Name. "
                "This model can be further trained to parse additional details like MICR Code, Cheque Number, Account Number, etc."
            )
            with gr.Box():
                gr.Markdown("**Upload Cheque**")
                # type="filepath" hands the callback a path string, which is
                # what parse_cheque_with_donut takes as its only argument.
                input_image_parse = gr.Image(type="filepath", label="Input Cheque")
            with gr.Box():
                gr.Markdown("**Parsed Cheque Data**")

                payee_name = gr.Textbox(label="Payee Name")
                amt_in_words = gr.Textbox(label="Legal Amount")
                amt_in_figures = gr.Textbox(label="Courtesy Amount")
                bank_name = gr.Textbox(label="Bank Name")

            with gr.Box():
                gr.Markdown("**Predict**")
                with gr.Row():
                    parse_cheque = gr.Button("Call Donut 🍩")

                with gr.Column():
                    gr.Examples(
                        example_list,
                        [input_image_parse],
                        [payee_name, amt_in_words, amt_in_figures, bank_name],
                        parse_cheque_with_donut,
                        cache_examples=False,
                    )

    # The output order must match the tuple returned by
    # parse_cheque_with_donut: payee, amount in words, amount in figures, bank.
    parse_cheque.click(
        parse_cheque_with_donut,
        inputs=input_image_parse,
        outputs=[payee_name, amt_in_words, amt_in_figures, bank_name],
    )

    gr.Markdown(
        '\n Solution built by: <a href="https://github.com/Nandhagopalan">Nandhagopalan Elangovan</a>'
    )

demo.launch()
|
data/1012.jpg
ADDED
data/103.jpg
ADDED
data/1031.jpg
ADDED
data/1038.jpg
ADDED
data/1046.jpg
ADDED
docker_build.sh
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
docker build -t harbor.hpc.ford.com/enandhag/docai:v1 \
|
2 |
+
--build-arg https_proxy=http://internet.ford.com:83/ \
|
3 |
+
--build-arg http_proxy=http://internet.ford.com:83/ \
|
4 |
+
--build-arg no_proxy=.ford.com,localhost,127.0.0.1 \
|
5 |
+
-f Dockerfile .
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "docai"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["enandhag <enandhag@ford.com>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.8"
|
10 |
+
transformers = {extras = ["sentencepiece"], version = "^4.25.1"}
|
11 |
+
datasets = "^2.8.0"
|
12 |
+
torch = "^1.13.1"
|
13 |
+
gradio = "^3.14.0"
|
14 |
+
numpy = "^1.24.0"
|
15 |
+
jupyter = "^1.0.0"
|
16 |
+
|
17 |
+
|
18 |
+
[build-system]
|
19 |
+
requires = ["poetry-core"]
|
20 |
+
build-backend = "poetry.core.masonry.api"
|
scripts/__pycache__/predict.cpython-38.pyc
ADDED
Binary file (1.49 kB). View file
|
|
scripts/predict.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.donut_utils import (
|
2 |
+
load_donut_model_and_processor,
|
3 |
+
prepare_data_using_processor,
|
4 |
+
load_image,
|
5 |
+
)
|
6 |
+
import re
|
7 |
+
|
8 |
+
CHEQUE_PARSER_MODEL = "Nandhu/DocAI"
|
9 |
+
TASK_PROMPT = "<s>"
|
10 |
+
|
11 |
+
|
12 |
+
def parse_cheque_with_donut(input_image_path):
    """Parse a cheque image with the Donut model and return the extracted fields.

    Args:
        input_image_path: Path of the cheque image; forwarded to ``load_image``.

    Returns:
        A ``(payee_name, amt_in_words, amt_in_figures, bank_name)`` tuple.
        A field that is absent from the parsed model output is returned as an
        empty string rather than raising ``KeyError``.
    """
    image = load_image(input_image_path)

    donut_processor, model = load_donut_model_and_processor(CHEQUE_PARSER_MODEL)
    tokenizer = donut_processor.tokenizer

    cheque_image_tensor, input_for_decoder = prepare_data_using_processor(
        donut_processor, image, TASK_PROMPT
    )

    outputs = model.generate(
        cheque_image_tensor,
        decoder_input_ids=input_for_decoder,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[tokenizer.unk_token_id]],
        return_dict_in_generate=True,
        output_scores=True,
    )

    decoded_output_sequence = donut_processor.batch_decode(outputs.sequences)[0]

    # Drop the end-of-sequence and padding markers from the decoded text.
    extracted_cheque_details = decoded_output_sequence.replace(
        tokenizer.eos_token, ""
    ).replace(tokenizer.pad_token, "")

    # Remove only the first tag in the sequence, i.e. the task prompt.
    cleaned_cheque_details = re.sub(
        r"<.*?>", "", extracted_cheque_details, count=1
    ).strip()

    # Generate the ordered json structure from the output token sequence.
    cheque_details_json = donut_processor.token2json(cleaned_cheque_details)
    print("cheque_details_json:", cheque_details_json)

    # The generated sequence is not guaranteed to contain every tag, so each
    # lookup falls back to "" instead of failing the whole request.
    # NOTE(review): assumes token2json returns a dict here; confirm.
    amt_in_words = cheque_details_json.get("VALUE_LETTERS", "")
    amt_in_figures = cheque_details_json.get("VALUE_NUMBERS", "")
    payee_name = cheque_details_json.get("USER2NAME", "")
    bank_name = cheque_details_json.get("BANK_NAME", "")

    return (payee_name, amt_in_words, amt_in_figures, bank_name)
|
utils/__pycache__/donut_utils.cpython-38.pyc
ADDED
Binary file (1.09 kB). View file
|
|
utils/donut_utils.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
2 |
+
from PIL import Image
|
3 |
+
import torch
|
4 |
+
|
5 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
6 |
+
|
7 |
+
|
8 |
+
def load_image(image_path):
    """Open the image at ``image_path`` and return it converted to RGB.

    The file is opened in a ``with`` block so it is closed explicitly once
    the conversion is done, instead of relying on garbage collection.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The image produced by ``convert("RGB")``.
    """
    with Image.open(image_path) as source_image:
        # NOTE(review): relies on convert() returning a separate image that
        # stays usable after the source is closed; confirm against Pillow docs.
        return source_image.convert("RGB")
|
11 |
+
|
12 |
+
|
13 |
+
# (processor, model) pairs that have already been loaded, keyed by repo name,
# so repeated predictions reuse them instead of loading again.
_LOADED_DONUT = {}


def load_donut_model_and_processor(trained_model_repo):
    """Return the Donut processor and model for ``trained_model_repo``.

    The pair is loaded with ``from_pretrained`` on the first call for a given
    repo and the model is moved to the module-level ``device``. Later calls
    with the same repo return the same cached objects.

    Args:
        trained_model_repo: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(donut_processor, model)`` tuple.
    """
    if trained_model_repo not in _LOADED_DONUT:
        donut_processor = DonutProcessor.from_pretrained(trained_model_repo)
        model = VisionEncoderDecoderModel.from_pretrained(trained_model_repo)
        model.to(device)
        _LOADED_DONUT[trained_model_repo] = (donut_processor, model)
    return _LOADED_DONUT[trained_model_repo]
|
18 |
+
|
19 |
+
|
20 |
+
def prepare_data_using_processor(donut_processor, image, task_prompt):
    """Turn an image and a task prompt into tensors placed on ``device``.

    Args:
        donut_processor: Processor called on the image; its ``tokenizer``
            attribute is called on the prompt.
        image: Image handed to the processor.
        task_prompt: Prompt text handed to the tokenizer.

    Returns:
        A ``(pixel_values, decoder_input_ids)`` tuple, both moved to the
        module-level ``device``.
    """
    # Image side: run the processor and keep only the pixel tensor.
    image_encoding = donut_processor(image, return_tensors="pt")
    image_tensor = image_encoding.pixel_values.to(device)

    # Prompt side: tokenize the prompt without adding special tokens and
    # keep only the token ids.
    prompt_encoding = donut_processor.tokenizer(
        task_prompt, add_special_tokens=False, return_tensors="pt"
    )
    prompt_ids = prompt_encoding["input_ids"].to(device)

    return image_tensor, prompt_ids
|