Spaces:
Running
Running
Ubuntu
committed on
Commit
•
34b23f6
1
Parent(s):
00a0f2e
updated project description, added example images
Browse files
- app.py +11 -38
- examples/cheque_parser/2.jpg +0 -0
- examples/cheque_parser/3.jpg +0 -0
- examples/cheque_parser/5000.jpg +0 -0
- examples/cheque_parser/5805.jpg +0 -0
- examples/cheque_parser/5877.jpg +0 -0
- predict_cheque_parser.py +8 -2
app.py
CHANGED
@@ -5,21 +5,25 @@ from predict_cheque_parser import parse_cheque_with_donut
|
|
5 |
|
6 |
##Create list of examples to be loaded
|
7 |
example_list = glob.glob("examples/cheque_parser/*")
|
8 |
-
faulty_cheques_list = glob.glob("examples/cheque_analyze/*")
|
9 |
example_list = list(map(lambda el:[el], example_list))
|
10 |
-
faulty_cheques_list = list(map(lambda el:[el], faulty_cheques_list))
|
11 |
|
12 |
-
demo = gr.Blocks(
|
13 |
|
14 |
with demo:
|
15 |
|
16 |
gr.Markdown("# **<p align='center'>ChequeEasy: Banking with Transformers </p>**")
|
17 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
with gr.Tabs():
|
20 |
|
21 |
with gr.TabItem("Cheque Parser"):
|
22 |
-
gr.Markdown("
|
|
|
23 |
with gr.Box():
|
24 |
gr.Markdown("**Upload Cheque**")
|
25 |
input_image_parse = gr.Image(type='filepath', label="Input Cheque")
|
@@ -30,12 +34,8 @@ with demo:
|
|
30 |
amt_in_words = gr.Textbox(label="Courtesy Amount")
|
31 |
amt_in_figures = gr.Textbox(label="Legal Amount")
|
32 |
cheque_date = gr.Textbox(label="Cheque Date")
|
33 |
-
|
34 |
-
# micr_code = gr.Textbox(label="MICR code")
|
35 |
-
# cheque_number = gr.Textbox(label="Cheque Number")
|
36 |
-
# account_number = gr.Textbox(label="Account Number")
|
37 |
|
38 |
-
amts_matching = gr.Checkbox(label="Legal & Courtesy Amount Matching"
|
39 |
stale_check = gr.Checkbox(label="Stale Cheque")
|
40 |
|
41 |
with gr.Box():
|
@@ -46,37 +46,10 @@ with demo:
|
|
46 |
with gr.Column():
|
47 |
gr.Examples(example_list, [input_image_parse],
|
48 |
[payee_name,amt_in_words,amt_in_figures,cheque_date],parse_cheque_with_donut,cache_examples=False)
|
49 |
-
# micr_code,cheque_number,account_number,
|
50 |
-
# amts_matching, stale_check]#,cache_examples=True)
|
51 |
-
|
52 |
-
|
53 |
-
with gr.TabItem("Quality Analyzer"):
|
54 |
-
gr.Markdown("The module is used to detect any mistakes made by bank customers while filling out the cheque or while taking a snapshot of the cheque. At present the model is trained to find mistakes like -'object blocking cheque', 'overwriting in cheque'. ")
|
55 |
-
with gr.Box():
|
56 |
-
gr.Markdown("**Upload Cheque**")
|
57 |
-
input_image_detect = gr.Image(type='filepath',label="Input Cheque", show_label=True)
|
58 |
-
|
59 |
-
with gr.Box(): # with gr.Column():
|
60 |
-
gr.Markdown("**Cheque Quality Results:**")
|
61 |
-
output_detections = gr.Image(label="Analyzed Cheque Image", show_label=True)
|
62 |
-
output_text = gr.Textbox()
|
63 |
-
|
64 |
-
with gr.Box():
|
65 |
-
gr.Markdown("**Predict**")
|
66 |
-
with gr.Row():
|
67 |
-
analyze_cheque = gr.Button("Call YOLOS 🤙")
|
68 |
-
|
69 |
-
gr.Markdown("**Examples:**")
|
70 |
-
|
71 |
-
with gr.Column():
|
72 |
-
gr.Examples(faulty_cheques_list, input_image_detect, [output_detections, output_text])#, predict, cache_examples=True)
|
73 |
|
74 |
|
75 |
parse_cheque.click(parse_cheque_with_donut, inputs=input_image_parse, outputs=[payee_name,amt_in_words,amt_in_figures,cheque_date,amts_matching,stale_check])
|
76 |
-
|
77 |
-
# amts_matching, stale_check])
|
78 |
-
# analyze_cheque.click(predict, inputs=input_image_detect, outputs=[output_detections, output_text])
|
79 |
-
|
80 |
gr.Markdown('\n Solution built by: <a href=\"https://www.linkedin.com/in/shivalika-singh/\">Shivalika Singh</a>')
|
81 |
|
82 |
demo.launch()
|
|
|
5 |
|
6 |
##Create list of examples to be loaded
|
7 |
example_list = glob.glob("examples/cheque_parser/*")
|
|
|
8 |
example_list = list(map(lambda el:[el], example_list))
|
|
|
9 |
|
10 |
+
demo = gr.Blocks()
|
11 |
|
12 |
with demo:
|
13 |
|
14 |
gr.Markdown("# **<p align='center'>ChequeEasy: Banking with Transformers </p>**")
|
15 |
+
gr.Markdown("ChequeEasy is a project that aims to simply the process of approval of cheques. Leveraging recent advances in Visual Document Understanding (VDU) domain to extract relevant data from cheques and make the whole process quicker and easier for both bank officials and customers. \
|
16 |
+
This project leverages Donut model proposed in this <a href=\"https://arxiv.org/abs/2111.15664/\">paper </a> for the parsing of the required data from cheques." \
|
17 |
+
"Donut is based on a very simple transformer encoder and decoder architecture. It's main USP is that it is an OCR-free approach to information extraction from documents. \
|
18 |
+
OCR based techniques come with several limitations such as use of additional downstream models, lack of understanding about document structure, use of hand crafted rules,etc. \
|
19 |
+
Donut helps you get rid of all of these OCR specific limitations.")
|
20 |
+
|
21 |
|
22 |
with gr.Tabs():
|
23 |
|
24 |
with gr.TabItem("Cheque Parser"):
|
25 |
+
gr.Markdown("This module is used to extract details filled by a bank customer from cheques. At present the model is trained to extract details like - payee_name, amount_in_words, amount_in_figures. \
|
26 |
+
This model can be further trained to parse additional details like micr_code, cheque_number, account_number, etc")
|
27 |
with gr.Box():
|
28 |
gr.Markdown("**Upload Cheque**")
|
29 |
input_image_parse = gr.Image(type='filepath', label="Input Cheque")
|
|
|
34 |
amt_in_words = gr.Textbox(label="Courtesy Amount")
|
35 |
amt_in_figures = gr.Textbox(label="Legal Amount")
|
36 |
cheque_date = gr.Textbox(label="Cheque Date")
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
amts_matching = gr.Checkbox(label="Legal & Courtesy Amount Matching")
|
39 |
stale_check = gr.Checkbox(label="Stale Cheque")
|
40 |
|
41 |
with gr.Box():
|
|
|
46 |
with gr.Column():
|
47 |
gr.Examples(example_list, [input_image_parse],
|
48 |
[payee_name,amt_in_words,amt_in_figures,cheque_date],parse_cheque_with_donut,cache_examples=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
|
51 |
parse_cheque.click(parse_cheque_with_donut, inputs=input_image_parse, outputs=[payee_name,amt_in_words,amt_in_figures,cheque_date,amts_matching,stale_check])
|
52 |
+
|
|
|
|
|
|
|
53 |
gr.Markdown('\n Solution built by: <a href=\"https://www.linkedin.com/in/shivalika-singh/\">Shivalika Singh</a>')
|
54 |
|
55 |
demo.launch()
|
examples/cheque_parser/2.jpg
ADDED
examples/cheque_parser/3.jpg
ADDED
examples/cheque_parser/5000.jpg
ADDED
examples/cheque_parser/5805.jpg
ADDED
examples/cheque_parser/5877.jpg
ADDED
predict_cheque_parser.py
CHANGED
@@ -13,8 +13,8 @@ TASK_PROMPT = "<s_cord-v2>"
|
|
13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
14 |
|
15 |
def load_donut_model_and_processor():
|
16 |
-
donut_processor = DonutProcessor.from_pretrained(CHEQUE_PARSER_MODEL
|
17 |
-
model = VisionEncoderDecoderModel.from_pretrained(CHEQUE_PARSER_MODEL
|
18 |
model.to(device)
|
19 |
return donut_processor, model
|
20 |
|
@@ -69,6 +69,9 @@ def parse_cheque_with_donut(input_image_path):
|
|
69 |
macthing_amts = match_legal_and_courstesy_amount(amt_in_words,amt_in_figures)
|
70 |
|
71 |
payee_name = cheque_details_json['cheque_details'][2]['payee_name']
|
|
|
|
|
|
|
72 |
cheque_date = '06/05/2022'
|
73 |
stale_cheque = check_if_cheque_is_stale(cheque_date)
|
74 |
|
@@ -86,8 +89,11 @@ def spell_correction(amt_in_words):
|
|
86 |
|
87 |
def match_legal_and_courstesy_amount(legal_amount,courtesy_amount):
|
88 |
macthing_amts = False
|
|
|
|
|
89 |
corrected_amt_in_words = spell_correction(legal_amount)
|
90 |
print("corrected_amt_in_words:",corrected_amt_in_words)
|
|
|
91 |
numeric_legal_amt = w2n.word_to_num(corrected_amt_in_words)
|
92 |
print("numeric_legal_amt:",numeric_legal_amt)
|
93 |
if int(numeric_legal_amt) == int(courtesy_amount):
|
|
|
13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
14 |
|
15 |
def load_donut_model_and_processor():
|
16 |
+
donut_processor = DonutProcessor.from_pretrained(CHEQUE_PARSER_MODEL)
|
17 |
+
model = VisionEncoderDecoderModel.from_pretrained(CHEQUE_PARSER_MODEL)
|
18 |
model.to(device)
|
19 |
return donut_processor, model
|
20 |
|
|
|
69 |
macthing_amts = match_legal_and_courstesy_amount(amt_in_words,amt_in_figures)
|
70 |
|
71 |
payee_name = cheque_details_json['cheque_details'][2]['payee_name']
|
72 |
+
|
73 |
+
## In the cheques dataset used to train the model -> all the cheques are dated '06/05/22'
|
74 |
+
## Train model to extract cheque date -> to do
|
75 |
cheque_date = '06/05/2022'
|
76 |
stale_cheque = check_if_cheque_is_stale(cheque_date)
|
77 |
|
|
|
89 |
|
90 |
def match_legal_and_courstesy_amount(legal_amount,courtesy_amount):
|
91 |
macthing_amts = False
|
92 |
+
if len(legal_amount) == 0:
|
93 |
+
return macthing_amts
|
94 |
corrected_amt_in_words = spell_correction(legal_amount)
|
95 |
print("corrected_amt_in_words:",corrected_amt_in_words)
|
96 |
+
|
97 |
numeric_legal_amt = w2n.word_to_num(corrected_amt_in_words)
|
98 |
print("numeric_legal_amt:",numeric_legal_amt)
|
99 |
if int(numeric_legal_amt) == int(courtesy_amount):
|