ocr_vst / app.py
whoami02's picture
Update app.py
a049953 verified
raw
history blame contribute delete
No virus
4.06 kB
import os
import math
import re
import ast
import gradio as gr
import numpy as np
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from PIL import Image, ImageDraw
# Directory into which the uploaded page is written as numbered tile images
# ("1.jpg", "2.jpg", ...) before OCR is run on each tile.
img_temp = "tp"
# Directory into which the per-word crops referenced by the result table's
# <img> tags are written.
sub_img_temp = "tp1"
def load_model():
    """Build the OCR predictor used by this app.

    Returns:
        The object returned by ``ocr_predictor`` for the fixed set of
        options below (rotation-aware LinkNet detector, CRNN recognizer,
        pretrained weights).
    """
    settings = {
        "det_arch": 'linknet_resnet18_rotation',
        "reco_arch": 'crnn_vgg16_bn',
        "detect_orientation": True,
        "assume_straight_pages": False,
        "pretrained": True,
        "pretrained_backbone": True,
        "export_as_straight_boxes": True,
        "preserve_aspect_ratio": True,
    }
    predictor = ocr_predictor(**settings)
    return predictor
def convert_coordinates(geometry, page_dim, i, j):
    """Turn a relative word box on one tile into pixel bounds on the full page.

    Args:
        geometry: Pair ``((x_min, y_min), (x_max, y_max))`` of coordinates
            expressed as fractions of the tile size.
        page_dim: Tile dimensions; index 0 is used as the extent along y
            and index 1 as the extent along x.
        i: Number of tile widths to shift the box along x.
        j: Number of tile heights to shift the box along y.

    Returns:
        ``[x_min, x_max, y_min, y_max]``. Minima are rounded down and
        maxima rounded up before the tile shift is added.
    """
    height, width = page_dim[0], page_dim[1]
    (left, top), (right, bottom) = geometry[0], geometry[1]
    x_shift = i * width
    y_shift = j * height
    return [
        math.floor(left * width) + x_shift,
        math.ceil(right * width) + x_shift,
        math.floor(top * height) + y_shift,
        math.ceil(bottom * height) + y_shift,
    ]
def get_coordinates(output, x, y):
    """List every recognised word on the first page together with its bounds.

    Args:
        output: Exported OCR result; only ``output['pages'][0]`` is read.
        x: Tile index along x, forwarded to ``convert_coordinates``.
        y: Tile index along y, forwarded to ``convert_coordinates``.

    Returns:
        One string per word, formatted as
        ``"[x_min, x_max, y_min, y_max]: value"``, in block / line / word
        order.
    """
    first_page = output['pages'][0]
    tile_dim = first_page["dimensions"]
    return [
        "{}: {}".format(
            convert_coordinates(word["geometry"], tile_dim, x, y),
            word["value"],
        )
        for block in first_page["blocks"]
        for line in block["lines"]
        for word in line["words"]
    ]
def get_vals(file_path, wh):
    """Run OCR over a ``wh`` x ``wh`` grid of numbered tile images.

    Tiles are read from ``{file_path}/1.jpg``, ``{file_path}/2.jpg``, ...
    The tile number advances fastest along the second grid index, which is
    the one passed to ``get_coordinates`` as ``y``.

    Args:
        file_path: Directory containing the tile images.
        wh: Number of tiles along each side of the grid; the grid is
            taken to be square.

    Returns:
        Flat list of the strings produced by ``get_coordinates`` for every
        tile, in tile order.
    """
    model = load_model()
    words = []
    grid_cells = ((i, j) for i in range(wh) for j in range(wh))
    for tile_number, (i, j) in enumerate(grid_cells, start=1):
        tile_doc = DocumentFile.from_images(f"{file_path}/{tile_number}.jpg")
        exported = model(tile_doc).export()
        words.extend(get_coordinates(exported, i, j))
    return words
def clean_dir(path):
    """Remove the ``.jpg`` files left in *path* by a previous run.

    The directory is created when it does not exist yet, so callers can
    save images into it right after this call. Entries that are not regular
    ``.jpg`` files (placeholder files, sub-directories) are left untouched.

    Args:
        path: Directory to clean.
    """
    os.makedirs(path, exist_ok=True)
    for name in os.listdir(path):
        target = os.path.join(path, name)
        # Delete what is actually present instead of assuming the directory
        # holds exactly 1.jpg..n.jpg: one extra entry or a gap in the
        # numbering used to raise FileNotFoundError or leave stale images.
        if name.endswith(".jpg") and os.path.isfile(target):
            os.remove(target)
def html_path(img, counter):
    """Save a crop in the sub-image directory and return an HTML tag for it.

    Args:
        img: Image object exposing ``save(path)``.
        counter: Number used as the file name (``{counter}.jpg``).

    Returns:
        An ``<img>`` tag whose ``src`` is ``/file=`` followed by the path
        the crop was saved to.
    """
    saved_to = f"{sub_img_temp}/{counter}.jpg"
    img.save(saved_to)
    return f"<img src='/file={saved_to}'></img>"
def create_box(l):  # l holds the box bounds as [x_min, x_max, y_min, y_max]
    """Reorder box bounds into ``(x_min, y_min, x_max, y_max)``.

    Only the first four entries of ``l`` are read.
    """
    x_min, x_max, y_min, y_max = l[0], l[1], l[2], l[3]
    return (x_min, y_min, x_max, y_max)
def _ocr_tile_grid(tile_dir, cols, rows):
    """Run OCR on a ``cols`` x ``rows`` grid of tiles numbered column by column from 1."""
    model = load_model()
    words, tile_number = [], 0
    for col in range(cols):
        for row in range(rows):
            tile_number += 1
            tile_doc = DocumentFile.from_images(f"{tile_dir}/{tile_number}.jpg")
            words.extend(get_coordinates(model(tile_doc).export(), col, row))
    return words


def process(filepath, regex, size=(1656,1170)):
    """OCR an image tile by tile and return the words matching a number pattern.

    The image is cut into ``size``-shaped tiles that are saved to
    ``img_temp``. Each tile is run through OCR, every recognised word is
    cropped from the full image and saved via ``html_path``, and the rows
    whose text matches the selected pattern are returned.

    Args:
        filepath: Path of the image to process.
        regex: ``'Regex-1'`` selects values that start with one whitespace
            character followed by a number with an optional ``.``/``,``
            decimal part; any other value selects rows containing a digit.
        size: ``(width, height)`` of one tile in pixels.

    Returns:
        A ``pandas.DataFrame`` with columns ``Coordinates``
        (``[x_min, x_max, y_min, y_max]``), ``Image`` (HTML ``<img>`` tag
        of the word crop) and ``Value`` (recognised text), restricted to
        the rows whose ``Value`` matches the pattern.
    """
    clean_dir(path=img_temp)
    clean_dir(path=sub_img_temp)

    with Image.open(filepath) as source:
        # Tiles and crops are written as JPEG, which cannot store alpha or
        # palette modes; normalising to RGB also detaches the pixel data
        # from the file handle so it can be closed here.
        img = source.convert("RGB")

    width, height = img.size
    x_starts = range(0, width, size[0])
    y_starts = range(0, height, size[1])

    tile_number = 0
    for left in x_starts:
        for top in y_starts:
            tile_number += 1
            tile_box = (left, top, left + size[0], top + size[1])
            img.crop(tile_box).save(f"{img_temp}/{tile_number}.jpg")

    if regex == 'Regex-1':
        pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
    else:
        pattern = re.compile(r"\d+")

    # The grid shape comes from the tiling loop itself, not from a directory
    # listing and a square-root guess, so non-square pages keep every tile
    # and each tile gets the correct column/row offset.
    data = _ocr_tile_grid(img_temp, len(x_starts), len(y_starts))

    dimensions, im_, values = [], [], []
    for counter, d in enumerate(data, start=1):
        # Split on the first colon only: the coordinate list never contains
        # one, but the recognised text may (e.g. "12:30"). The value keeps
        # its leading space, which the 'Regex-1' pattern relies on.
        coords_text, _, value = d.partition(':')
        coords = ast.literal_eval(coords_text)
        dimensions.append(coords)
        im_.append(html_path(img.crop(create_box(coords)), counter=counter))
        values.append(value)

    metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
    # Keep only the rows whose recognised text matches the selected pattern.
    return metadata[metadata['Value'].str.contains(pattern)]
def main():
    """Build the Gradio interface around ``process`` and launch it."""
    image_input = gr.Image(type="filepath", interactive=True)
    regex_choice = gr.Dropdown(['Regex-1'])
    results_table = gr.DataFrame(
        wrap=True,
        datatype=["str", "markdown", "str"],
        interactive=True,
    )
    app = gr.Interface(
        fn=process,
        inputs=[image_input, regex_choice],
        outputs=results_table,
        title="OCR",
        description="Issue with filesystem...not able to parse all files in the folders",
    )
    app.launch(debug=True, show_error=True)


if __name__ == "__main__":
    main()