diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..05c297572e2750b86810af60d309f9e6b3f7c50f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,35 +1 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
diff --git a/0.jpg b/0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..13ec1dcdf303fcc177ce98002ddf13eafe3d6c68
Binary files /dev/null and b/0.jpg differ
diff --git a/README.md b/README.md
index 1a68a1163d059f79b5be39d0b468bcef94418877..0b1472270eed17ef5187c7a6d42451fa044d4ca8 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1 @@
----
-title: Test Ocr
-emoji: 🌍
-colorFrom: red
-colorTo: purple
-sdk: streamlit
-sdk_version: 1.36.0
-app_file: app.py
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# ocr_all
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..51fc504734545d84ba53969fc71e15f8f120f60f
--- /dev/null
+++ b/app.py
@@ -0,0 +1,254 @@
+import streamlit as st
+import os
+import argparse
+import glob
+
+import cv2
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import torch
+import torch.backends.cudnn as cudnn
+from PIL import Image
+
+import test
+from test import copyStateDict
+import file_utils
+import imgproc
+from craft import CRAFT
+from vietocr.tool.predictor import Predictor
+from vietocr.tool.config import Cfg
+
+
+# Helper to parse boolean command-line flags.
+def str2bool(v):
+    return v.lower() in ("yes", "y", "true", "t", "1")
+
+
+# CRAFT
+parser = argparse.ArgumentParser(description='CRAFT Text Detection')
+parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained model')
+parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
+parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
+parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
+parser.add_argument('--cpu', default=True, type=str2bool, help='Use cpu for inference')
+parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
+parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
+parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
+parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
+parser.add_argument('--test_folder', default='data_image', type=str, help='path to the input images')
+parser.add_argument('--refine', default=True, action='store_true', help='enable link refiner')
+parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str,
+                    help='pretrained refiner model')
+
+args = parser.parse_args()
+
+
+#########################################################################################
+csv_columns = ['x_top_left', 'y_top_left', 'x_top_right', 'y_top_right', 'x_bot_right', 'y_bot_right', 'x_bot_left',
+               'y_bot_left']
+# load net
+net = CRAFT()  # initialize
+print('Loading detector weights (' + args.trained_model + ')')
+# Only CPU inference is used in this Space, so the checkpoint is always
+# loaded with map_location='cpu' (copyStateDict strips any 'module.' prefix).
+net.load_state_dict(copyStateDict(torch.load(args.trained_model, map_location='cpu')))
+
+if args.cpu:
+    net = net.cpu()
+    net = torch.nn.DataParallel(net)
+    cudnn.benchmark = False
+
+net.eval()
+# LinkRefiner: --refine defaults to True above, so the refiner weights are
+# loaded here and its refined link score replaces the raw one at detection time.
+# ------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
+refine_net = None
+if args.refine:
+    from refinenet import RefineNet
+
+    refine_net = RefineNet()
+    print('Loading refiner weights (' + args.refiner_model + ')')
+    if args.cpu:
+        refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model, map_location='cpu')))
+        refine_net = refine_net.cpu()
+        refine_net = torch.nn.DataParallel(refine_net)
+    else:
+        refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model, map_location='cpu')))
+
+    refine_net.eval()
+    args.poly = True
+
+
+config = Cfg.load_config_from_name('vgg_transformer')
+config['export'] = 'transformerocr_checkpoint.pth'
+config['device'] = 'cpu'
+config['predictor']['beamsearch'] = False
+
+detector = Predictor(config)
+# ------------------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------------------
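+
+# The setup above yields a two-stage pipeline: CRAFT proposes word-level
+# boxes and VietOCR's Predictor transcribes each cropped box. A minimal
+# sketch of that flow, assuming the `net`, `detector`, `args` and
+# `refine_net` objects defined above (illustrative only; the Streamlit
+# handler below performs the same steps explicitly):
+#
+#     def ocr_image(path):
+#         img = imgproc.loadImage(path)                     # RGB ndarray
+#         boxes, polys, score_text, det_scores = test.test_net(
+#             net, img, args.text_threshold, args.link_threshold,
+#             args.low_text, args.cpu, args.poly, args, refine_net)
+#         texts = []
+#         for box in boxes:
+#             x, y, w, h = cv2.boundingRect(np.int32(box))  # axis-aligned crop
+#             crop = Image.fromarray(img[y:y + h, x:x + w])
+#             texts.append(str(detector.predict(crop)))     # VietOCR recognition
+#         return texts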
+
+
+# Page title and image uploader
+st.title("Trích xuất thông tin từ căn cước công dân")
+uploaded_file = st.file_uploader("Tải lên ảnh căn cước công dân", type=["jpg", "jpeg", "png"])
+
+if uploaded_file is not None:
+    # Show the uploaded image
+    image = Image.open(uploaded_file)
+    image.save("uploaded_image.jpg")
+    st.image(image, caption='Hình ảnh căn cước', use_column_width=True)
+    import tempfile
+    # Write a temporary copy and keep its path
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        # Save the image to the temp file
+        temp_file.write(uploaded_file.read())
+        temp_file_path = temp_file.name
+
+    # Log the temporary path
+    print(f"Temporary file path: {temp_file_path}")
+    # Run button
+    if st.button("Run"):
+        print("ok")
+        image_path = "uploaded_image.jpg"
+        k = 1
+        crop_folder = "crop_Word"
+        result_folder = "Results"
+        image = imgproc.loadImage(image_path)
+
+        bboxes, polys, score_text, det_scores = test.test_net(net, image, args.text_threshold, args.link_threshold,
+                                                              args.low_text, args.cpu, args.poly, args, refine_net)
+        bbox_score = {}
+
+        def crop_polygon(image, vertices, box_num1):
+            # Build a mask covering the polygon
+            mask = np.zeros(image.shape[:2], dtype=np.uint8)
+            cv2.fillPoly(mask, [np.int32(vertices)], 255)
+
+            # Bounding rect of the polygon
+            rect = cv2.boundingRect(np.int32(vertices))
+
+            # Crop the sub-image covered by the bounding rect
+            cropped = image[rect[1]:rect[1]+rect[3], rect[0]:rect[0]+rect[2]]
+
+            # Mask restricted to the cropped region
+            cropped_mask = mask[rect[1]:rect[1]+rect[3], rect[0]:rect[0]+rect[2]]
+
+            # Keep only the pixels inside the polygon
+            result = cv2.bitwise_and(cropped, cropped, mask=cropped_mask)
+            crop_path = os.path.join(crop_folder, f"crop_{box_num1 + 1}.jpg")
+            cv2.imwrite(crop_path, result)
+            return result
+
+        if len(bboxes) == 0:
+            with open(f"data_text/text_{k}.txt", "w", encoding="utf-8") as f:
+                f.write(" ")
+
+        else:
+            # for box_num, item in enumerate(bboxes):
+            #     # Crop the bbox from the image
+            #     pts = np.array(item, np.int32).reshape((-1, 1, 2))
+            #     rect = cv2.boundingRect(pts)
+            #     x, y, w, h = rect
+            #     cropped_img = image[y:y+h, x:x+w].copy()
+            #     crop_path = os.path.join(crop_folder, f"crop_{box_num + 1}.jpg")
+            #     cv2.imwrite(crop_path, cropped_img)
+            for box_num in range(len(bboxes)):
+                item = bboxes[box_num]
+                data = np.array([[int(item[0][0]), int(item[0][1]), int(item[1][0]), int(item[1][1]), int(item[2][0]),
+                                  int(item[2][1]), int(item[3][0]), int(item[3][1])]])
+                csvdata = pd.DataFrame(data, columns=csv_columns)
+                csvdata.to_csv(f'data{k}.csv', index=False, mode='a', header=False)
+
+            # save the text score heatmap
+            filename, file_ext = os.path.splitext(os.path.basename(image_path))
+            mask_file = result_folder + "/res_" + filename + '_mask.jpg'  # heatmap file path
+
+            cv2.imwrite(mask_file, score_text)  # write the heatmap
+
+            file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
+
+            cropped_images = []
+            for i, box in enumerate(bboxes):
+                cropped = crop_polygon(image, box, i)
+                cropped_images.append(cropped)
+
+            print(f"Cropped {len(cropped_images)} bounding-box regions.")
+            path = glob.glob("crop_Word/*.jpg")
+            cv_img = [str(detector.predict(Image.open(f'crop_Word/crop_' + str(i + 1) + '.jpg'))) for i in
+                      range(len(bboxes))]
+            print(cv_img)
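+            # The list comprehension above re-reads the crops that
+            # crop_polygon just wrote to disk. A sketch of the same step
+            # without the disk round-trip, using the in-memory
+            # `cropped_images` list (note that crop_polygon works on the RGB
+            # array from imgproc.loadImage, whereas the JPEGs written with
+            # cv2.imwrite are saved with the channels swapped):
+            #
+            #     cv_img = [str(detector.predict(Image.fromarray(c)))
+            #               for c in cropped_images]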
+            # from google.generativeai.types import HarmCategory, HarmBlockThreshold
+            # import google.generativeai as genai
+
+            # genai.configure(api_key="YOUR_API_KEY")  # do not commit a real key
+
+            # # Initialize the model
+            # model = genai.GenerativeModel(model_name='gemini-1.5-flash')
+
+            # # Disable blocking for the available harm categories
+            # safety_settings = [
+            #     {"category": HarmCategory.HARM_CATEGORY_HATE_SPEECH, "threshold": HarmBlockThreshold.BLOCK_NONE},
+            #     {"category": HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, "threshold": HarmBlockThreshold.BLOCK_NONE},
+            #     {"category": HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, "threshold": HarmBlockThreshold.BLOCK_NONE},
+            #     {"category": HarmCategory.HARM_CATEGORY_HARASSMENT, "threshold": HarmBlockThreshold.BLOCK_NONE}
+            # ]
+            # print("ok")
+            # response = model.generate_content(
+            #     [f"tìm tên, ngày sinh, nơi cư trú, số căn cước, hạn sử dụng trong mảng sau {cv_img}, chỉ trả về tên, ngày sinh, nơi cư trú, số căn cước,hạn sử dụng mà model tìm thấy được, không trả lời thêm gì, ví dụ 'NGUYỄN THANH SANG, 18/05/1981, 223/11 Kv Bỉnh- Dương, Long Hòa, Bình Thủy, Cần Thơ, 092081007131, 18/05/2041 '"],
+            #     safety_settings=safety_settings
+            # )
+            # print(response.text)
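+            # If this Gemini post-processing is ever re-enabled, the key
+            # should come from the environment rather than source control.
+            # A sketch (the variable name GEMINI_API_KEY is an arbitrary
+            # choice, e.g. a Space secret):
+            #
+            #     api_key = os.environ.get("GEMINI_API_KEY")
+            #     if api_key:
+            #         genai.configure(api_key=api_key)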
+
+            for box in bboxes:
+                cv2.polylines(image, [np.int32(box)], isClosed=True, color=(0, 255, 0), thickness=1)
+
+            plt.figure(figsize=(20, 20))
+            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+            plt.title('Detected Text Bounding Boxes')
+            plt.show()
+            print(f"Finished processing image {k + 1}")
+
+        # Show the extracted results
+        st.subheader("Kết quả trích xuất:")
+        st.text_area("ALL TEXT", cv_img)
+        # st.text_area("Tên", response.text.get('name', ''))
+        # st.text_area("Ngày sinh", response.text.get('dob', ''))
+        # st.text_area("Nơi cư trú", response.text.get('address', ''))
+        # st.text_area("Số căn cước", response.text.get('id_number', ''))
+        # st.text_area("Hạn sử dụng", response.text.get('expiry', ''))
diff --git a/basenet/__init__.py b/basenet/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/basenet/__pycache__/__init__.cpython-310.pyc b/basenet/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c5df119ba37b85cfc7dee5b9ae9a810ea99158b7
Binary files /dev/null and b/basenet/__pycache__/__init__.cpython-310.pyc differ
diff --git a/basenet/__pycache__/vgg16_bn.cpython-310.pyc b/basenet/__pycache__/vgg16_bn.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..152bc345c50009ee2000b98caf150d5046c6d3f0
Binary files /dev/null and b/basenet/__pycache__/vgg16_bn.cpython-310.pyc differ
diff --git a/basenet/vgg16_bn.py b/basenet/vgg16_bn.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0a471234f575742509caa72039bab179e53a9c6
--- /dev/null
+++ b/basenet/vgg16_bn.py
@@ -0,0 +1,71 @@
+from collections import namedtuple
+
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+from torchvision import models
+
+def init_weights(modules):
+    for m in modules:
+        if isinstance(m, nn.Conv2d):
+            init.xavier_uniform_(m.weight.data)
+            if m.bias is not None:
+                m.bias.data.zero_()
+        elif isinstance(m, nn.BatchNorm2d):
+            m.weight.data.fill_(1)
+            m.bias.data.zero_()
+        elif isinstance(m, nn.Linear):
+            m.weight.data.normal_(0, 0.01)
+            m.bias.data.zero_()
+
+class vgg16_bn(torch.nn.Module):
+    def __init__(self, pretrained=True, freeze=True):
+        super(vgg16_bn, self).__init__()
+        vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
+        self.slice1 = torch.nn.Sequential()
+        self.slice2 = torch.nn.Sequential()
+        self.slice3 = torch.nn.Sequential()
+        self.slice4 = torch.nn.Sequential()
+        self.slice5 = torch.nn.Sequential()
+        for x in range(12):  # conv2_2
+            self.slice1.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(12, 19):  # conv3_3
+            self.slice2.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(19, 29):  # conv4_3
+            self.slice3.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(29, 39):  # conv5_3
+            self.slice4.add_module(str(x), vgg_pretrained_features[x])
+
+        # fc6, fc7 without atrous conv
+        self.slice5 = torch.nn.Sequential(
+            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
+            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
+            nn.Conv2d(1024, 1024, kernel_size=1)
+        )
+
+        if not pretrained:
+            init_weights(self.slice1.modules())
+            init_weights(self.slice2.modules())
+            init_weights(self.slice3.modules())
+            init_weights(self.slice4.modules())
+
+        init_weights(self.slice5.modules())  # no pretrained model for fc6 and fc7
+
+        if freeze:
+            for param in self.slice1.parameters():  # only first conv
+                param.requires_grad = False
+
+    def forward(self, X):
+        h = self.slice1(X)
+        h_relu2_2 = h
+        h = self.slice2(h)
+        h_relu3_2 = h
+        h = self.slice3(h)
+        h_relu4_3 = h
+        h = self.slice4(h)
+        h_relu5_3 = h
+        h = self.slice5(h)
+        h_fc7 = h
+        vgg_outputs = namedtuple("VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2'])
+        out = vgg_outputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2)
+        return out
diff --git a/craft.py b/craft.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4f0c53447907267c62832e5302c896bc3e32094
--- /dev/null
+++ b/craft.py
@@ -0,0 +1,86 @@
+"""
+Copyright (c) 2019-present NAVER Corp.
+MIT License
+"""
+
+# -*- coding: utf-8 -*-
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from basenet.vgg16_bn import vgg16_bn, init_weights
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(device)
+
+class double_conv(nn.Module):
+    def __init__(self, in_ch, mid_ch, out_ch):
+        super(double_conv, self).__init__()
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
+            nn.BatchNorm2d(mid_ch),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
+            nn.BatchNorm2d(out_ch),
+            nn.ReLU(inplace=True)
+        )
+
+    def forward(self, x):
+        x = self.conv(x)
+        return x
+
+
+class CRAFT(nn.Module):
+    def __init__(self, pretrained=False, freeze=False):
+        super(CRAFT, self).__init__()
+
+        """ Base network """
+        self.basenet = vgg16_bn(pretrained, freeze)
+
+        """ U network """
+        self.upconv1 = double_conv(1024, 512, 256)
+        self.upconv2 = double_conv(512, 256, 128)
+        self.upconv3 = double_conv(256, 128, 64)
+        self.upconv4 = double_conv(128, 64, 32)
+
+        num_class = 2
+        self.conv_cls = nn.Sequential(
+            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
+            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
+            nn.Conv2d(32, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
+            nn.Conv2d(16, 16, kernel_size=1), nn.ReLU(inplace=True),
+            nn.Conv2d(16, num_class, kernel_size=1),
+        )
+
+        init_weights(self.upconv1.modules())
+        init_weights(self.upconv2.modules())
+        init_weights(self.upconv3.modules())
+        init_weights(self.upconv4.modules())
+        init_weights(self.conv_cls.modules())
+
+    def forward(self, x):
+        """ Base network """
+        sources = self.basenet(x)
+
+        """ U network """
+        y = torch.cat([sources[0], sources[1]], dim=1)
+        y = self.upconv1(y)
+
+        y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False)
+        y = torch.cat([y, sources[2]], dim=1)
+        y = self.upconv2(y)
+
+        y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False)
+        y = torch.cat([y, sources[3]], dim=1)
+        y = self.upconv3(y)
+
+        y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False)
+        y = torch.cat([y, sources[4]], dim=1)
+        feature = self.upconv4(y)
+
+        y = self.conv_cls(feature)
+
+        return y.permute(0,2,3,1), feature
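+
+# The permuted output has shape [B, H/2, W/2, 2]: channel 0 is the region
+# (character) score and channel 1 the affinity (link) score, both at half
+# the input resolution -- test.py reads them as y[0, :, :, 0] and
+# y[0, :, :, 1]. A quick illustrative shape check on CPU:
+#
+#     model = CRAFT(pretrained=False).eval()
+#     with torch.no_grad():
+#         y, feature = model(torch.randn(1, 3, 768, 768))
+#     print(y.shape)        # torch.Size([1, 384, 384, 2])
+#     print(feature.shape)  # torch.Size([1, 32, 384, 384])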
+
+if __name__ == '__main__':
+    model = CRAFT(pretrained=True).cuda()
+    output, _ = model(torch.randn(1, 3, 768, 768).cuda())
+    print(output.shape)
\ No newline at end of file
diff --git a/craft_utils.py b/craft_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb4ba7fdb41a408af275ff1e026cb95317e9d5e5
--- /dev/null
+++ b/craft_utils.py
@@ -0,0 +1,273 @@
+"""Modify to Return Scores of Detection Boxes"""
+
+"""
+Copyright (c) 2019-present NAVER Corp.
+MIT License
+"""
+
+# -*- coding: utf-8 -*-
+import numpy as np
+import cv2
+import math
+
+""" auxiliary functions """
+
+
+# unwarp coordinates
+def warpCoord(Minv, pt):
+    out = np.matmul(Minv, (pt[0], pt[1], 1))
+    return np.array([out[0] / out[2], out[1] / out[2]])
+
+
+""" end of auxiliary functions """
+
+
+def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
+    # prepare data
+    linkmap = linkmap.copy()
+    textmap = textmap.copy()
+    img_h, img_w = textmap.shape
+
+    # Helper function for generating random colors
+    def random_color():
+        return tuple(np.random.randint(0, 255, 3).tolist())
+
+    """ labeling method """
+    ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
+    ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)
+
+    text_score_comb = np.clip(text_score + link_score, 0, 1)
+    nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8),
+                                                                         connectivity=4)
+
+    # Create a color version of linkmap for visualization
+    visualized_linkmap = cv2.cvtColor(linkmap, cv2.COLOR_GRAY2BGR)
+    det = []
+    det_scores = []
+    mapper = []
+    for k in range(1, nLabels):
+        # visualize stats on the original linkmap
+        x, y, w, h = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP], stats[k, cv2.CC_STAT_WIDTH], stats[
+            k, cv2.CC_STAT_HEIGHT]
+        cv2.rectangle(visualized_linkmap, (x, y), (x + w, y + h), random_color(), 2)
+
+        # size filtering
+        size = stats[k, cv2.CC_STAT_AREA]
+        if size < 10: continue
+
+        # thresholding
+        if np.max(textmap[labels == k]) < text_threshold: continue
+
+        # make segmentation map
+        segmap = np.zeros(textmap.shape, dtype=np.uint8)
+        segmap[labels == k] = 255
+        segmap[np.logical_and(link_score == 1, text_score == 0)] = 0  # remove link area
+        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
+        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
+        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
+        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
+        # boundary check
+        if sx < 0: sx = 0
+        if sy < 0: sy = 0
+        if ex >= img_w: ex = img_w
+        if ey >= img_h: ey = img_h
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
+        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)
+
+        # make box
+        np_contours = np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
+        rectangle = cv2.minAreaRect(np_contours)
+        box = cv2.boxPoints(rectangle)
+
+        # align diamond-shape
+        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
+        box_ratio = max(w, h) / (min(w, h) + 1e-5)
+        if abs(1 - box_ratio) <= 0.1:
+            l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
+            t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
+            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)
+
+        # make clock-wise order
+        startidx = box.sum(axis=1).argmin()
+        box = np.roll(box, 4 - startidx, 0)
+        box = np.array(box)
+
+        det.append(box)
+        mapper.append(k)
+        det_scores.append(np.max(textmap[labels == k]))
+    # # Show the visualized linkmap with stats drawn
+    # cv2.imshow("Visualized Linkmap with Stats", visualized_linkmap)
+    # cv2.waitKey(0)
+    # cv2.destroyAllWindows()
+    return det, labels, mapper, det_scores
+
+def getPoly_core(boxes, labels, mapper, linkmap):
+    # configs
+    num_cp = 5
+    max_len_ratio = 0.7
+    expand_ratio = 1.45
+    max_r = 2.0
+    step_r = 0.2
+
+    polys = []
+    for k, box in enumerate(boxes):
+        # size filter for small instance
+        w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
+        if w < 10 or h < 10:
+            polys.append(None)
+            continue
+
+        # warp image
+        tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
+        M = cv2.getPerspectiveTransform(box, tar)
+        word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
+        try:
+            Minv = np.linalg.inv(M)
+        except:
+            polys.append(None)
+            continue
+
+        # binarization for selected label
+        cur_label = mapper[k]
+        word_label[word_label != cur_label] = 0
+        word_label[word_label > 0] = 1
+
+        """ Polygon generation """
+        # find top/bottom contours
+        cp = []
+        max_len = -1
+        for i in range(w):
+            region = np.where(word_label[:, i] != 0)[0]
+            if len(region) < 2: continue
+            cp.append((i, region[0], region[-1]))
+            length = region[-1] - region[0] + 1
+            if length > max_len: max_len = length
+
+        # pass if max_len is similar to h
+        if h * max_len_ratio < max_len:
+            polys.append(None)
+            continue
+
+        # get pivot points with fixed length
+        tot_seg = num_cp * 2 + 1
+        seg_w = w / tot_seg  # segment width
+        pp = [None] * num_cp  # init pivot points
+        cp_section = [[0, 0]] * tot_seg
+        seg_height = [0] * num_cp
+        seg_num = 0
+        num_sec = 0
+        prev_h = -1
+        for i in range(0, len(cp)):
+            (x, sy, ey) = cp[i]
+            if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
+                # average previous segment
+                if num_sec == 0: break
+                cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
+                num_sec = 0
+
+                # reset variables
+                seg_num += 1
+                prev_h = -1
+
+            # accumulate center points
+            cy = (sy + ey) * 0.5
+            cur_h = ey - sy + 1
+            cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
+            num_sec += 1
+
+            if seg_num % 2 == 0: continue  # No polygon area
+
+            if prev_h < cur_h:
+                pp[int((seg_num - 1) / 2)] = (x, cy)
+                seg_height[int((seg_num - 1) / 2)] = cur_h
+                prev_h = cur_h
+
+        # processing last segment
+        if num_sec != 0:
+            cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]
+
+        # pass if num of pivots is not sufficient or segment width is smaller than character height
+        if None in pp or seg_w < np.max(seg_height) * 0.25:
+            polys.append(None)
+            continue
+
+        # calc median maximum of pivot points
+        half_char_h = np.median(seg_height) * expand_ratio / 2
+
+        # calc gradient and apply to make horizontal pivots
+        new_pp = []
+        for i, (x, cy) in enumerate(pp):
+            dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
+            dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
+            if dx == 0:  # gradient is zero
+                new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
+                continue
+            rad = - math.atan2(dy, dx)
+            c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
+            new_pp.append([x - s, cy - c, x + s, cy + c])
+
+        # get edge points to cover character heatmaps
+        isSppFound, isEppFound = False, False
+        grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
+        grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
+        for r in np.arange(0.5, max_r, step_r):
+            dx = 2 * half_char_h * r
+            if not isSppFound:
+                line_img = np.zeros(word_label.shape, dtype=np.uint8)
+                dy = grad_s * dx
+                p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
+                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
+                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
+                    spp = p
+                    isSppFound = True
+            if not isEppFound:
+                line_img = np.zeros(word_label.shape, dtype=np.uint8)
+                dy = grad_e * dx
+                p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
+                cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
+                if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
+                    epp = p
+                    isEppFound = True
+            if isSppFound and isEppFound:
+                break
+
+        # pass if boundary of polygon is not found
+        if not (isSppFound and isEppFound):
+            polys.append(None)
+            continue
+
+        # make final polygon
+        poly = []
+        poly.append(warpCoord(Minv, (spp[0], spp[1])))
+        for p in new_pp:
+            poly.append(warpCoord(Minv, (p[0], p[1])))
+        poly.append(warpCoord(Minv, (epp[0], epp[1])))
+        poly.append(warpCoord(Minv, (epp[2], epp[3])))
+        for p in reversed(new_pp):
+            poly.append(warpCoord(Minv, (p[2], p[3])))
+        poly.append(warpCoord(Minv, (spp[2], spp[3])))
+
+        # add to final result
+        polys.append(np.array(poly))
+
+    return polys
+
+
+def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
+    boxes, labels, mapper, det_scores = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)
+
+    if poly:
+        polys = getPoly_core(boxes, labels, mapper, linkmap)
+    else:
+        polys = [None] * len(boxes)
+
+    return boxes, polys, det_scores
+
+
+def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
+    for i in range(len(polys)):
+        if polys[i] is not None:
+            for j in range(len(polys[i])):
+                polys[i][j][0] *= ratio_w * ratio_net
+                polys[i][j][1] *= ratio_h * ratio_net
+    return polys
diff --git a/crop_Word/crop_1.jpg b/crop_Word/crop_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ac956dff7f610b83c4b858485976d0ba2923d658
Binary files /dev/null and b/crop_Word/crop_1.jpg differ
diff --git a/crop_Word/crop_10.jpg b/crop_Word/crop_10.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..74e3e6e66f34ae78251464a474698b5e0e940883
Binary files /dev/null and b/crop_Word/crop_10.jpg differ
diff --git a/crop_Word/crop_11.jpg b/crop_Word/crop_11.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b169e72f6dc5db9822dacac0940aa4bfd2428437
Binary files /dev/null and b/crop_Word/crop_11.jpg differ
diff --git a/crop_Word/crop_12.jpg b/crop_Word/crop_12.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..246588a7f1dd527738a7e77ad3a6097f7066b685
Binary files /dev/null and b/crop_Word/crop_12.jpg differ
diff --git a/crop_Word/crop_13.jpg b/crop_Word/crop_13.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c7374a90a6f54bdf3ddfb37e5669b32456b6ad04
Binary files /dev/null and b/crop_Word/crop_13.jpg differ
diff --git a/crop_Word/crop_14.jpg b/crop_Word/crop_14.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e2f995b3c251a01f10c534a1a0ada6f355bd253
Binary files /dev/null and b/crop_Word/crop_14.jpg differ
diff --git a/crop_Word/crop_15.jpg b/crop_Word/crop_15.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4fba58b979265f84d3b3a084ae65b3a5a4b8c2c8
Binary files /dev/null and b/crop_Word/crop_15.jpg differ
diff --git a/crop_Word/crop_16.jpg b/crop_Word/crop_16.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..059beb4ba4ef46778a4941c3740da401bb4a1194
Binary files /dev/null and b/crop_Word/crop_16.jpg differ
diff --git a/crop_Word/crop_17.jpg b/crop_Word/crop_17.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a59e9ba3a9783ea32d9422afb0552f069cbec847
Binary files /dev/null and b/crop_Word/crop_17.jpg differ
diff --git a/crop_Word/crop_18.jpg b/crop_Word/crop_18.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f31d710611722b7f82d070a9cea1ee17d85efee8
Binary files /dev/null and b/crop_Word/crop_18.jpg differ
diff --git a/crop_Word/crop_19.jpg b/crop_Word/crop_19.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3dad78a26fb57c92f91ce0c53b36bf06f8c96105
Binary files /dev/null and b/crop_Word/crop_19.jpg differ
diff --git a/crop_Word/crop_2.jpg b/crop_Word/crop_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ff5a897eef3d740168116afdfe605d9433612fac
Binary files /dev/null and b/crop_Word/crop_2.jpg differ
diff --git a/crop_Word/crop_20.jpg b/crop_Word/crop_20.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9db642108d1eccb659685651abeafd277356f9f9
Binary files /dev/null and b/crop_Word/crop_20.jpg differ
diff --git a/crop_Word/crop_21.jpg b/crop_Word/crop_21.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1ad88c34153087dc964ce32f9164ca49bc2cf7f6
Binary files /dev/null and b/crop_Word/crop_21.jpg differ
diff --git a/crop_Word/crop_22.jpg b/crop_Word/crop_22.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02879e5863e1aeac3ec691ce7adbd624d72589b6
Binary files /dev/null and b/crop_Word/crop_22.jpg differ
diff --git a/crop_Word/crop_23.jpg b/crop_Word/crop_23.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..63687a036f300e157790b0489d5319c03835a1db
Binary files /dev/null and b/crop_Word/crop_23.jpg differ
diff --git a/crop_Word/crop_24.jpg b/crop_Word/crop_24.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b865d35d64c3a7722586db084902f1e977ce1b7d
Binary files /dev/null and b/crop_Word/crop_24.jpg differ
diff --git a/crop_Word/crop_25.jpg b/crop_Word/crop_25.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..07c6389ba681e19f3f2f45f41adcae7300df53bf
Binary files /dev/null and b/crop_Word/crop_25.jpg differ
diff --git a/crop_Word/crop_26.jpg b/crop_Word/crop_26.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..123d8c4b4ba872739361ad2eb7d7a4c2c87bcda2
Binary files /dev/null and b/crop_Word/crop_26.jpg differ
diff --git a/crop_Word/crop_27.jpg b/crop_Word/crop_27.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1824b836fed21632f90679c548c9d630e3b9fc89
Binary files /dev/null and b/crop_Word/crop_27.jpg differ
diff --git a/crop_Word/crop_28.jpg b/crop_Word/crop_28.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c3f85185ca7f172dc943a80494e4d483226fa543
Binary files /dev/null and b/crop_Word/crop_28.jpg differ
diff --git a/crop_Word/crop_29.jpg b/crop_Word/crop_29.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1cd343ec310b2a2c4979cbd9acd72e5804038ca1
Binary files /dev/null and b/crop_Word/crop_29.jpg differ
diff --git a/crop_Word/crop_3.jpg b/crop_Word/crop_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c8b1dc512d6a2aef71515aec26fe776cc5404507
Binary files /dev/null and b/crop_Word/crop_3.jpg differ
diff --git a/crop_Word/crop_30.jpg b/crop_Word/crop_30.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..42189c6f9935344868954432c9c106c7328ef334
Binary files /dev/null and b/crop_Word/crop_30.jpg differ
diff --git a/crop_Word/crop_31.jpg b/crop_Word/crop_31.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7f82ac186d1564a5d05fd5bb1017dcf0cc2a5e42
Binary files /dev/null and b/crop_Word/crop_31.jpg differ
diff --git a/crop_Word/crop_32.jpg b/crop_Word/crop_32.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d12c846757cacf3fe479290258c59d0c71529746
Binary files /dev/null and b/crop_Word/crop_32.jpg differ
diff --git a/crop_Word/crop_33.jpg b/crop_Word/crop_33.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9c406ac8e5c03a13e6b7c7c875a51c627ddbd933
Binary files /dev/null and b/crop_Word/crop_33.jpg differ
diff --git a/crop_Word/crop_34.jpg b/crop_Word/crop_34.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..abd734f14d3832f729d6477326d70d24198b1385
Binary files /dev/null and b/crop_Word/crop_34.jpg differ
diff --git a/crop_Word/crop_4.jpg b/crop_Word/crop_4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d4d27f6916496344789e84a4fc7dc928d232a5ce
Binary files /dev/null and b/crop_Word/crop_4.jpg differ
diff --git a/crop_Word/crop_5.jpg b/crop_Word/crop_5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b342d60906cefe72b416795f50b568ab33e6bdac
Binary files /dev/null and b/crop_Word/crop_5.jpg differ
diff --git a/crop_Word/crop_6.jpg b/crop_Word/crop_6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2737f3d3e5075a70b15c7bfc94be40f72ae29e21
Binary files /dev/null and b/crop_Word/crop_6.jpg differ
diff --git a/crop_Word/crop_7.jpg b/crop_Word/crop_7.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d18bed9462354aa6f01eb68c29cd263d2006dc3e
Binary files /dev/null and b/crop_Word/crop_7.jpg differ
diff --git a/crop_Word/crop_8.jpg b/crop_Word/crop_8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b453f0d4fbb87c264fa338511306115a97b36db7
Binary files /dev/null and b/crop_Word/crop_8.jpg differ
diff --git a/crop_Word/crop_9.jpg b/crop_Word/crop_9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8352373e7b10c693e52fc821691193ed7a4cec22
Binary files /dev/null and b/crop_Word/crop_9.jpg differ
diff --git a/crop_images.py b/crop_images.py
new file mode 100644
index 0000000000000000000000000000000000000000..30cc22e2649f83ec459176052c535bd2ef80a19a
--- /dev/null
+++ b/crop_images.py
@@ -0,0 +1,73 @@
+import os
+import numpy as np
+import cv2
+import pandas as pd
+
+
+def crop(pts, image):
+    """
+    Takes 8 points (a quadrilateral) and returns the cropped,
+    masked image with a white background.
+    """
+    # Clip pts to the image bounds
+    pts[:, 0] = np.clip(pts[:, 0], 0, image.shape[1] - 1)
+    pts[:, 1] = np.clip(pts[:, 1], 0, image.shape[0] - 1)
+
+    rect = cv2.boundingRect(pts)
+    x, y, w, h = rect
+    x = int(x)
+    y = int(y)
+    w = int(w)
+    if h == 0 or w == 0:
+        # Degenerate box: return a blank 10x10 white image
+        return np.ones((10, 10, 3), np.uint8) * 255
+
+    cropped = image[y:y + h, x:x + w].copy()
+    pts = pts - pts.min(axis=0)
+    mask = np.zeros(cropped.shape[:2], np.uint8)
+
+    cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
+    dst = cv2.bitwise_and(cropped, cropped, mask=mask)
+    bg = np.ones_like(cropped, np.uint8) * 255
+    cv2.bitwise_not(bg, bg, mask=mask)
+    dst2 = bg + dst
+    return dst2
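+
+# Illustrative use of crop() -- mask a quadrilateral out of an image onto a
+# white background (the file name and points here are hypothetical):
+#
+#     img = cv2.imread("0.jpg")
+#     pts = np.array([[30, 40], [210, 38], [212, 90], [28, 92]], dtype=np.int32)
+#     word = crop(pts, img)
+#     cv2.imwrite("word.jpg", word)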
+
+
+def generate_words(image_name, score_bbox, image):
+    num_bboxes = len(score_bbox)
+    for num in range(num_bboxes):
+        bbox_coords = score_bbox[num].split(':')[-1].split(',\n')
+        if bbox_coords != ['{}']:
+            l_t = float(bbox_coords[0].strip(' array([').strip(']').split(',')[0])
+            t_l = float(bbox_coords[0].strip(' array([').strip(']').split(',')[1])
+            r_t = float(bbox_coords[1].strip(' [').strip(']').split(',')[0])
+            t_r = float(bbox_coords[1].strip(' [').strip(']').split(',')[1])
+            r_b = float(bbox_coords[2].strip(' [').strip(']').split(',')[0])
+            b_r = float(bbox_coords[2].strip(' [').strip(']').split(',')[1])
+            l_b = float(bbox_coords[3].strip(' [').strip(']').split(',')[0])
+            b_l = float(bbox_coords[3].strip(' [').strip(']').split(',')[1].strip(']'))
+            pts = np.array([[int(l_t), int(t_l)], [int(r_t), int(t_r)], [int(r_b), int(b_r)], [int(l_b), int(b_l)]])
+
+            if np.all(pts > 0):
+
+                word = crop(pts, image)
+
+                folder = '/'.join(image_name.split('/')[:-1])
+                # CHANGE DIR
+                dir = '/content/Pipeline/Crop Words/'
+                if os.path.isdir(os.path.join(dir + folder)) == False:
+                    os.makedirs(os.path.join(dir + folder))
+                try:
+                    file_name = os.path.join(dir + image_name)
+                    cv2.imwrite(
+                        file_name + '_{}_{}_{}_{}_{}_{}_{}_{}.jpg'.format(l_t, t_l, r_t, t_r, r_b, b_r, l_b, b_l), word)
+                    print('Image saved to ' + file_name + '_{}_{}_{}_{}_{}_{}_{}_{}.jpg'.format(l_t, t_l, r_t, t_r, r_b,
+                                                                                                b_r, l_b, b_l))
+                except:
+                    continue
diff --git a/file_utils.py b/file_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab5123d5bbe55b8b042669b46df3ef2503c888d0
--- /dev/null
+++ b/file_utils.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+import os
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import crop_images
+import imgproc
+
+
+# borrowed from https://github.com/lengstrom/fast-style-transfer/blob/master/src/utils.py
+def get_files(img_dir):
+    imgs, masks, xmls = list_files(img_dir)
+    return imgs, masks, xmls
+
+
+def list_files(in_path):
+    img_files = []
+    mask_files = []
+    gt_files = []
+    for (dirpath, dirnames, filenames) in os.walk(in_path):
+        for file in filenames:
+            filename, ext = os.path.splitext(file)
+            ext = str.lower(ext)
+            if ext == '.jpg' or ext == '.jpeg' or ext == '.gif' or ext == '.png' or ext == '.pgm':
+                img_files.append(os.path.join(dirpath, file))
+            elif ext == '.bmp':
+                mask_files.append(os.path.join(dirpath, file))
+            elif ext == '.xml' or ext == '.gt' or ext == '.txt':
+                gt_files.append(os.path.join(dirpath, file))
+            elif ext == '.zip':
+                continue
+    # img_files.sort()
+    # mask_files.sort()
+    # gt_files.sort()
+    return img_files, mask_files, gt_files
+
+
+def saveResult(img_file, img, boxes, dirname='Results', verticals=None, texts=None):
+    """ save text detection result one by one
+    Args:
+        img_file (str): image file name
+        img (array): raw image context
+        boxes (array): array of result file
+            Shape: [num_detections, 4] for BB output / [num_detections, 4] for QUAD output
+    Return:
+        None
+    """
+
+    img = np.array(img)
+
+    # result file names: image base name plus extension
+    filename, file_ext = os.path.splitext(os.path.basename(img_file))
+    # result directory
+    res_file = os.path.join(dirname, "res_" + filename + '.txt')
+    res_img_file = os.path.join(dirname, "res_" + filename + '.jpg')
+
+    if not os.path.isdir(dirname):
+        os.mkdir(dirname)
+    with open(res_file, 'w') as f:
+        for i, box in enumerate(boxes):
+            poly = np.array(box).astype(np.int32).reshape((-1))
+            strResult = ','.join([str(p) for p in poly]) + '\r\n'
+            f.write(strResult)
+            # uncomment the polylines call below to draw the boxes on the image
+            poly = poly.reshape(-1, 2)
+            # cv2.polylines(img, [poly.reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2)
+            # ptColor = (0, 255, 255)
+            xmin = min(poly[:, 0])
+            xmax = max(poly[:, 0])
+            ymin = min(poly[:, 1])
+            ymax = max(poly[:, 1])
+            width = xmax - xmin
+            height = ymax - ymin
+            # corner points, in the same order written to the txt file
+            pts = np.array([[xmin, ymax], [xmax, ymax], [xmax, ymin], [xmin, ymin]])
+
+            word = crop_images.crop(pts, img)
+
+            folder = '/'.join(filename.split('/')[:-1])
+            # the word crops are written to the cropWord folder
+            dir = 'cropWord/'
+            if os.path.isdir(os.path.join(dir + folder)) == False:
+                os.makedirs(os.path.join(dir + folder))
+            try:
+                file_name = os.path.join(dir + filename)
+                cv2.imwrite(file_name + str(i) + '.jpg', word)
+            except:
+                continue
+
+            if verticals is not None:
+                if verticals[i]:
+                    ptColor = (255, 0, 0)
+
+            if texts is not None:
+                font = cv2.FONT_HERSHEY_SIMPLEX
+                font_scale = 0.5
+                cv2.putText(img, "{}".format(texts[i]), (poly[0][0] + 1, poly[0][1] + 1), font, font_scale, (0, 0, 0),
+                            thickness=1)
+                cv2.putText(img, "{}".format(texts[i]), tuple(poly[0]), font, font_scale, (0, 255, 255),
+                            thickness=1)
+
+    # Save result image
+    # cv2.imwrite(res_img_file, img)
diff --git a/git b/git
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/imgproc.py b/imgproc.py
new file mode 100644
index 0000000000000000000000000000000000000000..0804b1bafbd6516ffb16a57fcc61392620f7426a
--- /dev/null
+++ b/imgproc.py
@@ -0,0 +1,74 @@
+"""
+Copyright (c) 2019-present NAVER Corp.
+MIT License
+"""
+
+# -*- coding: utf-8 -*-
+import numpy as np
+from skimage import io
+import cv2
+
+
+def loadImage(img_file):
+    img = io.imread(img_file)  # RGB order
+    if img.shape[0] == 2: img = img[0]
+    if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+    if img.shape[2] == 4: img = img[:, :, :3]
+    img = np.array(img)
+
+    return img
+
+
+def normalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
+    # should be RGB order
+    img = in_img.copy().astype(np.float32)
+
+    img -= np.array([mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32)
+    img /= np.array([variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0], dtype=np.float32)
+    return img
+
+
+def denormalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
+    # should be RGB order
+    img = in_img.copy()
+    img *= variance
+    img += mean
+    img *= 255.0
+    img = np.clip(img, 0, 255).astype(np.uint8)
+    return img
+
+
+def resize_aspect_ratio(img, square_size, interpolation, mag_ratio=1):
+    height, width, channel = img.shape
+
+    # magnify image size
+    target_size = mag_ratio * max(height, width)
+
+    # set original image size
+    if target_size > square_size:
+        target_size = square_size
+
+    ratio = target_size / max(height, width)
+
+    target_h, target_w = int(height * ratio), int(width * ratio)
+    proc = cv2.resize(img, (target_w, target_h), interpolation=interpolation)
+
+    # make canvas and paste image
+    target_h32, target_w32 = target_h, target_w
+    if target_h % 32 != 0:
+        target_h32 = target_h + (32 - target_h % 32)
+    if target_w % 32 != 0:
+        target_w32 = target_w + (32 - target_w % 32)
+    resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
+    resized[0:target_h, 0:target_w, :] = proc
+    target_h, target_w = target_h32, target_w32
+
+    size_heatmap = (int(target_w / 2), int(target_h / 2))
+
+    return resized, ratio, size_heatmap
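+
+# resize_aspect_ratio pads the resized image up to multiples of 32 (the VGG
+# stride), and the CRAFT score maps come out at half that resolution, which
+# is why craft_utils.adjustResultCoordinates multiplies by ratio_net=2 on
+# top of the inverse resize ratio. A worked example under those rules:
+#
+#     a 600x1000 image with canvas_size=1280, mag_ratio=1.5:
+#         target_size = min(1.5 * 1000, 1280) = 1280 -> ratio = 1.28
+#         resized canvas: 1280 x 768, heatmap: 640 x 384
+#     a heatmap x-coordinate of 100 maps back to the original image as
+#         100 * (1 / 1.28) * 2 = 156.25 pixels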
+
+
+def cvt2HeatmapImg(img):
+    img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
+    img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
+    return img
diff --git a/refinenet.py b/refinenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..b209843d6221eecf20e959145333f4a690298568
--- /dev/null
+++ b/refinenet.py
@@ -0,0 +1,65 @@
+"""
+Copyright (c) 2019-present NAVER Corp.
+MIT License
+"""
+
+# -*- coding: utf-8 -*-
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+from basenet.vgg16_bn import init_weights
+
+
+class RefineNet(nn.Module):
+    def __init__(self):
+        super(RefineNet, self).__init__()
+
+        self.last_conv = nn.Sequential(
+            nn.Conv2d(34, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
+            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
+            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True)
+        )
+
+        self.aspp1 = nn.Sequential(
+            nn.Conv2d(64, 128, kernel_size=3, dilation=6, padding=6), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 1, kernel_size=1)
+        )
+
+        self.aspp2 = nn.Sequential(
+            nn.Conv2d(64, 128, kernel_size=3, dilation=12, padding=12), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 1, kernel_size=1)
+        )
+
+        self.aspp3 = nn.Sequential(
+            nn.Conv2d(64, 128, kernel_size=3, dilation=18, padding=18), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 1, kernel_size=1)
+        )
+
+        self.aspp4 = nn.Sequential(
+            nn.Conv2d(64, 128, kernel_size=3, dilation=24, padding=24), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+            nn.Conv2d(128, 1, kernel_size=1)
+        )
+
+        init_weights(self.last_conv.modules())
+        init_weights(self.aspp1.modules())
+        init_weights(self.aspp2.modules())
+        init_weights(self.aspp3.modules())
+        init_weights(self.aspp4.modules())
+
+    def forward(self, y, upconv4):
+        refine = torch.cat([y.permute(0, 3, 1, 2), upconv4], dim=1)
+        refine = self.last_conv(refine)
+
+        aspp1 = self.aspp1(refine)
+        aspp2 = self.aspp2(refine)
+        aspp3 = self.aspp3(refine)
+        aspp4 = self.aspp4(refine)
+
+        # out = torch.add([aspp1, aspp2, aspp3, aspp4], dim=1)
+        out = aspp1 + aspp2 + aspp3 + aspp4
+        return out.permute(0, 2, 3, 1)  # , refine.permute(0,2,3,1)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a4c2452c06cc69c6ba9dce6bd154d679d1a128a4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+numpy
+pandas
+matplotlib
+scikit-learn
+scikit-image
+opencv-python
+Pillow
+tensorflow
+seaborn
+beautifulsoup4
+requests
+plotly
+keras
+vietocr
+streamlit
+torch --index-url https://download.pytorch.org/whl/cu121
+torchvision --index-url https://download.pytorch.org/whl/cu121
+torchaudio --index-url https://download.pytorch.org/whl/cu121
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7ff64e685b08e4ebd8b7941fcfa97a9edba0090
--- /dev/null
+++ b/test.py
@@ -0,0 +1,112 @@
+"""Modify to Remove Argument Parser"""
+
+"""
+Copyright (c) 2019-present NAVER Corp.
+MIT License
+"""
+
+# -*- coding: utf-8 -*-
+import sys
+import os
+import time
+import argparse
+
+import torch
+import torch.nn as nn
+import torch.backends.cudnn as cudnn
+from torch.autograd import Variable
+
+from PIL import Image
+
+import cv2
+from skimage import io
+import numpy as np
+import craft_utils
+import imgproc
+import file_utils
+import json
+import zipfile
+
+from craft import CRAFT
+
+from collections import OrderedDict
+
+
+def copyStateDict(state_dict):
+    if list(state_dict.keys())[0].startswith("module"):
+        start_idx = 1
+    else:
+        start_idx = 0
+    new_state_dict = OrderedDict()
+    for k, v in state_dict.items():
+        name = ".".join(k.split(".")[start_idx:])
+        new_state_dict[name] = v
+    return new_state_dict
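+
+# copyStateDict strips the 'module.' prefix that torch.nn.DataParallel adds
+# to every key when a wrapped model is saved, so such checkpoints also load
+# into a bare module. A small illustration of the behavior:
+#
+#     sd = OrderedDict([("module.conv.weight", 1), ("module.conv.bias", 2)])
+#     list(copyStateDict(sd).keys())   # ['conv.weight', 'conv.bias']
+#     sd2 = OrderedDict([("conv.weight", 1)])
+#     list(copyStateDict(sd2).keys())  # unchanged: ['conv.weight']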
+
+
+def test_net(net, image, text_threshold, link_threshold, low_text, cpu, poly, args, refine_net=None):
+    t0 = time.time()
+    # resize: returns the padded image, the resize ratio, and the heatmap size
+    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size,
+                                                                          interpolation=cv2.INTER_LINEAR,
+                                                                          mag_ratio=args.mag_ratio)
+
+    # inverse ratio, used later to map box coordinates back to the original image
+    ratio_h = ratio_w = 1 / target_ratio
+    # preprocessing
+    x = imgproc.normalizeMeanVariance(img_resized)
+    # reorder to channel-first and add a batch dimension
+    # (see https://github.com/pytorch/pytorch/issues/44541)
+    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
+    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
+
+    if cpu:
+        x = x.cpu()
+
+    # forward pass: y holds the score maps, feature feeds the link refiner
+    with torch.no_grad():
+        y, feature = net(x)
+
+    # make score and link map
+    score_text = y[0, :, :, 0].cpu().data.numpy()
+    score_link = y[0, :, :, 1].cpu().data.numpy()
+    # refine link
+    if refine_net is not None:
+        with torch.no_grad():
+            y_refiner = refine_net(y, feature)
+        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
+
+    t0 = time.time() - t0
+    t1 = time.time()
+
+    # Post-processing (craft_utils): threshold the score maps and group
+    # connected regions into detection boxes, plus polygons when poly=True
+    boxes, polys, det_scores = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text,
+                                                       poly)
+
+    # coordinate adjustment back to the original image scale (craft_utils)
+    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
+    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
+
+    for k in range(len(polys)):
+        if polys[k] is None: polys[k] = boxes[k]
+    t1 = time.time() - t1
+
+    # render results (optional)
+    render_img = score_text.copy()
+    render_img = np.hstack((render_img, score_link))
+    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
+    if args.show_time: print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))
+
+    return boxes, polys, ret_score_text, det_scores
diff --git a/weights/craft_mlt_25k.pth b/weights/craft_mlt_25k.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88871234c9270456cdf0137652a48b929d0ddf72
--- /dev/null
+++ b/weights/craft_mlt_25k.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17
+size 83152330
diff --git a/weights/craft_refiner_CTW1500.pth b/weights/craft_refiner_CTW1500.pth
new file mode 100644
index 0000000000000000000000000000000000000000..282e2748fedc1aa4c12b82f7edc987578ce05e7d
--- /dev/null
+++ b/weights/craft_refiner_CTW1500.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7000cd3e9c76f2231b62b32182212203f73c08dfaa12bb16ffb529948a01399
+size 1854124