B2W1234 committed on
Commit 0a96b76
1 Parent(s): 83133b9

Upload 53 files

This view is limited to 50 files because it contains too many changes.
.gitattributes CHANGED
@@ -1,35 +1 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
0.jpg ADDED
README.md CHANGED
@@ -1,12 +1 @@
- ---
- title: Test Ocr
- emoji: 🌍
- colorFrom: red
- colorTo: purple
- sdk: streamlit
- sdk_version: 1.36.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ocr_all
app.py ADDED
@@ -0,0 +1,254 @@
+ import streamlit as st
+ import os
+ import glob
+ import argparse
+ import tempfile
+ import torch
+ import torch.backends.cudnn as cudnn
+ import cv2
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from PIL import Image
+ import test
+ from test import copyStateDict
+ import file_utils
+ import imgproc
+ from craft import CRAFT
+ from vietocr.tool.predictor import Predictor
+ from vietocr.tool.config import Cfg
+
+
+ # Parse boolean command-line flag values
+ def str2bool(v):
+     return v.lower() in ("yes", "y", "true", "t", "1")
+
+
+ # CRAFT
+ parser = argparse.ArgumentParser(description='CRAFT Text Detection')
+ parser.add_argument('--trained_model', default='weights/craft_mlt_25k.pth', type=str, help='pretrained model')
+ parser.add_argument('--text_threshold', default=0.7, type=float, help='text confidence threshold')
+ parser.add_argument('--low_text', default=0.4, type=float, help='text low-bound score')
+ parser.add_argument('--link_threshold', default=0.4, type=float, help='link confidence threshold')
+ parser.add_argument('--cpu', default=True, type=str2bool, help='use CPU for inference')
+ parser.add_argument('--canvas_size', default=1280, type=int, help='image size for inference')
+ parser.add_argument('--mag_ratio', default=1.5, type=float, help='image magnification ratio')
+ parser.add_argument('--poly', default=False, action='store_true', help='enable polygon type')
+ parser.add_argument('--show_time', default=False, action='store_true', help='show processing time')
+ parser.add_argument('--test_folder', default='data_image', type=str, help='path to the input images')
+ parser.add_argument('--refine', default=True, action='store_true', help='enable link refiner')
+ parser.add_argument('--refiner_model', default='weights/craft_refiner_CTW1500.pth', type=str,
+                     help='pretrained refiner model')
+
+ args = parser.parse_args()
+
+
+ #########################################################################################
+ csv_columns = ['x_top_left', 'y_top_left', 'x_top_right', 'y_top_right', 'x_bot_right', 'y_bot_right', 'x_bot_left',
+                'y_bot_left']
+ # load net
+ net = CRAFT()  # initialize
+ print('Loading weights (' + args.trained_model + ')')
+ # control then jumps to the test module, which runs the trained model
+ if args.cpu:
+     net.load_state_dict(copyStateDict(torch.load(args.trained_model, map_location='cpu')))
+ else:
+     net.load_state_dict(copyStateDict(torch.load(args.trained_model)))
+
+ if args.cpu:
+     net = net.cpu()
+     net = torch.nn.DataParallel(net)
+     cudnn.benchmark = False
+
+ net.eval()
+ # LinkRefiner: since --refine defaults to True above, the refiner weights are
+ # loaded here as well (the original note said this block was skipped while
+ # refine still defaulted to False)
+ # ------------------------------------------------------------------------------------------
+ refine_net = None
+ if args.refine:
+     from refinenet import RefineNet
+
+     refine_net = RefineNet()
+     print('Loading weights (' + args.refiner_model + ')')
+     if args.cpu:
+         refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model, map_location='cpu')))
+         refine_net = refine_net.cpu()
+         refine_net = torch.nn.DataParallel(refine_net)
+     else:
+         refine_net.load_state_dict(copyStateDict(torch.load(args.refiner_model)))
+
+     refine_net.eval()
+     args.poly = True
+
+
+ config = Cfg.load_config_from_name('vgg_transformer')
+ config['export'] = 'transformerocr_checkpoint.pth'
+ config['device'] = 'cpu'
+ config['predictor']['beamsearch'] = False
+
+ detector = Predictor(config)
+ # ------------------------------------------------------------------------------------------
+
+
+ # Page title and image upload widget
+ st.title("Extract information from a citizen ID card")
+ uploaded_file = st.file_uploader("Upload a photo of the ID card", type=["jpg", "jpeg", "png"])
+
+ if uploaded_file is not None:
+     # Show the uploaded image
+     image = Image.open(uploaded_file)
+     image.save("uploaded_image.jpg")
+     st.image(image, caption='ID card image', use_column_width=True)
+     # Save a temporary copy and keep its path
+     uploaded_file.seek(0)  # rewind: Image.open/save above already consumed the stream
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+         # Save the image to the temp file
+         temp_file.write(uploaded_file.read())
+         temp_file_path = temp_file.name
+
+     # Show the temporary path
+     print(f"Temporary path of the file: {temp_file_path}")
+     # Run button
+     if st.button("Run"):
+         print("ok")
+         image_path = "uploaded_image.jpg"
+         k = 1
+         crop_folder = "crop_Word"
+         result_folder = "Results"
+         os.makedirs(crop_folder, exist_ok=True)  # make sure the output folders exist
+         os.makedirs(result_folder, exist_ok=True)
+         image = imgproc.loadImage(image_path)
+
+         bboxes, polys, score_text, det_scores = test.test_net(net, image, args.text_threshold, args.link_threshold,
+                                                               args.low_text, args.cpu, args.poly, args, refine_net)
+         bbox_score = {}
+
+         def crop_polygon(image, vertices, box_num1):
+             # Build a mask for the polygon
+             mask = np.zeros(image.shape[:2], dtype=np.uint8)
+             cv2.fillPoly(mask, [np.int32(vertices)], 255)
+
+             # Bounding rect used to crop the region containing the polygon
+             rect = cv2.boundingRect(np.int32(vertices))
+
+             # Crop the sub-image covered by the bounding rect
+             cropped = image[rect[1]:rect[1] + rect[3], rect[0]:rect[0] + rect[2]]
+
+             # Mask restricted to the cropped region
+             cropped_mask = mask[rect[1]:rect[1] + rect[3], rect[0]:rect[0] + rect[2]]
+
+             # Keep only the pixels inside the polygon
+             result = cv2.bitwise_and(cropped, cropped, mask=cropped_mask)
+             crop_path = os.path.join(crop_folder, f"crop_{box_num1 + 1}.jpg")
+             cv2.imwrite(crop_path, result)
+             return result
+
+         if len(bboxes) == 0:
+             with open(f"data_text/text_{k}.txt", "w", encoding="utf-8") as f:
+                 f.write(" ")
+
+         else:
+             # for box_num, item in enumerate(bboxes):
+             #     # Crop the bbox from the image
+             #     pts = np.array(item, np.int32).reshape((-1, 1, 2))
+             #     rect = cv2.boundingRect(pts)
+             #     x, y, w, h = rect
+             #     cropped_img = image[y:y+h, x:x+w].copy()
+             #     crop_path = os.path.join(crop_folder, f"crop_{box_num + 1}.jpg")
+             #     cv2.imwrite(crop_path, cropped_img)
+             for box_num in range(len(bboxes)):
+                 item = bboxes[box_num]
+                 data = np.array([[int(item[0][0]), int(item[0][1]), int(item[1][0]), int(item[1][1]), int(item[2][0]),
+                                   int(item[2][1]), int(item[3][0]), int(item[3][1])]])
+                 csvdata = pd.DataFrame(data, columns=csv_columns)
+                 csvdata.to_csv(f'data{k}.csv', index=False, mode='a', header=False)
+
+             # save score text
+             filename, file_ext = os.path.splitext(os.path.basename(image_path))
+             mask_file = result_folder + "/res_" + filename + '_mask.jpg'  # path for the heat-map file
+
+             cv2.imwrite(mask_file, score_text)  # write out the heat map
+
+             file_utils.saveResult(image_path, image[:, :, ::-1], polys, dirname=result_folder)
+
+             cropped_images = []
+             for i, box in enumerate(bboxes):
+                 cropped = crop_polygon(image, box, i)
+                 cropped_images.append(cropped)
+
+             print(f"Cropped {len(cropped_images)} bounding-box regions.")
+             path = glob.glob("crop_Word/*.jpg")
+             cv_img = [str(detector.predict(Image.open('crop_Word/crop_' + str(i + 1) + '.jpg'))) for i in
+                       range(len(bboxes))]
+             print(cv_img)
+             # from google.generativeai.types import HarmCategory, HarmBlockThreshold
+             # import google.generativeai as genai
+
+             # genai.configure(api_key="YOUR_API_KEY")  # never commit a real API key
+
+             # # Initialize the model
+             # model = genai.GenerativeModel(model_name='gemini-1.5-flash')
+
+             # # Set safety_settings for the available, valid harm categories
+             # safety_settings = [
+             #     {"category": HarmCategory.HARM_CATEGORY_HATE_SPEECH, "threshold": HarmBlockThreshold.BLOCK_NONE},
+             #     {"category": HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, "threshold": HarmBlockThreshold.BLOCK_NONE},
+             #     {"category": HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, "threshold": HarmBlockThreshold.BLOCK_NONE},
+             #     {"category": HarmCategory.HARM_CATEGORY_HARASSMENT, "threshold": HarmBlockThreshold.BLOCK_NONE}
+             # ]
+             # print("ok")
+             # response = model.generate_content(
+             #     [f"find the name, date of birth, place of residence, ID number and expiry date in the following array {cv_img}; return only the values found, with no extra reply, e.g. 'NGUYỄN THANH SANG, 18/05/1981, 223/11 Kv Bình Dương, Long Hòa, Bình Thủy, Cần Thơ, 092081007131, 18/05/2041'"],
+             #     safety_settings=safety_settings
+             # )
+             # print(response.text)
+
+             for box in bboxes:
+                 cv2.polylines(image, [np.int32(box)], isClosed=True, color=(0, 255, 0), thickness=1)
+
+             plt.figure(figsize=(20, 20))
+             plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+             plt.title('Detected Text Bounding Boxes')
+             st.pyplot(plt.gcf())  # plt.show() would not reach the browser when run on a server
+             print(f"Finished processing image {k}")
+
+             # Show the extracted results
+             st.subheader("Extraction results:")
+             st.text_area("ALL TEXT", "\n".join(cv_img))
+             # st.text_area("Name", response.text.get('name', ''))
+             # st.text_area("Date of birth", response.text.get('dob', ''))
+             # st.text_area("Place of residence", response.text.get('address', ''))
+             # st.text_area("ID number", response.text.get('id_number', ''))
+             # st.text_area("Expiry date", response.text.get('expiry', ''))
basenet/__init__.py ADDED
File without changes
basenet/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (175 Bytes).
basenet/__pycache__/vgg16_bn.cpython-310.pyc ADDED
Binary file (2.28 kB).
basenet/vgg16_bn.py ADDED
@@ -0,0 +1,71 @@
+ from collections import namedtuple
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.init as init
+ from torchvision import models
+
+ def init_weights(modules):
+     for m in modules:
+         if isinstance(m, nn.Conv2d):
+             init.xavier_uniform_(m.weight.data)
+             if m.bias is not None:
+                 m.bias.data.zero_()
+         elif isinstance(m, nn.BatchNorm2d):
+             m.weight.data.fill_(1)
+             m.bias.data.zero_()
+         elif isinstance(m, nn.Linear):
+             m.weight.data.normal_(0, 0.01)
+             m.bias.data.zero_()
+
+ class vgg16_bn(torch.nn.Module):
+     def __init__(self, pretrained=True, freeze=True):
+         super(vgg16_bn, self).__init__()
+         vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
+         self.slice1 = torch.nn.Sequential()
+         self.slice2 = torch.nn.Sequential()
+         self.slice3 = torch.nn.Sequential()
+         self.slice4 = torch.nn.Sequential()
+         self.slice5 = torch.nn.Sequential()
+         for x in range(12):  # conv2_2
+             self.slice1.add_module(str(x), vgg_pretrained_features[x])
+         for x in range(12, 19):  # conv3_3
+             self.slice2.add_module(str(x), vgg_pretrained_features[x])
+         for x in range(19, 29):  # conv4_3
+             self.slice3.add_module(str(x), vgg_pretrained_features[x])
+         for x in range(29, 39):  # conv5_3
+             self.slice4.add_module(str(x), vgg_pretrained_features[x])
+
+         # fc6, fc7 without atrous conv
+         self.slice5 = torch.nn.Sequential(
+             nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
+             nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6),
+             nn.Conv2d(1024, 1024, kernel_size=1)
+         )
+
+         if not pretrained:
+             init_weights(self.slice1.modules())
+             init_weights(self.slice2.modules())
+             init_weights(self.slice3.modules())
+             init_weights(self.slice4.modules())
+
+         init_weights(self.slice5.modules())  # no pretrained model for fc6 and fc7
+
+         if freeze:
+             for param in self.slice1.parameters():  # only first conv
+                 param.requires_grad = False
+
+     def forward(self, X):
+         h = self.slice1(X)
+         h_relu2_2 = h
+         h = self.slice2(h)
+         h_relu3_2 = h
+         h = self.slice3(h)
+         h_relu4_3 = h
+         h = self.slice4(h)
+         h_relu5_3 = h
+         h = self.slice5(h)
+         h_fc7 = h
+         vgg_outputs = namedtuple("VggOutputs", ['fc7', 'relu5_3', 'relu4_3', 'relu3_2', 'relu2_2'])
+         out = vgg_outputs(h_fc7, h_relu5_3, h_relu4_3, h_relu3_2, h_relu2_2)
+         return out
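The five slices expose the intermediate VGG activations that CRAFT's U-shaped decoder concatenates at each upsampling step. A minimal shape check, as a sketch (pretrained=False here only skips the torchvision weight download; the full CRAFT checkpoint loaded in app.py overwrites these weights anyway):

import torch
from basenet.vgg16_bn import vgg16_bn

backbone = vgg16_bn(pretrained=False, freeze=False).eval()
with torch.no_grad():
    out = backbone(torch.randn(1, 3, 768, 768))
# For a 768x768 input: fc7 [1,1024,48,48], relu5_3 [1,512,48,48],
# relu4_3 [1,512,96,96], relu3_2 [1,256,192,192], relu2_2 [1,128,384,384]
print([tuple(t.shape) for t in out])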
craft.py ADDED
@@ -0,0 +1,86 @@
+ """
+ Copyright (c) 2019-present NAVER Corp.
+ MIT License
+ """
+
+ # -*- coding: utf-8 -*-
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ from basenet.vgg16_bn import vgg16_bn, init_weights
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ print(device)
+
+ class double_conv(nn.Module):
+     def __init__(self, in_ch, mid_ch, out_ch):
+         super(double_conv, self).__init__()
+         self.conv = nn.Sequential(
+             nn.Conv2d(in_ch + mid_ch, mid_ch, kernel_size=1),
+             nn.BatchNorm2d(mid_ch),
+             nn.ReLU(inplace=True),
+             nn.Conv2d(mid_ch, out_ch, kernel_size=3, padding=1),
+             nn.BatchNorm2d(out_ch),
+             nn.ReLU(inplace=True)
+         )
+
+     def forward(self, x):
+         x = self.conv(x)
+         return x
+
+
+ class CRAFT(nn.Module):
+     def __init__(self, pretrained=False, freeze=False):
+         super(CRAFT, self).__init__()
+
+         """ Base network """
+         self.basenet = vgg16_bn(pretrained, freeze)
+
+         """ U network """
+         self.upconv1 = double_conv(1024, 512, 256)
+         self.upconv2 = double_conv(512, 256, 128)
+         self.upconv3 = double_conv(256, 128, 64)
+         self.upconv4 = double_conv(128, 64, 32)
+
+         num_class = 2
+         self.conv_cls = nn.Sequential(
+             nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
+             nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
+             nn.Conv2d(32, 16, kernel_size=3, padding=1), nn.ReLU(inplace=True),
+             nn.Conv2d(16, 16, kernel_size=1), nn.ReLU(inplace=True),
+             nn.Conv2d(16, num_class, kernel_size=1),
+         )
+
+         init_weights(self.upconv1.modules())
+         init_weights(self.upconv2.modules())
+         init_weights(self.upconv3.modules())
+         init_weights(self.upconv4.modules())
+         init_weights(self.conv_cls.modules())
+
+     def forward(self, x):
+         """ Base network """
+         sources = self.basenet(x)
+
+         """ U network """
+         y = torch.cat([sources[0], sources[1]], dim=1)
+         y = self.upconv1(y)
+
+         y = F.interpolate(y, size=sources[2].size()[2:], mode='bilinear', align_corners=False)
+         y = torch.cat([y, sources[2]], dim=1)
+         y = self.upconv2(y)
+
+         y = F.interpolate(y, size=sources[3].size()[2:], mode='bilinear', align_corners=False)
+         y = torch.cat([y, sources[3]], dim=1)
+         y = self.upconv3(y)
+
+         y = F.interpolate(y, size=sources[4].size()[2:], mode='bilinear', align_corners=False)
+         y = torch.cat([y, sources[4]], dim=1)
+         feature = self.upconv4(y)
+
+         y = self.conv_cls(feature)
+
+         return y.permute(0, 2, 3, 1), feature
+
+ if __name__ == '__main__':
+     model = CRAFT(pretrained=True).cuda()
+     output, _ = model(torch.randn(1, 3, 768, 768).cuda())
+     print(output.shape)
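The forward pass returns an NHWC tensor whose two channels are the region score and the affinity (link) score, plus the final upconv feature map that the link refiner consumes. A minimal CPU sketch of splitting them, assuming a 768x768 input:

import torch
from craft import CRAFT

model = CRAFT(pretrained=False).eval()
with torch.no_grad():
    y, feature = model(torch.randn(1, 3, 768, 768))
score_text = y[0, :, :, 0].numpy()  # region score, half the input resolution
score_link = y[0, :, :, 1].numpy()  # affinity (link) score
print(score_text.shape, feature.shape)  # (384, 384) and torch.Size([1, 32, 384, 384])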
craft_utils.py ADDED
@@ -0,0 +1,273 @@
+ """Modified to return the scores of the detection boxes."""
+
+ """
+ Copyright (c) 2019-present NAVER Corp.
+ MIT License
+ """
+
+ # -*- coding: utf-8 -*-
+ import numpy as np
+ import cv2
+ import math
+
+ """ auxiliary functions """
+
+
+ # unwarp coordinates
+ def warpCoord(Minv, pt):
+     out = np.matmul(Minv, (pt[0], pt[1], 1))
+     return np.array([out[0] / out[2], out[1] / out[2]])
+
+
+ """ end of auxiliary functions """
+
+
+ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
+     # prepare data
+     linkmap = linkmap.copy()
+     textmap = textmap.copy()
+     img_h, img_w = textmap.shape
+
+     # Helper function for generating random colors
+     def random_color():
+         return tuple(np.random.randint(0, 255, 3).tolist())
+
+     """ labeling method """
+     ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
+     ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)
+
+     text_score_comb = np.clip(text_score + link_score, 0, 1)
+     nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8),
+                                                                          connectivity=4)
+
+     # Create a color version of linkmap for visualization
+     visualized_linkmap = cv2.cvtColor(linkmap, cv2.COLOR_GRAY2BGR)
+     det = []
+     det_scores = []
+     mapper = []
+     for k in range(1, nLabels):
+         # visualize stats on the original linkmap
+         x, y, w, h = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP], stats[k, cv2.CC_STAT_WIDTH], stats[
+             k, cv2.CC_STAT_HEIGHT]
+         cv2.rectangle(visualized_linkmap, (x, y), (x + w, y + h), random_color(), 2)
+
+         # size filtering
+         size = stats[k, cv2.CC_STAT_AREA]
+         if size < 10: continue
+
+         # thresholding
+         if np.max(textmap[labels == k]) < text_threshold: continue
+
+         # make segmentation map
+         segmap = np.zeros(textmap.shape, dtype=np.uint8)
+         segmap[labels == k] = 255
+         segmap[np.logical_and(link_score == 1, text_score == 0)] = 0  # remove link area
+         x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
+         w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
+         niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
+         sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
+         # boundary check
+         if sx < 0: sx = 0
+         if sy < 0: sy = 0
+         if ex >= img_w: ex = img_w
+         if ey >= img_h: ey = img_h
+         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
+         segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)
+
+         # make box
+         np_contours = np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
+         rectangle = cv2.minAreaRect(np_contours)
+         box = cv2.boxPoints(rectangle)
+
+         # align diamond-shape
+         w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
+         box_ratio = max(w, h) / (min(w, h) + 1e-5)
+         if abs(1 - box_ratio) <= 0.1:
+             l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
+             t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
+             box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)
+
+         # make clock-wise order
+         startidx = box.sum(axis=1).argmin()
+         box = np.roll(box, 4 - startidx, 0)
+         box = np.array(box)
+
+         det.append(box)
+         mapper.append(k)
+         det_scores.append(np.max(textmap[labels == k]))
+     # # Show the visualized linkmap with stats drawn
+     # cv2.imshow("Visualized Linkmap with Stats", visualized_linkmap)
+     # cv2.waitKey(0)
+     # cv2.destroyAllWindows()
+     return det, labels, mapper, det_scores
+
+ def getPoly_core(boxes, labels, mapper, linkmap):
+     # configs
+     num_cp = 5
+     max_len_ratio = 0.7
+     expand_ratio = 1.45
+     max_r = 2.0
+     step_r = 0.2
+
+     polys = []
+     for k, box in enumerate(boxes):
+         # size filter for small instances
+         w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
+         if w < 10 or h < 10:
+             polys.append(None)
+             continue
+
+         # warp image
+         tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
+         M = cv2.getPerspectiveTransform(box, tar)
+         word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
+         try:
+             Minv = np.linalg.inv(M)
+         except:
+             polys.append(None)
+             continue
+
+         # binarization for the selected label
+         cur_label = mapper[k]
+         word_label[word_label != cur_label] = 0
+         word_label[word_label > 0] = 1
+
+         """ Polygon generation """
+         # find top/bottom contours
+         cp = []
+         max_len = -1
+         for i in range(w):
+             region = np.where(word_label[:, i] != 0)[0]
+             if len(region) < 2: continue
+             cp.append((i, region[0], region[-1]))
+             length = region[-1] - region[0] + 1
+             if length > max_len: max_len = length
+
+         # pass if max_len is similar to h
+         if h * max_len_ratio < max_len:
+             polys.append(None)
+             continue
+
+         # get pivot points with fixed length
+         tot_seg = num_cp * 2 + 1
+         seg_w = w / tot_seg  # segment width
+         pp = [None] * num_cp  # init pivot points
+         cp_section = [[0, 0]] * tot_seg
+         seg_height = [0] * num_cp
+         seg_num = 0
+         num_sec = 0
+         prev_h = -1
+         for i in range(0, len(cp)):
+             (x, sy, ey) = cp[i]
+             if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
+                 # average previous segment
+                 if num_sec == 0: break
+                 cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
+                 num_sec = 0
+
+                 # reset variables
+                 seg_num += 1
+                 prev_h = -1
+
+             # accumulate center points
+             cy = (sy + ey) * 0.5
+             cur_h = ey - sy + 1
+             cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
+             num_sec += 1
+
+             if seg_num % 2 == 0: continue  # no polygon area
+
+             if prev_h < cur_h:
+                 pp[int((seg_num - 1) / 2)] = (x, cy)
+                 seg_height[int((seg_num - 1) / 2)] = cur_h
+                 prev_h = cur_h
+
+         # process the last segment
+         if num_sec != 0:
+             cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]
+
+         # pass if the number of pivots is not sufficient or the segment width is smaller than the character height
+         if None in pp or seg_w < np.max(seg_height) * 0.25:
+             polys.append(None)
+             continue
+
+         # calc median maximum of pivot points
+         half_char_h = np.median(seg_height) * expand_ratio / 2
+
+         # calc gradient and apply it to make horizontal pivots
+         new_pp = []
+         for i, (x, cy) in enumerate(pp):
+             dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
+             dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
+             if dx == 0:  # gradient is zero
+                 new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
+                 continue
+             rad = - math.atan2(dy, dx)
+             c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
+             new_pp.append([x - s, cy - c, x + s, cy + c])
+
+         # get edge points to cover character heatmaps
+         isSppFound, isEppFound = False, False
+         grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
+         grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
+         for r in np.arange(0.5, max_r, step_r):
+             dx = 2 * half_char_h * r
+             if not isSppFound:
+                 line_img = np.zeros(word_label.shape, dtype=np.uint8)
+                 dy = grad_s * dx
+                 p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
+                 cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
+                 if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
+                     spp = p
+                     isSppFound = True
+             if not isEppFound:
+                 line_img = np.zeros(word_label.shape, dtype=np.uint8)
+                 dy = grad_e * dx
+                 p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
+                 cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
+                 if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
+                     epp = p
+                     isEppFound = True
+             if isSppFound and isEppFound:
+                 break
+
+         # pass if the boundary of the polygon is not found
+         if not (isSppFound and isEppFound):
+             polys.append(None)
+             continue
+
+         # make the final polygon
+         poly = []
+         poly.append(warpCoord(Minv, (spp[0], spp[1])))
+         for p in new_pp:
+             poly.append(warpCoord(Minv, (p[0], p[1])))
+         poly.append(warpCoord(Minv, (epp[0], epp[1])))
+         poly.append(warpCoord(Minv, (epp[2], epp[3])))
+         for p in reversed(new_pp):
+             poly.append(warpCoord(Minv, (p[2], p[3])))
+         poly.append(warpCoord(Minv, (spp[2], spp[3])))
+
+         # add to the final result
+         polys.append(np.array(poly))
+
+     return polys
+
+
+ def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
+     boxes, labels, mapper, det_scores = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)
+
+     if poly:
+         polys = getPoly_core(boxes, labels, mapper, linkmap)
+     else:
+         polys = [None] * len(boxes)
+
+     return boxes, polys, det_scores
+
+
+ def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
+     for i in range(len(polys)):
+         if polys[i] is not None:
+             for j in range(len(polys[i])):
+                 polys[i][j][0] *= ratio_w * ratio_net
+                 polys[i][j][1] *= ratio_h * ratio_net
+     return polys
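Because these helpers work on plain NumPy score maps, they can be exercised without running the network at all. A synthetic sketch (the fake region below stands in for real CRAFT output; ratio_w and ratio_h would normally come from imgproc.resize_aspect_ratio):

import numpy as np
import craft_utils

score_text = np.zeros((384, 384), dtype=np.float32)
score_link = np.zeros((384, 384), dtype=np.float32)
score_text[100:120, 50:150] = 0.9  # one fake word region

boxes, polys, det_scores = craft_utils.getDetBoxes(
    score_text, score_link, text_threshold=0.7, link_threshold=0.4, low_text=0.4)
# score maps are half resolution, so ratio_net=2 maps boxes back to image space
boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w=1.0, ratio_h=1.0)
print(len(boxes), det_scores)  # 1 box, score 0.9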
crop_Word/crop_1.jpg ADDED
crop_Word/crop_10.jpg ADDED
crop_Word/crop_11.jpg ADDED
crop_Word/crop_12.jpg ADDED
crop_Word/crop_13.jpg ADDED
crop_Word/crop_14.jpg ADDED
crop_Word/crop_15.jpg ADDED
crop_Word/crop_16.jpg ADDED
crop_Word/crop_17.jpg ADDED
crop_Word/crop_18.jpg ADDED
crop_Word/crop_19.jpg ADDED
crop_Word/crop_2.jpg ADDED
crop_Word/crop_20.jpg ADDED
crop_Word/crop_21.jpg ADDED
crop_Word/crop_22.jpg ADDED
crop_Word/crop_23.jpg ADDED
crop_Word/crop_24.jpg ADDED
crop_Word/crop_25.jpg ADDED
crop_Word/crop_26.jpg ADDED
crop_Word/crop_27.jpg ADDED
crop_Word/crop_28.jpg ADDED
crop_Word/crop_29.jpg ADDED
crop_Word/crop_3.jpg ADDED
crop_Word/crop_30.jpg ADDED
crop_Word/crop_31.jpg ADDED
crop_Word/crop_32.jpg ADDED
crop_Word/crop_33.jpg ADDED
crop_Word/crop_34.jpg ADDED
crop_Word/crop_4.jpg ADDED
crop_Word/crop_5.jpg ADDED
crop_Word/crop_6.jpg ADDED
crop_Word/crop_7.jpg ADDED
crop_Word/crop_8.jpg ADDED
crop_Word/crop_9.jpg ADDED
crop_images.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import numpy as np
+ import cv2
+ import pandas as pd
+
+
+ def crop(pts, image):
+     """
+     Takes 8 points as input and returns the cropped,
+     masked image with a white background.
+     """
+     # Clamp pts to the image bounds
+     pts[:, 0] = np.clip(pts[:, 0], 0, image.shape[1] - 1)
+     pts[:, 1] = np.clip(pts[:, 1], 0, image.shape[0] - 1)
+
+     rect = cv2.boundingRect(pts)
+     x, y, w, h = rect
+     x = int(x)
+     y = int(y)
+     w = int(w)
+     if h == 0 or w == 0:
+         return np.ones((10, 10, 3),
+                        np.uint8) * 255  # return a 10x10 white image; adjust this size if needed
+
+     cropped = image[y:y + h, x:x + w].copy()
+     pts = pts - pts.min(axis=0)
+     mask = np.zeros(cropped.shape[:2], np.uint8)
+     # print("mask shape:", mask.shape)
+     # print("cropped shape:", cropped.shape)
+     # print("pts values:", pts)
+
+     cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
+     dst = cv2.bitwise_and(cropped, cropped, mask=mask)
+     bg = np.ones_like(cropped, np.uint8) * 255
+     cv2.bitwise_not(bg, bg, mask=mask)
+     dst2 = bg + dst
+     return dst2
+
+
+ def generate_words(image_name, score_bbox, image):
+     num_bboxes = len(score_bbox)
+     for num in range(num_bboxes):
+         bbox_coords = score_bbox[num].split(':')[-1].split(',\n')
+         if bbox_coords != ['{}']:
+             l_t = float(bbox_coords[0].strip(' array([').strip(']').split(',')[0])
+             t_l = float(bbox_coords[0].strip(' array([').strip(']').split(',')[1])
+             r_t = float(bbox_coords[1].strip(' [').strip(']').split(',')[0])
+             t_r = float(bbox_coords[1].strip(' [').strip(']').split(',')[1])
+             r_b = float(bbox_coords[2].strip(' [').strip(']').split(',')[0])
+             b_r = float(bbox_coords[2].strip(' [').strip(']').split(',')[1])
+             l_b = float(bbox_coords[3].strip(' [').strip(']').split(',')[0])
+             b_l = float(bbox_coords[3].strip(' [').strip(']').split(',')[1].strip(']'))
+             pts = np.array([[int(l_t), int(t_l)], [int(r_t), int(t_r)], [int(r_b), int(b_r)], [int(l_b), int(b_l)]],
+                            dtype=np.int32)
+
+             if np.all(pts > 0):
+
+                 word = crop(pts, image)
+
+                 folder = '/'.join(image_name.split('/')[:-1])
+                 # CHANGE DIR
+                 dir = '/content/Pipeline/Crop Words/'
+                 if os.path.isdir(os.path.join(dir + folder)) == False:
+                     os.makedirs(os.path.join(dir + folder))
+                 try:
+                     file_name = os.path.join(dir + image_name)
+                     cv2.imwrite(
+                         file_name + '_{}_{}_{}_{}_{}_{}_{}_{}.jpg'.format(l_t, t_l, r_t, t_r, r_b, b_r, l_b, b_l), word)
+                     print('Image saved to ' + file_name + '_{}_{}_{}_{}_{}_{}_{}_{}.jpg'.format(l_t, t_l, r_t, t_r, r_b,
+                                                                                                 b_r, l_b, b_l))
+                 except:
+                     continue
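crop() clamps the quadrilateral to the image, crops its bounding rectangle, and whites out everything outside the polygon. A minimal sketch on a dummy image (note the int32 dtype, which the OpenCV calls expect):

import numpy as np
import crop_images

img = np.full((100, 200, 3), 128, np.uint8)  # flat gray dummy image
pts = np.array([[20, 10], [120, 15], [115, 80], [25, 75]], dtype=np.int32)
word = crop_images.crop(pts, img)
print(word.shape)  # a patch the size of the bounding rect, white outside the polygon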
file_utils.py ADDED
@@ -0,0 +1,104 @@
+ # -*- coding: utf-8 -*-
+ import os
+ import numpy as np
+ import cv2
+ import matplotlib.pyplot as plt
+ import crop_images
+ import imgproc
+
+
+ # borrowed from https://github.com/lengstrom/fast-style-transfer/blob/master/src/utils.py
+ def get_files(img_dir):
+     imgs, masks, xmls = list_files(img_dir)
+     return imgs, masks, xmls
+
+
+ def list_files(in_path):
+     img_files = []
+     mask_files = []
+     gt_files = []
+     for (dirpath, dirnames, filenames) in os.walk(in_path):
+         for file in filenames:
+             filename, ext = os.path.splitext(file)
+             ext = str.lower(ext)
+             if ext == '.jpg' or ext == '.jpeg' or ext == '.gif' or ext == '.png' or ext == '.pgm':
+                 img_files.append(os.path.join(dirpath, file))
+             elif ext == '.bmp':
+                 mask_files.append(os.path.join(dirpath, file))
+             elif ext == '.xml' or ext == '.gt' or ext == '.txt':
+                 gt_files.append(os.path.join(dirpath, file))
+             elif ext == '.zip':
+                 continue
+     # img_files.sort()
+     # mask_files.sort()
+     # gt_files.sort()
+     return img_files, mask_files, gt_files
+
+
+ def saveResult(img_file, img, boxes, dirname='Results', verticals=None, texts=None):
+     """ Save the text detection results one by one.
+     Args:
+         img_file (str): image file name
+         img (array): raw image context
+         boxes (array): array of result boxes
+             Shape: [num_detections, 4] for BB output / [num_detections, 4] for QUAD output
+     Return:
+         None
+     """
+
+     img = np.array(img)
+
+     # build the result file names: image name and extension
+     filename, file_ext = os.path.splitext(os.path.basename(img_file))
+     # result directory (joined safely regardless of a trailing separator)
+     res_file = os.path.join(dirname, "res_" + filename + '.txt')
+     res_img_file = os.path.join(dirname, "res_" + filename + '.jpg')
+
+     if not os.path.isdir(dirname):
+         os.mkdir(dirname)
+     with open(res_file, 'w') as f:
+         for i, box in enumerate(boxes):
+             poly = np.array(box).astype(np.int32).reshape((-1))
+             strResult = ','.join([str(p) for p in poly]) + '\r\n'
+             f.write(strResult)
+             # uncomment the drawing calls below to draw the bounding boxes again
+             poly = poly.reshape(-1, 2)
+             # cv2.polylines(img, [poly.reshape((-1, 1, 2))], True, color=(0, 0, 255), thickness=2)
+             # ptColor = (0, 255, 255)
+             xmin = min(poly[:, 0])
+             xmax = max(poly[:, 0])
+             ymin = min(poly[:, 1])
+             ymax = max(poly[:, 1])
+             width = xmax - xmin
+             height = ymax - ymin
+             # these points come from the txt file
+             pts = np.array([[xmin, ymax], [xmax, ymax], [xmax, ymin], [xmin, ymin]])
+
+             word = crop_images.crop(pts, img)
+
+             folder = '/'.join(filename.split('/')[:-1])
+             # crops are written to the cropWord folder
+             dir = 'cropWord/'
+             if os.path.isdir(os.path.join(dir + folder)) == False:
+                 os.makedirs(os.path.join(dir + folder))
+             try:
+                 file_name = os.path.join(dir + filename)
+                 cv2.imwrite(file_name + str(i) + '.jpg', word)
+             except:
+                 continue
+
+             if verticals is not None:
+                 if verticals[i]:
+                     ptColor = (255, 0, 0)
+
+             if texts is not None:
+                 font = cv2.FONT_HERSHEY_SIMPLEX
+                 font_scale = 0.5
+                 cv2.putText(img, "{}".format(texts[i]), (poly[0][0] + 1, poly[0][1] + 1), font, font_scale, (0, 0, 0),
+                             thickness=1)
+                 cv2.putText(img, "{:.2f}".format(texts[i]), tuple(poly[0]), font, font_scale, (0, 255, 255),
+                             thickness=1)
+
+     # Save result image
+     # cv2.imwrite(res_img_file, img)
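get_files/list_files simply walk a folder and bucket files by extension, while saveResult writes each box's eight coordinates to Results/res_<name>.txt and crops the boxes via crop_images.crop. A minimal sketch of the enumeration side, assuming an image folder like the --test_folder default:

import file_utils

imgs, masks, gts = file_utils.get_files("data_image")
print(len(imgs), "images,", len(gts), "ground-truth files")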
git ADDED
File without changes
imgproc.py ADDED
@@ -0,0 +1,74 @@
+ """
+ Copyright (c) 2019-present NAVER Corp.
+ MIT License
+ """
+
+ # -*- coding: utf-8 -*-
+ import numpy as np
+ from skimage import io
+ import cv2
+
+
+ def loadImage(img_file):
+     img = io.imread(img_file)  # RGB order
+     if img.shape[0] == 2: img = img[0]
+     if len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+     if img.shape[2] == 4: img = img[:, :, :3]
+     img = np.array(img)
+
+     return img
+
+
+ def normalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
+     # should be RGB order
+     img = in_img.copy().astype(np.float32)
+
+     img -= np.array([mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32)
+     img /= np.array([variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0], dtype=np.float32)
+     return img
+
+
+ def denormalizeMeanVariance(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
+     # should be RGB order
+     img = in_img.copy()
+     img *= variance
+     img += mean
+     img *= 255.0
+     img = np.clip(img, 0, 255).astype(np.uint8)
+     return img
+
+
+ def resize_aspect_ratio(img, square_size, interpolation, mag_ratio=1):
+     height, width, channel = img.shape
+
+     # magnify image size
+     target_size = mag_ratio * max(height, width)
+
+     # cap at the maximum canvas size
+     if target_size > square_size:
+         target_size = square_size
+
+     ratio = target_size / max(height, width)
+
+     target_h, target_w = int(height * ratio), int(width * ratio)
+     proc = cv2.resize(img, (target_w, target_h), interpolation=interpolation)
+
+     # make a canvas padded to a multiple of 32 and paste the image
+     target_h32, target_w32 = target_h, target_w
+     if target_h % 32 != 0:
+         target_h32 = target_h + (32 - target_h % 32)
+     if target_w % 32 != 0:
+         target_w32 = target_w + (32 - target_w % 32)
+     resized = np.zeros((target_h32, target_w32, channel), dtype=np.float32)
+     resized[0:target_h, 0:target_w, :] = proc
+     target_h, target_w = target_h32, target_w32
+
+     size_heatmap = (int(target_w / 2), int(target_h / 2))
+
+     return resized, ratio, size_heatmap
+
+
+ def cvt2HeatmapImg(img):
+     img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
+     img = cv2.applyColorMap(img, cv2.COLORMAP_JET)
+     return img
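Chained together, these helpers turn a file on disk into the padded, normalized NCHW tensor CRAFT expects. A minimal sketch using the app's defaults (canvas_size=1280, mag_ratio=1.5) on a random stand-in image:

import numpy as np
import torch
import cv2
import imgproc

img = np.random.randint(0, 255, (600, 800, 3), dtype=np.uint8)  # stand-in for loadImage(path)
resized, ratio, size_heatmap = imgproc.resize_aspect_ratio(
    img, square_size=1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
x = torch.from_numpy(imgproc.normalizeMeanVariance(resized)).permute(2, 0, 1).unsqueeze(0)
ratio_h = ratio_w = 1 / ratio  # what adjustResultCoordinates later needs
print(x.shape, size_heatmap)  # torch.Size([1, 3, 928, 1216]) (608, 464)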
refinenet.py ADDED
@@ -0,0 +1,65 @@
+ """
+ Copyright (c) 2019-present NAVER Corp.
+ MIT License
+ """
+
+ # -*- coding: utf-8 -*-
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torch.autograd import Variable
+ from basenet.vgg16_bn import init_weights
+
+
+ class RefineNet(nn.Module):
+     def __init__(self):
+         super(RefineNet, self).__init__()
+
+         self.last_conv = nn.Sequential(
+             nn.Conv2d(34, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
+             nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
+             nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True)
+         )
+
+         self.aspp1 = nn.Sequential(
+             nn.Conv2d(64, 128, kernel_size=3, dilation=6, padding=6), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 1, kernel_size=1)
+         )
+
+         self.aspp2 = nn.Sequential(
+             nn.Conv2d(64, 128, kernel_size=3, dilation=12, padding=12), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 1, kernel_size=1)
+         )
+
+         self.aspp3 = nn.Sequential(
+             nn.Conv2d(64, 128, kernel_size=3, dilation=18, padding=18), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 1, kernel_size=1)
+         )
+
+         self.aspp4 = nn.Sequential(
+             nn.Conv2d(64, 128, kernel_size=3, dilation=24, padding=24), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 128, kernel_size=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
+             nn.Conv2d(128, 1, kernel_size=1)
+         )
+
+         init_weights(self.last_conv.modules())
+         init_weights(self.aspp1.modules())
+         init_weights(self.aspp2.modules())
+         init_weights(self.aspp3.modules())
+         init_weights(self.aspp4.modules())
+
+     def forward(self, y, upconv4):
+         refine = torch.cat([y.permute(0, 3, 1, 2), upconv4], dim=1)
+         refine = self.last_conv(refine)
+
+         aspp1 = self.aspp1(refine)
+         aspp2 = self.aspp2(refine)
+         aspp3 = self.aspp3(refine)
+         aspp4 = self.aspp4(refine)
+
+         # out = torch.add([aspp1, aspp2, aspp3, aspp4], dim=1)
+         out = aspp1 + aspp2 + aspp3 + aspp4
+         return out.permute(0, 2, 3, 1)  # , refine.permute(0,2,3,1)
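The refiner takes CRAFT's NHWC output together with the 32-channel feature map and produces a single refined link channel, which test-time code typically substitutes for y[..., 1]. A minimal CPU sketch with random weights:

import torch
from craft import CRAFT
from refinenet import RefineNet

net, refiner = CRAFT(pretrained=False).eval(), RefineNet().eval()
with torch.no_grad():
    y, feature = net(torch.randn(1, 3, 768, 768))
    y_refiner = refiner(y, feature)
score_link = y_refiner[0, :, :, 0].numpy()  # refined affinity map
print(score_link.shape)  # (384, 384)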
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ numpy
+ pandas
+ matplotlib
+ scikit-learn
+ tensorflow
+ seaborn
+ beautifulsoup4
+ requests
+ plotly
+ keras
+ vietocr
+ streamlit
+ # imported by the code as cv2 and skimage:
+ opencv-python-headless
+ scikit-image
+ torch --index-url https://download.pytorch.org/whl/cu121
+ torchvision --index-url https://download.pytorch.org/whl/cu121
+ torchaudio --index-url https://download.pytorch.org/whl/cu121
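Two caveats on this list: several pins (tensorflow, keras, seaborn, plotly, scikit-learn, beautifulsoup4, requests) are never imported by the code in this commit, and the cu121 index pulls CUDA builds of torch even though app.py defaults to CPU inference; on a CPU-only Space the default PyPI wheels (or the /whl/cpu index) would be lighter. Note also that pip generally expects --index-url on its own line in a requirements file, so if installation fails these flags may need to move to a standalone --extra-index-url line.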