HaoFeng2019 committed
Commit 532251c
Parent: 059ec7f

Upload 8 files

Files changed (6):
  1. GeoTr.py +2 -2
  2. app.py +15 -15
  3. demo.py +12 -12
  4. position_encoding.py +1 -1
  5. requirements.txt +1 -1
  6. seg.py +3 -2
GeoTr.py CHANGED
@@ -107,7 +107,7 @@ class TransDecoder(nn.Module):
         self.position_embedding = build_position_encoding(hidden_dim)
 
     def forward(self, imgf, query_embed):
-        pos = self.position_embedding(torch.ones(imgf.shape[0], imgf.shape[2], imgf.shape[3]).bool().cuda())  # torch.Size([1, 128, 36, 36])
+        pos = self.position_embedding(torch.ones(imgf.shape[0], imgf.shape[2], imgf.shape[3]).bool())  #.cuda())  # torch.Size([1, 128, 36, 36])
 
         bs, c, h, w = imgf.shape
         imgf = imgf.flatten(2).permute(2, 0, 1)
@@ -129,7 +129,7 @@ class TransEncoder(nn.Module):
         self.position_embedding = build_position_encoding(hidden_dim)
 
     def forward(self, imgf):
-        pos = self.position_embedding(torch.ones(imgf.shape[0], imgf.shape[2], imgf.shape[3]).bool().cuda())  # torch.Size([1, 128, 36, 36])
+        pos = self.position_embedding(torch.ones(imgf.shape[0], imgf.shape[2], imgf.shape[3]).bool())  #.cuda())  # torch.Size([1, 128, 36, 36])
         bs, c, h, w = imgf.shape
         imgf = imgf.flatten(2).permute(2, 0, 1)
         pos = pos.flatten(2).permute(2, 0, 1)
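The GeoTr.py change keeps the boolean mask for the positional encoding on the CPU instead of forcing it onto a GPU. Below is a minimal sketch of a device-agnostic alternative, assuming `imgf` is the [B, C, H, W] feature map passed to forward(); the helper `make_pos_mask` is illustrative and not part of the repository.

```python
import torch

def make_pos_mask(imgf: torch.Tensor) -> torch.Tensor:
    # Build the all-ones mask on whatever device imgf already lives on,
    # so the same code runs unchanged on CPU and GPU.
    b, _, h, w = imgf.shape
    return torch.ones(b, h, w, dtype=torch.bool, device=imgf.device)

# e.g. inside forward(): pos = self.position_embedding(make_pos_mask(imgf))
```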
app.py CHANGED
@@ -11,10 +11,10 @@ import torch.nn.functional as F
 import skimage.io as io
 import numpy as np
 import cv2
-#import glob
+import glob
 import os
 from PIL import Image
-#import argparse
+import argparse
 import warnings
 warnings.filterwarnings('ignore')
 
@@ -47,10 +47,10 @@ def reload_model(model, path=""):
         return model
     else:
         model_dict = model.state_dict()
-        pretrained_dict = torch.load(path, map_location='cuda:0')
-        print(len(pretrained_dict.keys()))
+        pretrained_dict = torch.load(path, map_location='cpu')
+        #print(len(pretrained_dict.keys()))
         pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if k[7:] in model_dict}
-        print(len(pretrained_dict.keys()))
+        #print(len(pretrained_dict.keys()))
         model_dict.update(pretrained_dict)
         model.load_state_dict(model_dict)
 
@@ -62,10 +62,10 @@ def reload_segmodel(model, path=""):
         return model
     else:
         model_dict = model.state_dict()
-        pretrained_dict = torch.load(path, map_location='cuda:0')
-        print(len(pretrained_dict.keys()))
+        pretrained_dict = torch.load(path, map_location='cpu')
+        #print(len(pretrained_dict.keys()))
         pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items() if k[6:] in model_dict}
-        print(len(pretrained_dict.keys()))
+        #print(len(pretrained_dict.keys()))
         model_dict.update(pretrained_dict)
         model.load_state_dict(model_dict)
 
@@ -81,13 +81,13 @@ def rec(opt):
     if not os.path.exists(opt.isave_path):  # create save path
         os.mkdir(opt.isave_path)
 
-    GeoTr_Seg_model = GeoTr_Seg().cuda()
+    GeoTr_Seg_model = GeoTr_Seg()#.cuda()
     # reload segmentation model
     reload_segmodel(GeoTr_Seg_model.msk, opt.Seg_path)
     # reload geometric unwarping model
    reload_model(GeoTr_Seg_model.GeoTr, opt.GeoTr_path)
 
-    IllTr_model = IllTr().cuda()
+    IllTr_model = IllTr()#.cuda()
     # reload illumination rectification model
     reload_model(IllTr_model, opt.IllTr_path)
 
@@ -107,7 +107,7 @@ def rec(opt):
 
     with torch.no_grad():
         # geometric unwarping
-        bm = GeoTr_Seg_model(im.cuda())
+        bm = GeoTr_Seg_model(im)
         bm = bm.cpu()
         bm0 = cv2.resize(bm[0, 0].numpy(), (w, h))  # x flow
         bm1 = cv2.resize(bm[0, 1].numpy(), (w, h))  # y flow
@@ -132,11 +132,11 @@ def rec(opt):
 
 
 def process_image(input_image):
-    GeoTr_Seg_model = GeoTr_Seg().cuda()
+    GeoTr_Seg_model = GeoTr_Seg()#.cuda()
     reload_segmodel(GeoTr_Seg_model.msk, './model_pretrained/seg.pth')
     reload_model(GeoTr_Seg_model.GeoTr, './model_pretrained/geotr.pth')
 
-    IllTr_model = IllTr().cuda()
+    IllTr_model = IllTr()#.cuda()
     reload_model(IllTr_model, './model_pretrained/illtr.pth')
 
     GeoTr_Seg_model.eval()
@@ -149,7 +149,7 @@ def process_image(input_image):
     im = torch.from_numpy(im).float().unsqueeze(0)
 
     with torch.no_grad():
-        bm = GeoTr_Seg_model(im.cuda())
+        bm = GeoTr_Seg_model(im)
         bm = bm.cpu()
         bm0 = cv2.resize(bm[0, 0].numpy(), (w, h))
         bm1 = cv2.resize(bm[0, 1].numpy(), (w, h))
@@ -173,6 +173,6 @@ input_image = gr.inputs.Image()
 output_image = gr.outputs.Image(type='pil')
 
 
-iface = gr.Interface(fn=process_image, inputs=input_image, outputs=output_image, title="Image Correction")
+iface = gr.Interface(fn=process_image, inputs=input_image, outputs=output_image, title="DocTr")
 iface.launch()
 
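app.py now loads both checkpoints with map_location='cpu' and builds the models and inputs without .cuda(), so the demo runs on CPU-only hardware. A minimal sketch of a device-aware variant, assuming GPU support should still be used when available; the names `device` and `load_pretrained` are illustrative and not part of the commit.

```python
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def load_pretrained(model, path, prefix_len=7):
    # Strip a fixed-length key prefix (e.g. 'module.' from DataParallel checkpoints)
    # and copy only the entries that exist in the target model.
    model_dict = model.state_dict()
    pretrained = torch.load(path, map_location=device)
    pretrained = {k[prefix_len:]: v for k, v in pretrained.items() if k[prefix_len:] in model_dict}
    model_dict.update(pretrained)
    model.load_state_dict(model_dict)
    return model

# e.g. GeoTr_Seg_model = GeoTr_Seg().to(device)
#      load_pretrained(GeoTr_Seg_model.GeoTr, './model_pretrained/geotr.pth')
#      bm = GeoTr_Seg_model(im.to(device))
```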
 
demo.py CHANGED
@@ -47,10 +47,10 @@ def reload_model(model, path=""):
         return model
     else:
         model_dict = model.state_dict()
-        pretrained_dict = torch.load(path, map_location='cuda:0')
-        print(len(pretrained_dict.keys()))
+        pretrained_dict = torch.load(path, map_location='cpu')
+        #print(len(pretrained_dict.keys()))
         pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if k[7:] in model_dict}
-        print(len(pretrained_dict.keys()))
+        #print(len(pretrained_dict.keys()))
         model_dict.update(pretrained_dict)
         model.load_state_dict(model_dict)
 
@@ -62,10 +62,10 @@ def reload_segmodel(model, path=""):
         return model
     else:
         model_dict = model.state_dict()
-        pretrained_dict = torch.load(path, map_location='cuda:0')
-        print(len(pretrained_dict.keys()))
+        pretrained_dict = torch.load(path, map_location='cpu')
+        #print(len(pretrained_dict.keys()))
         pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items() if k[6:] in model_dict}
-        print(len(pretrained_dict.keys()))
+        #print(len(pretrained_dict.keys()))
         model_dict.update(pretrained_dict)
         model.load_state_dict(model_dict)
 
@@ -81,13 +81,13 @@ def rec(opt):
     if not os.path.exists(opt.isave_path):  # create save path
         os.mkdir(opt.isave_path)
 
-    GeoTr_Seg_model = GeoTr_Seg().cuda()
+    GeoTr_Seg_model = GeoTr_Seg()#.cuda()
     # reload segmentation model
     reload_segmodel(GeoTr_Seg_model.msk, opt.Seg_path)
     # reload geometric unwarping model
     reload_model(GeoTr_Seg_model.GeoTr, opt.GeoTr_path)
 
-    IllTr_model = IllTr().cuda()
+    IllTr_model = IllTr()#.cuda()
     # reload illumination rectification model
     reload_model(IllTr_model, opt.IllTr_path)
 
@@ -107,7 +107,7 @@ def rec(opt):
 
     with torch.no_grad():
         # geometric unwarping
-        bm = GeoTr_Seg_model(im.cuda())
+        bm = GeoTr_Seg_model(im)
         bm = bm.cpu()
         bm0 = cv2.resize(bm[0, 0].numpy(), (w, h))  # x flow
         bm1 = cv2.resize(bm[0, 1].numpy(), (w, h))  # y flow
@@ -132,11 +132,11 @@ def rec(opt):
 
 
 def process_image(input_image):
-    GeoTr_Seg_model = GeoTr_Seg().cuda()
+    GeoTr_Seg_model = GeoTr_Seg()#.cuda()
     reload_segmodel(GeoTr_Seg_model.msk, './model_pretrained/seg.pth')
     reload_model(GeoTr_Seg_model.GeoTr, './model_pretrained/geotr.pth')
 
-    IllTr_model = IllTr().cuda()
+    IllTr_model = IllTr()#.cuda()
     reload_model(IllTr_model, './model_pretrained/illtr.pth')
 
     GeoTr_Seg_model.eval()
@@ -149,7 +149,7 @@ def process_image(input_image):
     im = torch.from_numpy(im).float().unsqueeze(0)
 
     with torch.no_grad():
-        bm = GeoTr_Seg_model(im.cuda())
+        bm = GeoTr_Seg_model(im)
         bm = bm.cpu()
         bm0 = cv2.resize(bm[0, 0].numpy(), (w, h))
         bm1 = cv2.resize(bm[0, 1].numpy(), (w, h))
position_encoding.py CHANGED
@@ -58,7 +58,7 @@ class PositionEmbeddingSine(nn.Module):
         y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
         x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
 
-        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32).cuda()
+        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32)#.cuda()
         dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
 
         pos_x = x_embed[:, :, :, None] / dim_t
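Here the frequency vector dim_t is simply created on the CPU on every call. A sketch of an alternative, assumed rather than taken from the repository, that precomputes the terms once and registers them as a buffer so model.to(device) moves them along with the rest of the embedding:

```python
import torch
import torch.nn as nn

class SineFreqs(nn.Module):
    def __init__(self, num_pos_feats: int = 64, temperature: float = 10000.0):
        super().__init__()
        dim_t = torch.arange(num_pos_feats, dtype=torch.float32)
        dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats)
        # Buffers move with .to(device)/.cuda() but are not trained.
        self.register_buffer('dim_t', dim_t)

    def forward(self, x_embed: torch.Tensor) -> torch.Tensor:
        # x_embed: [..., H, W]; broadcast the frequencies over a new last dim.
        return x_embed[..., None] / self.dim_t
```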
requirements.txt CHANGED
@@ -2,7 +2,7 @@ gradio
 numpy
 opencv_python
 Pillow
-skimage
+scikit_image
 timm
 torch
 torchvision
seg.py CHANGED
@@ -40,6 +40,7 @@ class REBNCONV(nn.Module):
         self.relu_s1 = nn.ReLU(inplace=True)
 
     def forward(self, x):
+        #print(x.device)
         hx = x
         xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
 
@@ -559,9 +560,9 @@ def get_parameter_number(net):
 
 
 if __name__ == '__main__':
-    net = U2NET(4, 1).cuda()
+    net = U2NET(4, 1)#.cuda()
     print(get_parameter_number(net))  # 69090500; 69442032 after adding attention
     with torch.no_grad():
-        inputs = torch.zeros(1, 3, 256, 256).cuda()
+        inputs = torch.zeros(1, 3, 256, 256)#.cuda()
         outs = net(inputs)
         print(outs[0].shape)  # torch.Size([2, 3, 256, 256]) torch.Size([2, 2, 256, 256])