import torch
import cv2
import numpy as np
import torch.nn.functional as F
import time
import argparse
import os
import os.path as osp

from models.TextEnhancement import MARCONetPlus
from utils.utils_image import get_image_paths, imread_uint, uint2tensor4, tensor2uint
from networks.rrdbnet2_arch import RRDBNet as BSRGAN


def inference(input_path=None, output_path=None, aligned=False, bg_sr=False, scale_factor=2, save_text=False, device=None):
    # Resolve the compute device: default to CUDA when available, unless 'cpu' is requested explicitly.
    if device == 'cpu':
        use_cuda = False
    else:  # None, 'gpu', or any other value falls back to auto-detection
        use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    if input_path is None:
        exit('Input image path is None. Please see our document.')
    if output_path is None:
        TIMESTAMP = time.strftime("%m-%d_%H-%M", time.localtime())
        if input_path[-1] == '/' or input_path[-1] == '\\':
            input_path = input_path[:-1]
        output_path = osp.join(input_path + '_' + TIMESTAMP + '_MARCONetPlus')
    os.makedirs(output_path, exist_ok=True)

    # Use BSRGAN to restore the background of the whole image
    if bg_sr:
        ## BG model
        BGModel = BSRGAN(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=2)  # define network
        model_old = torch.load('./checkpoints/bsrgan_bg.pth')
        state_dict = BGModel.state_dict()
        # Copy checkpoint parameters by position, since the saved keys may not match the network keys
        for (key, param), (key2, param2) in zip(model_old.items(), state_dict.items()):
            state_dict[key2] = param
        BGModel.load_state_dict(state_dict, strict=True)
        BGModel.eval()
        for k, v in BGModel.named_parameters():
            v.requires_grad = False
        BGModel = BGModel.to(device)
        torch.cuda.empty_cache()

    lq_paths = get_image_paths(input_path)
    if len(lq_paths) == 0:
        exit('No image in the LR path.')

    WEncoderPath = './checkpoints/net_w_encoder_860000.pth'
    PriorModelPath = './checkpoints/net_prior_860000.pth'
    SRModelPath = './checkpoints/net_sr_860000.pth'
    YoloPath = './checkpoints/yolo11m_short_character.pt'
    TextModel = MARCONetPlus(WEncoderPath, PriorModelPath, SRModelPath, YoloPath, device=device)

    print('{:>25s} : {:s}'.format('Model Name', 'MARCONetPlusPlus'))
    if use_cuda:
        print('{:>25s} : {:s}'.format('GPU ID', str(torch.cuda.current_device())))
    else:
        print('{:>25s} : {:s}'.format('GPU ID', 'No GPU is available. Use CPU instead.'))
    torch.cuda.empty_cache()

    L_path = input_path
    E_path = output_path  # save path
    print('{:>25s} : {:s}'.format('Input Path', L_path))
    print('{:>25s} : {:s}'.format('Output Path', E_path))
    if aligned:
        print('{:>25s} : {:s}'.format('Image Details', 'Aligned Text Layout. No text detection is used.'))
    else:
        print('{:>25s} : {:s}'.format('Image Details', 'UnAligned Text Image. It will crop text region using CnSTD, restore, and paste results back.'))
    print('{:>25s} : {}'.format('Scale Factor', scale_factor))
    print('{:>25s} : {:s}'.format('Save LR & SR text layout', 'True' if save_text else 'False'))

    idx = 0
    for iix, img_path in enumerate(lq_paths):
        ####################################
        ##### (1) Read Image
        ####################################
        idx += 1
        img_name, ext = os.path.splitext(os.path.basename(img_path))
        print('{:>20s} {:04d} --> {: