Spaces:

to-be
/

Scanned_document_denoise_reconstruct

Running

App Files Files Community

Scanned_document_denoise_reconstruct / app.py

to-be

Update app.py

9741eb4 about 1 year ago

raw

history blame

3.67 kB

	import torch
	import torch.nn.functional as F
	import os
	from skimage import img_as_ubyte
	import cv2
	import argparse
	import shutil
	import gradio as gr
	from PIL import Image
	from runpy import run_path
	import numpy as np


	examples = [['sample1.png'],
	['sample2.png'],
	['Sample3.png'],
	['Sample4.png'],
	['Sample5.png'],
	['Sample6.png']]


	title = "Restormer"
	description = """
	Gradio demo for reconstruction of noisy scanned, photocopied documents\n
	using <b>Restormer: Efficient Transformer for High-Resolution Image Restoration</b>, CVPR 2022--ORAL. <a href='https://arxiv.org/abs/2111.09881'>[Paper]</a><a href='https://github.com/swz30/Restormer'>[Github Code]</a>\n
	<a href='https://medium.com/towards-data-science/effective-data-augmentation-for-ocr-8013080aa9fa'>[See my article for more details]</a>\n
	<b> Note:</b> Since this demo uses CPU, by default it will run on the downsampled version of the input image (for speedup). But if you want to perform inference on the original input, then choose the option "Full Resolution Image" from the dropdown menu.
	"""

	article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.09881'>Restormer: Efficient Transformer for High-Resolution Image Restoration </a> \| <a href='https://github.com/swz30/Restormer'>Github Repo</a></p>"


	def inference(img):
	if not os.path.exists('temp'):
	os.system('mkdir temp')

	# 'Downsampled Image'
	#### Resize the longer edge of the input image
	max_res = 200
	width, height = img.size
	if max(width,height) > max_res:
	scale = max_res /max(width,height)
	width = int(scale*width)
	height = int(scale*height)
	img = img.resize((width,height))


	parameters = {'inp_channels':3, 'out_channels':3, 'dim':48, 'num_blocks':[4,6,6,8], 'num_refinement_blocks':4, 'heads':[1,2,4,8], 'ffn_expansion_factor':2.66, 'bias':False, 'LayerNorm_type':'WithBias', 'dual_pixel_task':False}
	load_arch = run_path('restormer_arch.py')
	model = load_arch['Restormer'](**parameters)

	checkpoint = torch.load('net_g_92000.pth')
	model.load_state_dict(checkpoint['params'])
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	model = model.to(device)
	model.eval()

	img_multiple_of = 8

	with torch.inference_mode():
	if torch.cuda.is_available():
	torch.cuda.ipc_collect()
	torch.cuda.empty_cache()

	open_cv_image = np.array(img)
	img = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB)

	input_ = torch.from_numpy(img).float().div(255.).permute(2,0,1).unsqueeze(0).to(device)

	# Pad the input if not_multiple_of 8
	h,w = input_.shape[2], input_.shape[3]
	H,W = ((h+img_multiple_of)//img_multiple_of)img_multiple_of, ((w+img_multiple_of)//img_multiple_of)img_multiple_of
	padh = H-h if h%img_multiple_of!=0 else 0
	padw = W-w if w%img_multiple_of!=0 else 0
	input_ = F.pad(input_, (0,padw,0,padh), 'reflect')

	restored = torch.clamp(model(input_),0,1)

	# Unpad the output
	restored = img_as_ubyte(restored[:,:,:h,:w].permute(0, 2, 3, 1).cpu().detach().numpy()[0])
	#convert to pil when returning

	return Image.fromarray(cv2.cvtColor(restored, cv2.COLOR_RGB2BGR))

	gr.Interface(
	inference,
	[
	gr.Image(type="pil", label="Input"),
	],
	gr.Image(type="pil", label="cleaned and restored"),
	title=title,
	description=description,
	article=article,
	examples=examples,
	).launch(debug=False,enable_queue=True)