# Copyright 2020 Erik Härkönen. All rights reserved.
# This file is licensed to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. You may obtain a copy
# of the License at http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
# OF ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import tkinter as tk
import numpy as np
import time
from contextlib import contextmanager

import pycuda.driver
from pycuda.gl import graphics_map_flags
from glumpy import gloo, gl
from pyopengltk import OpenGLFrame

import torch
from torch.autograd import Variable

# TkInter widget that can draw torch tensors directly from GPU memory


@contextmanager
def cuda_activate(img):
    """Context manager simplifying use of pycuda.gl.RegisteredImage.

    Maps ``img`` on entry, yields ``mapping.array(0, 0)`` and unmaps the
    resource on exit.  The unmap runs in a ``finally`` clause so the resource
    is released even when the body of the ``with`` statement raises.
    """
    mapping = img.map()
    try:
        yield mapping.array(0, 0)
    finally:
        mapping.unmap()


def create_shared_texture(w, h, c=4,
                          map_flags=graphics_map_flags.WRITE_DISCARD,
                          dtype=np.uint8):
    """Create and return a Texture2D with gloo and pycuda views.

    Args:
        w: Texture width in pixels.
        h: Texture height in pixels.
        c: Number of channels per pixel.
        map_flags: Flags handed to ``pycuda.gl.RegisteredImage``.
        dtype: NumPy dtype of the zero-filled backing array.

    Returns:
        A ``(tex, cuda_buffer)`` tuple: the gloo texture and the pycuda
        ``RegisteredImage`` registered on the same GL handle.
    """
    tex = np.zeros((h, w, c), dtype).view(gloo.Texture2D)
    tex.activate()  # force gloo to create on GPU
    tex.deactivate()
    cuda_buffer = pycuda.gl.RegisteredImage(
        int(tex.handle), tex.target, map_flags)
    return tex, cuda_buffer


# Shape batch as square if possible
def get_grid_dims(B):
    """Return ``(cols, rows)`` for tiling ``B`` images in a near-square grid.

    Starts from the rounded square root of ``B`` and walks down to the
    nearest divisor, so ``cols * rows == B`` always holds.

    Raises:
        ValueError: If ``B`` is smaller than 1 (no grid can be formed).
    """
    if B < 1:
        raise ValueError('Batch size must be at least 1, got {}'.format(B))
    S = int(B**0.5 + 0.5)
    while B % S != 0:
        S -= 1
    return (B // S, S)


def create_gl_texture(tensor_shape):
    """Create a shared RGBA texture large enough to tile an NCHW batch.

    Args:
        tensor_shape: Four-element ``(N, C, H, W)`` shape.

    Returns:
        The ``(tex, cuda_buffer)`` tuple from ``create_shared_texture``; the
        texture is ``W * cols`` wide and ``H * rows`` high with 4 channels.

    Raises:
        RuntimeError: If ``tensor_shape`` does not have exactly 4 entries.
    """
    if len(tensor_shape) != 4:
        raise RuntimeError('Please provide a tensor of shape NCHW')

    N, _, H, W = tensor_shape
    cols, rows = get_grid_dims(N)
    tex, cuda_buffer = create_shared_texture(W*cols, H*rows, 4)

    return tex, cuda_buffer


# Create window with OpenGL context
class TorchImageView(OpenGLFrame):
    """Tk frame that displays CUDA torch tensors through a shared GL texture."""

    # Number of most recent frame times averaged for the FPS readout.
    _FPS_WINDOW = 50
    # The window title is refreshed once every this many redraws.
    _FPS_TITLE_INTERVAL = 60

    def __init__(self, root=None, show_fps=True, **kwargs):
        """Create the frame.

        Args:
            root: Parent Tk widget; a new ``tk.Tk()`` is created when omitted.
            show_fps: Whether ``redraw`` writes the FPS into the window title.
            **kwargs: Forwarded to ``OpenGLFrame``; ``width`` and ``height``
                (default 512) are also stored on the instance.
        """
        self.root = root if root is not None else tk.Tk()
        self.width = kwargs.get('width', 512)
        self.height = kwargs.get('height', 512)
        self.show_fps = show_fps
        self.pycuda_initialized = False
        self.animate = 0  # disable internal main loop
        # Pass the resolved root so the frame is parented to the window
        # stored in self.root even when the caller supplied none.
        OpenGLFrame.__init__(self, self.root, **kwargs)

    # Called by pyopengltk.BaseOpenGLFrame
    # when the frame goes onto the screen
    def initgl(self):
        """Initialize GL state when the frame is created."""
        if not self.pycuda_initialized:
            self.setup_gl(self.width, self.height)
            self.pycuda_initialized = True

        gl.glViewport(0, 0, self.width, self.height)
        gl.glClearColor(0.0, 0.0, 0.0, 0.0)
        # Frame times are kept in seconds (see redraw); seed with 60 FPS.
        self.dt_history = [1 / 60]
        self.t0 = time.time()
        self.t_last = self.t0
        self.nframes = 0

    def setup_gl(self, width, height):
        """Set up pycuda, the shared texture and the full-screen quad shader.

        Args:
            width: Initial texture width in pixels.
            height: Initial texture height in pixels.
        """
        # setup pycuda and torch
        # Imported here rather than at module level: this runs from initgl(),
        # i.e. once the frame's GL context exists.
        import pycuda.gl.autoinit
        import pycuda.gl

        assert torch.cuda.is_available(), "PyTorch: CUDA is not available"
        print('Using GPU {}'.format(torch.cuda.current_device()))

        # Allocate and touch a throw-away CUDA tensor; presumably this forces
        # PyTorch to initialise its CUDA state up front. The tensor itself is
        # never shared with GL.
        torch.empty(1, device='cuda').uniform_()

        # Create a buffer with pycuda and gloo views. The shape is NCHW, so
        # height comes before width.
        self.tex, self.cuda_buffer = create_gl_texture((1, 3, height, width))

        # create a shader to program to draw to the screen
        vertex = """
        uniform float scale;
        attribute vec2 position;
        attribute vec2 texcoord;
        varying vec2 v_texcoord;
        void main()
        {
            v_texcoord = texcoord;
            gl_Position = vec4(scale*position, 0.0, 1.0);
        } """
        fragment = """
        uniform sampler2D tex;
        varying vec2 v_texcoord;
        void main()
        {
            gl_FragColor = texture2D(tex, v_texcoord);
        } """
        # Build the program and corresponding buffers (with 4 vertices)
        self.screen = gloo.Program(vertex, fragment, count=4)

        # NDC coordinates:          Texcoords:         Vertex order,
        # (-1, +1)       (+1, +1)   (0,0)    (1,0)     triangle strip:
        #        +-------+               +----+          1----3
        #        |  NDC  |               |    |          |  / |
        #        | SPACE |               |    |          | /  |
        #        +-------+               +----+          2----4
        # (-1, -1)       (+1, -1)   (0,1)    (1,1)

        # Upload data to GPU
        self.screen['position'] = [(-1, +1), (-1, -1), (+1, +1), (+1, -1)]
        self.screen['texcoord'] = [(0, 0), (0, 1), (1, 0), (1, 1)]
        self.screen['scale'] = 1.0
        self.screen['tex'] = self.tex

    # Don't call directly, use update() instead
    def redraw(self):
        """Update frame timing and, periodically, the FPS window title."""
        t_now = time.time()
        dt = t_now - self.t_last
        self.t_last = t_now

        self.dt_history = ([dt] + self.dt_history)[:self._FPS_WINDOW]
        dt_mean = sum(self.dt_history) / len(self.dt_history)

        # dt_mean can be 0 when the clock resolution is coarser than the
        # frame time; skip the title update rather than divide by zero.
        if (self.show_fps and dt_mean > 0
                and self.nframes % self._FPS_TITLE_INTERVAL == 0):
            # winfo_toplevel() also works when the frame is nested inside
            # another container widget, where master has no title().
            self.winfo_toplevel().title('FPS: {:.0f}'.format(1 / dt_mean))
        self.nframes += 1

    def draw(self, img):
        """Copy an NCHW CUDA tensor into the shared texture and draw it.

        Batches with ``N > 1`` are tiled into a ``cols x rows`` grid. Floating
        point input is scaled by 255 and clamped to ``[0, 255]``; other
        non-uint8 input is converted to uint8. One- and three-channel images
        are expanded to RGBA with an opaque alpha channel. The texture is
        recreated when the tiled image size changes.

        Args:
            img: CUDA tensor of shape ``(N, C, H, W)``.
        """
        assert len(img.shape) == 4, "Please provide an NCHW image tensor"
        assert img.device.type == "cuda", "Please provide a CUDA tensor"

        # The texture is uint8, so the source data must be uint8 as well.
        if img.dtype.is_floating_point:
            img = (255*img).clamp(0, 255).byte()
        elif img.dtype != torch.uint8:
            img = img.byte()

        # Tile images
        N, C, H, W = img.shape
        if N > 1:
            cols, rows = get_grid_dims(N)
            img = img.reshape(cols, rows, C, H, W)
            img = img.permute(2, 1, 3, 0, 4)  # [C, rows, H, cols, W]
            img = img.reshape(1, C, rows*H, cols*W)

        # Index the batch axis explicitly: squeeze() would also drop C, H or
        # W whenever one of them happens to be 1.
        tensor = img[0].permute(1, 2, 0).detach()  # CHW => HWC
        if C == 1:
            tensor = tensor.expand(-1, -1, 3)  # grayscale => RGB
        if tensor.shape[2] == 3:
            # add the alpha channel; 255 is fully opaque for uint8 data
            alpha = tensor.new_full((tensor.shape[0], tensor.shape[1], 1), 255)
            tensor = torch.cat((tensor, alpha), 2)

        tensor = tensor.contiguous()

        tex_h, tex_w, _ = self.tex.shape
        tensor_h, tensor_w, _ = tensor.shape

        if (tex_h, tex_w) != (tensor_h, tensor_w):
            print(f'Resizing texture to {tensor_w}*{tensor_h}')
            self.tex, self.cuda_buffer = create_gl_texture((N, C, H, W))  # original shape
            self.screen['tex'] = self.tex

        # copy from torch into buffer
        assert self.tex.nbytes == tensor.numel()*tensor.element_size(), "Tensor and texture shape mismatch!"
        with cuda_activate(self.cuda_buffer) as ary:
            cpy = pycuda.driver.Memcpy2D()
            cpy.set_src_device(tensor.data_ptr())
            cpy.set_dst_array(ary)
            # One texture row in bytes; source and destination are both
            # tightly packed, so all three values coincide.
            cpy.width_in_bytes = cpy.src_pitch = cpy.dst_pitch = self.tex.nbytes//tensor_h
            cpy.height = tensor_h
            cpy(aligned=False)
            torch.cuda.synchronize()

        # draw to screen
        self.screen.draw(gl.GL_TRIANGLE_STRIP)

    def update(self):
        """Process idle tasks, make the GL context current, redraw and swap."""
        self.update_idletasks()
        self.tkMakeCurrent()
        self.redraw()
        self.tkSwapBuffers()

# USAGE:
# root = tk.Tk()
# iv = TorchImageView(root, width=512, height=512)
# iv.pack(fill='both', expand=True)
# while True:
#     iv.draw(nchw_tensor)
#     root.update()
#     iv.update()