# Copyright 2020 Erik Härkönen. All rights reserved.
# This file is licensed to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. You may obtain a copy
# of the License at http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
# OF ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.
import tkinter as tk
import numpy as np
import time
from contextlib import contextmanager
import pycuda.driver
from pycuda.gl import graphics_map_flags
from glumpy import gloo, gl
from pyopengltk import OpenGLFrame
import torch
# TkInter widget that can draw torch tensors directly from GPU memory
@contextmanager
def cuda_activate(img):
    """Context manager simplifying use of pycuda.gl.RegisteredImage"""
    mapping = img.map()
    try:
        yield mapping.array(0, 0)
    finally:
        mapping.unmap() # always release the mapping, even if the copy fails
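# Typical usage (a sketch, mirroring the copy in draw() below): keep the
# mapping open for the duration of the transfer into the returned CUDA array:
#   with cuda_activate(cuda_buffer) as ary:
#       cpy.set_dst_array(ary)
#       cpy(aligned=False)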
def create_shared_texture(w, h, c=4,
map_flags=graphics_map_flags.WRITE_DISCARD,
dtype=np.uint8):
"""Create and return a Texture2D with gloo and pycuda views."""
tex = np.zeros((h,w,c), dtype).view(gloo.Texture2D)
tex.activate() # force gloo to create on GPU
tex.deactivate()
cuda_buffer = pycuda.gl.RegisteredImage(
int(tex.handle), tex.target, map_flags)
return tex, cuda_buffer
# Shape batch as square if possible
def get_grid_dims(B):
S = int(B**0.5 + 0.5)
while B % S != 0:
S -= 1
return (B // S, S)
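# Worked examples: get_grid_dims(12) == (4, 3), i.e. 4 columns by 3 rows;
# a prime batch size degenerates to a single row: get_grid_dims(5) == (5, 1).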
def create_gl_texture(tensor_shape):
if len(tensor_shape) != 4:
raise RuntimeError('Please provide a tensor of shape NCHW')
N, C, H, W = tensor_shape
cols, rows = get_grid_dims(N)
tex, cuda_buffer = create_shared_texture(W*cols, H*rows, 4)
return tex, cuda_buffer
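# E.g. a batch of shape (8, 3, 256, 256) tiles into a 4x2 grid, so the
# backing RGBA texture is allocated at 1024x512 pixels.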
# Create window with OpenGL context
class TorchImageView(OpenGLFrame):
    def __init__(self, root=None, show_fps=True, **kwargs):
        self.root = root or tk.Tk()
        self.width = kwargs.get('width', 512)
        self.height = kwargs.get('height', 512)
        self.show_fps = show_fps
        self.pycuda_initialized = False
        self.animate = 0 # disable pyopengltk's internal redraw loop
        OpenGLFrame.__init__(self, self.root, **kwargs)
# Called by pyopengltk.BaseOpenGLFrame
# when the frame goes onto the screen
    def initgl(self):
        """Initialize GL state when the frame is created"""
        if not self.pycuda_initialized:
            self.setup_gl(self.width, self.height)
            self.pycuda_initialized = True
        gl.glViewport(0, 0, self.width, self.height)
        gl.glClearColor(0.0, 0.0, 0.0, 0.0)
        self.dt_history = [1/60] # seed the FPS estimate with one 60 Hz frame time
        self.t0 = time.time()
        self.t_last = self.t0
        self.nframes = 0
    def setup_gl(self, width, height):
        # Set up pycuda and torch; the GL context must already be current,
        # since pycuda.gl.autoinit creates a CUDA context that shares with GL
        import pycuda.gl.autoinit
        import pycuda.gl
        assert torch.cuda.is_available(), "PyTorch: CUDA is not available"
        print('Using GPU {}'.format(torch.cuda.current_device()))
        # Force torch to initialize its CUDA state; the tensor itself is never used again
        dummy = torch.rand(1, device='cuda')
        # Create a texture with gloo and pycuda views, sized to the window (NCHW)
        self.tex, self.cuda_buffer = create_gl_texture((1, 3, height, width))
        # Create a shader program to draw the texture to the screen
vertex = """
uniform float scale;
attribute vec2 position;
attribute vec2 texcoord;
varying vec2 v_texcoord;
void main()
{
v_texcoord = texcoord;
gl_Position = vec4(scale*position, 0.0, 1.0);
} """
fragment = """
uniform sampler2D tex;
varying vec2 v_texcoord;
void main()
{
gl_FragColor = texture2D(tex, v_texcoord);
} """
# Build the program and corresponding buffers (with 4 vertices)
self.screen = gloo.Program(vertex, fragment, count=4)
# NDC coordinates: Texcoords: Vertex order,
# (-1, +1) (+1, +1) (0,0) (1,0) triangle strip:
# +-------+ +----+ 1----3
# | NDC | | | | / |
# | SPACE | | | | / |
# +-------+ +----+ 2----4
# (-1, -1) (+1, -1) (0,1) (1,1)
# Upload data to GPU
self.screen['position'] = [(-1,+1), (-1,-1), (+1,+1), (+1,-1)]
self.screen['texcoord'] = [(0,0), (0,1), (1,0), (1,1)]
self.screen['scale'] = 1.0
self.screen['tex'] = self.tex
# Don't call directly, use update() instead
def redraw(self):
t_now = time.time()
dt = t_now - self.t_last
self.t_last = t_now
self.dt_history = ([dt] + self.dt_history)[:50]
dt_mean = sum(self.dt_history) / len(self.dt_history)
        if self.show_fps and self.nframes % 60 == 0:
            self.master.title('FPS: {:.0f}'.format(1 / dt_mean))
        self.nframes += 1
    def draw(self, img):
        assert len(img.shape) == 4, "Please provide an NCHW image tensor"
        assert img.device.type == "cuda", "Please provide a CUDA tensor"
        if img.dtype.is_floating_point:
            img = (255*img).byte() # assumes values in [0, 1]
        # Tile batch into a single grid image
        N, C, H, W = img.shape
        if N > 1:
            cols, rows = get_grid_dims(N)
            img = img.reshape(cols, rows, C, H, W)
            img = img.permute(2, 1, 3, 0, 4) # [C, rows, H, cols, W]
            img = img.reshape(1, C, rows*H, cols*W)
        tensor = img[0].permute(1, 2, 0).detach() # CHW => HWC
        if C == 3:
            tensor = torch.cat((tensor, tensor[:,:,0:1]), 2) # add an alpha channel
            tensor[:,:,3] = 255 # fully opaque (uint8)
        tensor = tensor.contiguous()
tex_h, tex_w, _ = self.tex.shape
tensor_h, tensor_w, _ = tensor.shape
if (tex_h, tex_w) != (tensor_h, tensor_w):
print(f'Resizing texture to {tensor_w}*{tensor_h}')
self.tex, self.cuda_buffer = create_gl_texture((N, C, H, W)) # original shape
self.screen['tex'] = self.tex
# copy from torch into buffer
assert self.tex.nbytes == tensor.numel()*tensor.element_size(), "Tensor and texture shape mismatch!"
        with cuda_activate(self.cuda_buffer) as ary:
            cpy = pycuda.driver.Memcpy2D()
            cpy.set_src_device(tensor.data_ptr())
            cpy.set_dst_array(ary)
            # rows are contiguous, so pitch equals the row length in bytes
            cpy.width_in_bytes = cpy.src_pitch = cpy.dst_pitch = self.tex.nbytes//tensor_h
            cpy.height = tensor_h
            cpy(aligned=False)
        torch.cuda.synchronize()
# draw to screen
self.screen.draw(gl.GL_TRIANGLE_STRIP)
def update(self):
self.update_idletasks()
self.tkMakeCurrent()
self.redraw()
self.tkSwapBuffers()
# USAGE:
# root = tk.Tk()
# iv = TorchImageView(root, width=512, height=512)
# iv.pack(fill='both', expand=True)
# while True:
# iv.draw(nchw_tensor)
# root.update()
# iv.update()
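# A minimal runnable sketch of the loop above. The random batch is a
# hypothetical stand-in for real content (e.g. generator output), and the
# initial root.update() is a precaution to ensure the frame is mapped, so
# initgl()/setup_gl() have run before the first draw() call.
if __name__ == '__main__':
    root = tk.Tk()
    iv = TorchImageView(root, width=512, height=512)
    iv.pack(fill='both', expand=True)
    root.update() # map the frame, triggering initgl()
    img = torch.rand(4, 3, 256, 256, device='cuda') # tiles to a 512x512 grid
    while True:
        iv.draw(img)  # upload the tensor and render it
        root.update() # process tkinter events
        iv.update()   # swap buffers and refresh the FPS title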