Possible bugs with device signature and stream usage

0 views
Skip to first unread message

Chris

unread,
Apr 13, 2018, 6:22:23 PM4/13/18
to Numba Public Discussion - Public
I am encountering two things that I believe are bugs: one with stream usage and one with device (kernel) signatures (they are unrelated to one another). I also have accompanying code that shows what is going wrong (it requires PySide2). Most of the code is just initialization; the issues I am encountering occur around the render kernel.

The first issue has to do with stream usage. If I initialize a stream in the __init__ function of the OpenGL window and try to use it when launching the render kernel (as a dedicated stream for rendering), I am met with an error message. If, instead, I don't use a stream, or I use a stream that I create just for the single call to the render function, everything works correctly. In addition, I can use the stream created in __init__ for to_device calls; it only causes issues when it is used for a kernel call.

The second issue has to do with the kernel signature used in the rendering. If I add the correct signature to the kernel and call it from the render function, I also get an error. If I use inspect_types() on the compiled kernel function, the input objects seem to match the signature exactly, but calling the kernel still raises an error.

The error messages I got are posted below the code.

CODE:

from OpenGL.GL import *
from OpenGL.GLUT import *
from OpenGL.GLU import *
from OpenGL.GL.ARB.vertex_buffer_object import *
from OpenGL.GL.ARB.pixel_buffer_object import *

from PySide2 import QtCore, QtGui, QtWidgets, QtOpenGL
from numba import cuda
import numpy as np

import ctypes
class ExternalMemory(object):
    """
    Provide an externally managed memory.
    Allows Numba to access objects created in PyCUDA.
    Interface requirement: __cuda_memory__, device_ctypes_pointer, _cuda_memsize_
    """
    # Class-level marker; part of the interface listed in the docstring.
    __cuda_memory__ = True

    def __init__(self, ptr, size):
        """Wrap a raw device address.

        ptr:  integer device address; stored as a ``ctypes.c_void_p``.
        size: value stored unchanged as ``_cuda_memsize_`` (the caller in
              this file passes a byte count).
        """
        self.device_ctypes_pointer = ctypes.c_void_p(ptr)
        self._cuda_memsize_ = size

class RenderGUI(QtWidgets.QMainWindow):
    """Main window that hosts a single fixed-size OpenGL render widget."""

    def __init__(self, parent = None):
        """Build the central widget, its grid layout and the GL view."""
        super().__init__(parent)

        central = QtWidgets.QWidget()
        self.mainWidget = central
        self.setCentralWidget(central)
        self.mainLayout = QtWidgets.QGridLayout()

        # The GL widget is created and added to mainLayout first; only then
        # is the layout installed on the central widget.
        self.openGLWindow()

        central.setLayout(self.mainLayout)

    def openGLWindow(self):
        """Create the OpenGL widget and place it at grid cell (0, 0)."""
        # Must be assigned before OpenGLWidget is constructed: its __init__
        # reads parent.GLWindowSize.
        window_size = np.array([750, 750], dtype=np.uint32)
        self.GLWindowSize = window_size

        gl_view = OpenGLWidget(parent=self)
        self.glWindow = gl_view
        gl_view.setFixedSize(window_size[0], window_size[1])
        self.mainLayout.addWidget(gl_view, 0, 0)

    def keyPressEvent(self, event):
        """Redraw the GL widget on any key press."""
        self.glWindow.updateGL()

class OpenGLWidget(QtOpenGL.QGLWidget):
    """GL widget whose pixel buffer object (PBO) is filled by a Numba CUDA kernel.

    Each paint maps the PBO through PyCUDA, wraps the mapped device pointer
    in a Numba device array, launches ``backgroundRender`` on it, unmaps the
    buffer and draws a textured quad.
    """

    def __init__(self, parent):
        """Store the window size taken from ``parent`` and create the render stream.

        parent: object exposing ``GLWindowSize`` (indexed as [0]=width,
                [1]=height throughout this class).
        """
        super().__init__(parent)

        #######################DEFINE RENDER STREAM#######################
        # NOTE(review): this stream is created before initializeGL() imports
        # pycuda.gl.autoinit. Presumably it therefore belongs to a different
        # CUDA context than the one that is current when render() launches
        # the kernel, which would explain CUDA_ERROR_INVALID_HANDLE when it
        # is passed to the kernel launch -- confirm. A stream created inside
        # render() is reported to work (see the commented variants there).
        self.renderstream    = cuda.stream()
        ##################################################################

        self.pbo             = None
        self.tex             = None
        self.cudaPBOResource = None
        self.GLWindowSize    = parent.GLWindowSize

    def initializeGL(self):
        """Set the projection, initialise PyCUDA's GL interop and create the PBO."""
        gluOrtho2D(0, self.GLWindowSize[0], self.GLWindowSize[1], 0)

        # Imported here rather than at module level. NOTE(review): presumably
        # because pycuda.gl.autoinit creates its CUDA/GL context at import
        # time and needs the widget's GL context to exist -- confirm.
        import pycuda.gl.autoinit
        import pycuda.gl
        import pycuda.driver

        self.cuda_gl = pycuda.gl
        self.cuda_driver = pycuda.driver

        self.backgroundRender = backgroundRender

        self.initPixelBuffer()

    def paintGL(self):
        """Render into the PBO with CUDA, then draw it."""
        self.render()
        self.drawTexture()

    def render(self):
        """Map the PBO, run the background kernel on it and unmap it again."""
        d_out = self.cudaPBOResource.map()
        try:
            width  = self.GLWindowSize[0]
            height = self.GLWindowSize[1]

            gridDim  = (divUp(width, 16), divUp(height, 16))
            blockDim = (16, 16)

            # Pixel count in Python ints so the products below cannot wrap
            # around in fixed-width NumPy arithmetic.
            n_pixels = int(width)*int(height)

            # 4 bytes per pixel: the array below is (n_pixels, 4) uint8.
            d_out_pycuda_to_numba_ptr = ExternalMemory(d_out.device_ptr(), n_pixels*4)
            d_out_numba = cuda.devicearray.DeviceNDArray(shape = (n_pixels, 4),
                                                         strides = (4,1),
                                                         dtype = np.dtype('uint8'),
                                                         gpu_data = d_out_pycuda_to_numba_ptr)

            #Stream works for data transfer but not for kernel
            test = cuda.to_device(np.zeros((100,100)), stream = self.renderstream)

            #even changing d_out_numba to basic device array causes an error
            # d_out_numba = cuda.to_device(np.zeros((750*750, 4), dtype = np.uint8))

            # #DOESN'T WORK
            # backgroundRender[gridDim, blockDim, self.renderstream](d_out_numba,
            #    self.GLWindowSize[0],
            #    self.GLWindowSize[1])

            # #WORKS
            # backgroundRender[gridDim, blockDim, cuda.stream()](d_out_numba,
            #    self.GLWindowSize[0],
            #    self.GLWindowSize[1])

            #WORKS
            backgroundRender[gridDim, blockDim](d_out_numba,
                                                self.GLWindowSize[0],
                                                self.GLWindowSize[1])

            # backgroundRender.inspect_types()

            cuda.synchronize()
        finally:
            # Always release the mapping, even if the launch above raised;
            # otherwise the buffer would stay mapped.
            d_out.unmap()

    def drawTexture(self):
        """Upload the texture image and draw it on a window-sized quad."""
        # No client-side pixel data is passed (None). NOTE(review): presumably
        # the pixels come from the PBO left bound to GL_PIXEL_UNPACK_BUFFER
        # by initPixelBuffer() -- confirm.
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, self.GLWindowSize[0], self.GLWindowSize[1], 0, GL_RGBA, GL_UNSIGNED_BYTE, None)
        glEnable(GL_TEXTURE_2D)
        glBegin(GL_QUADS)

        glTexCoord2f(0.0, 0.0)
        glVertex2f(0, 0)
        glTexCoord2f(0.0, 1.0)
        glVertex2f(0, self.GLWindowSize[1])
        glTexCoord2f(1.0, 1.0)
        glVertex2f(self.GLWindowSize[0], self.GLWindowSize[1])
        glTexCoord2f(1.0, 0.0)
        glVertex2f(self.GLWindowSize[0], 0)

        glEnd()
        glDisable(GL_TEXTURE_2D)

    def initPixelBuffer(self):
        """Create the PBO, register it with PyCUDA and create the texture."""
        data = np.zeros((self.GLWindowSize[0]*self.GLWindowSize[1], 4), dtype = np.uint8)

        self.pbo = glGenBuffers(1)
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, self.pbo)
        glBufferData(GL_PIXEL_UNPACK_BUFFER, data, GL_STREAM_DRAW)
        # int(), not long(): the `long` builtin does not exist on Python 3.
        self.cudaPBOResource = self.cuda_gl.BufferObject(int(self.pbo))
        self.tex = glGenTextures(1)
        glBindTexture(GL_TEXTURE_2D, self.tex)
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)

    def minimumSizeHint(self):
        """Return half the window size as the minimum size hint."""
        # Floor division + int(): true division would hand floats to QSize.
        return QtCore.QSize(int(self.GLWindowSize[0]//2), int(self.GLWindowSize[1]//2))

    def maximumSizeHint(self):
        """Return the window size as the maximum size hint."""
        return QtCore.QSize(int(self.GLWindowSize[0]), int(self.GLWindowSize[1]))

    def sizeHint(self):
        """Return the window size as the preferred size hint."""
        return QtCore.QSize(int(self.GLWindowSize[0]), int(self.GLWindowSize[1]))

    def resizeGL(self, width, height):
        """Keep a centred square viewport with side min(width, height)."""
        side = min(width, height)
        glViewport(int((width - side)/2), int((height - side)/2), side, side)

    def wheelEvent(self, event):
        """Redraw on any mouse-wheel event."""
        self.updateGL()

def divUp(threadSize, blockSize):
    """Return ceil(threadSize / blockSize) as a plain Python int.

    threadSize: total number of work items along one axis.
    blockSize:  number of work items per block along that axis.

    Raises ZeroDivisionError if blockSize is 0.
    """
    # Convert first: callers in this file pass np.uint32 values, and the
    # `+ blockSize - 1` step can wrap around in fixed-width arithmetic.
    total = int(threadSize)
    per_block = int(blockSize)
    return (total + per_block - 1)//per_block

#Adding signature doesn't work
# NOTE(review): with an explicit signature Numba compiles the kernel when the
# decorator runs, i.e. before OpenGLWidget.initializeGL() has imported
# pycuda.gl.autoinit. Presumably that ordering is what leads to the
# CUDA_ERROR_INVALID_HANDLE shown in the traceback below -- confirm.
@cuda.jit#('void(uint8[:,:],uint32,uint32)')
def backgroundRender(d_out, w, h):
    """Write (255, 0, 0, 0) into row ``col + row*w`` of d_out for each pixel.

    d_out: 2-D array indexed as [pixel, channel]; channels 0..3 are written.
    w, h:  image width and height used for the bounds check and row pitch.
    """
    col, row = cuda.grid(2)

    # Threads that fall outside the w x h image have nothing to do.
    if col >= w or row >= h:
        return

    pixel = col + row*w

    d_out[pixel, 0] = 255
    d_out[pixel, 1] = 0
    d_out[pixel, 2] = 0
    d_out[pixel, 3] = 0

def main():
    """Create the Qt application, show the render window and run the event loop."""
    # `sys` is not among the imports visible at the top of this file; import
    # it here so the function does not depend on a star-import leaking it.
    import sys

    app = QtWidgets.QApplication(sys.argv)
    window = RenderGUI()
    window.show()
    # Propagate the Qt event loop's exit code to the process.
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()


 #With the signature added to the kernel, when it is called in render()
# Traceback (most recent call last):
#   File "stream_test.py", line 75, in paintGL
#     self.render()
#   File "stream_test.py", line 98, in render
#     self.GLWindowSize[1])
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/compiler.py", line 483, in __call__
#     sharedmem=self.sharedmem)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/compiler.py", line 557, in _kernel_call
#     cu_func(*kernelargs)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 1301, in __call__
#     self.sharedmem, streamhandle, args)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 1345, in launch_kernel
#     None)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 288, in safe_cuda_api_call
#     self._check_error(fname, retcode)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 323, in _check_error
#     raise CudaAPIError(retcode, msg)
# numba.cuda.cudadrv.driver.CudaAPIError: [400] Call to cuLaunchKernel results in CUDA_ERROR_INVALID_HANDLE

# #When the stream created in __init__ is used for the kernel call in render()
# Traceback (most recent call last):
#   File "stream_test.py", line 75, in paintGL
#     self.render()
#   File "stream_test.py", line 97, in render
#     self.GLWindowSize[1])
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/compiler.py", line 703, in __call__
#     cfg(*args)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/compiler.py", line 483, in __call__
#     sharedmem=self.sharedmem)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/compiler.py", line 557, in _kernel_call
#     cu_func(*kernelargs)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 1301, in __call__
#     self.sharedmem, streamhandle, args)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 1345, in launch_kernel
#     None)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 288, in safe_cuda_api_call
#     self._check_error(fname, retcode)
#   File "/home/uchytilc/anaconda3/lib/python3.6/site-packages/numba/cuda/cudadrv/driver.py", line 323, in _check_error
#     raise CudaAPIError(retcode, msg)
# numba.cuda.cudadrv.driver.CudaAPIError: [400] Call to cuLaunchKernel results in CUDA_ERROR_INVALID_HANDLE

Reply all
Reply to author
Forward
0 new messages