opengl - C/CUDA - Modifying CUDA/GL interop example to store image in a memory buffer -
i trying store image, generated cuda-opengl interop example in 'cuda-by example' textbook, memory buffer can store images.
i want store 2 images, 1 green "x" , orangish "x", in memory buffer. when render pbuffer opengl, should green "x" image example output, however, black screen. not sure why not getting right output. please tell me what's wrong?
i obtained code memory buffer a memory buffer multiple images
#include "book.h" #include "cpu_bitmap.h" #include "cuda.h" #include <cuda_gl_interop.h> pfnglbindbufferarbproc glbindbuffer = null; pfngldeletebuffersarbproc gldeletebuffers = null; pfnglgenbuffersarbproc glgenbuffers = null; pfnglbufferdataarbproc glbufferdata = null; #define dim 512 #define imagesize_max (dim*dim) // change gluint bufferobj; cudagraphicsresource *resource; // based on ripple code, uses uchar4 type of data // graphic inter op uses. see screenshot - basic2.png __global__ void kernel( uchar4 *ptr1) { // map threadidx/blockidx pixel position int x = threadidx.x + blockidx.x * blockdim.x; int y = threadidx.y + blockidx.y * blockdim.y; int offset = x + y * blockdim.x * griddim.x ; // calculate value @ position float fx = x/(float)dim - 0.5f; float fy = y/(float)dim - 0.5f; unsigned char green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) ); // accessing uchar4 vs unsigned char* ptr1[offset].x = 0; ptr1[offset].y = green; ptr1[offset].z = 0; ptr1[offset].w = 255; } // code __global__ void kernel2( uchar4 *ptr2) { // map threadidx/blockidx pixel position int x = threadidx.x + blockidx.x * blockdim.x; int y = threadidx.y + blockidx.y * blockdim.y; int offset = x + y * blockdim.x * griddim.x ; // calculate value @ position float fx = x/(float)dim - 0.5f; float fy = y/(float)dim - 0.5f; unsigned char green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) ); // accessing uchar4 vs unsigned char* ptr2[offset].x = 1000; ptr2[offset].y = green; ptr2[offset].z = 0; ptr2[offset].w = 255; } __global__ void copy ( uchar4 *pbuffer, uchar4 *ptr, uchar4 *ptr2, size_t size, int ) { int x = threadidx.x + blockidx.x * blockdim.x; int y = threadidx.y + blockidx.y * blockdim.y; int idx = x + y * blockdim.x * griddim.x ; int bdx = idx; if (a==1) { while ( idx < dim*dim) { pbuffer[idx] = ptr[idx] ; __syncthreads(); if (idx==dim*dim) { break; } } } if (a==2) { while ( (idx < dim*dim) && (bdx < dim*dim) ) { uchar4 temp = ptr2[bdx]; __syncthreads(); pbuffer[idx+4] = temp; __syncthreads(); if ((idx==dim*dim) && (bdx==dim*dim)) { break; } } } } void key_func( unsigned char key, int x, int y ) { switch (key) { case 27: // clean opengl , cuda ( cudagraphicsunregisterresource( resource ) ); glbindbuffer( gl_pixel_unpack_buffer_arb, 0 ); gldeletebuffers( 1, &bufferobj ); exit(0); } } void draw_func( void ) { // pass 0 last parameter, because out bufferobj // source, , field switches being pointer // bitmap mean offset bitmap object gldrawpixels( dim, dim, gl_rgba, gl_unsigned_byte, 0 ); glutswapbuffers(); } int main( int argc, char **argv ) { cudadeviceprop prop; int dev; memset( &prop, 0, sizeof( cudadeviceprop ) ); prop.major = 1; prop.minor = 0; ( cudachoosedevice( &dev, &prop ) ); // tell cuda dev using graphic interop // programming guide: interoperability opengl // requires cuda device specified // cudaglsetgldevice() before other runtime calls. ( cudaglsetgldevice( dev ) ); // these glut calls need made before other opengl // calls, else seg fault glutinit( &argc, argv ); glutinitdisplaymode( glut_double | glut_rgba ); glutinitwindowsize( dim, dim ); glutcreatewindow( "bitmap" ); glbindbuffer = (pfnglbindbufferarbproc)get_proc_address("glbindbuffer"); gldeletebuffers = (pfngldeletebuffersarbproc)get_proc_address("gldeletebuffers"); glgenbuffers = (pfnglgenbuffersarbproc)get_proc_address("glgenbuffers"); glbufferdata = (pfnglbufferdataarbproc)get_proc_address("glbufferdata"); // first 3 standard opengl, 4th cuda reg // of bitmap these calls exist starting in opengl 1.5 glgenbuffers( 1, &bufferobj ); glbindbuffer( gl_pixel_unpack_buffer_arb, bufferobj ); glbufferdata( gl_pixel_unpack_buffer_arb, dim * dim * 4 ,null, gl_dynamic_draw_arb ); // register gl bufferobj , cuda resource ( cudagraphicsglregisterbuffer( &resource, bufferobj, cudagraphicsmapflagsnone ) ); // work memory dst being on gpu, gotten via mapping handle_error( cudagraphicsmapresources( 1, &resource, null ) ); // modified code uchar4 *devptr; size_t size; size_t sizetotal = 0; cudamalloc ( (uchar4 **)&devptr, size); uchar4 *devptr2; cudamalloc ( (uchar4 **)&devptr2, size); uchar4 *pbuffer; (cudamalloc ( (uchar4 **)&pbuffer, size)); uchar4 *pbuffercurrent; (cudamalloc ( (uchar4 **)&pbuffercurrent, size)); uchar4 *pbufferimage; (cudamalloc ( (uchar4 **)&pbufferimage, size)); // register c buffer , cuda resource handle_error( cudagraphicsresourcegetmappedpointer( (void**)&pbufferimage, &size, resource) ); dim3 grids(dim/16,dim/16); dim3 threads(16,16); kernel<<<grids,threads>>>(devptr); kernel2<<<grids,threads>>>(devptr2); int = 1; { if (a==1) { copy<<< grids, threads>>>(pbufferimage, devptr, devptr2, size, a); } if(a==2) { copy<<< grids, threads>>>(pbufferimage, devptr, devptr2, size, a); } a++; } while (a<=2); cudagraphicsunmapresources( 1, &resource, null ) ); // set glut , kick off main loop glutkeyboardfunc( key_func ); glutdisplayfunc( draw_func ); glutmainloop(); }
start doing proper cuda error checking on cuda api calls (e.g. cudamemcpy, etc.) , kernel calls.
when you'll discover kernels not running successfully. these types of things won't work:
uchar4 *devptr; // you've created unallocated null host pointer size_t img1_size = imagesize_max; kernel<<<grids,threads>>>(devptr); // kernel fail uchar4 *devptr2; // you've created unallocated null host pointer size_t img2_size = imagesize_max; kernel2<<<grids,threads>>>(devptr2); // kernel fail
devptr
, devptr2
in above code null pointers. haven't allocated storage associated them. furthermore, since passing them device kernels, need allocated cudamalloc
or similar api function, in order pointers usable in device code.
since not allocated cudamalloc
, try dereference pointers in device code, you'll create kernel fault. evident if error checking, have "unspecified launch failure" or similar report kernels.
i think there number of other problems in code, first should proper cuda error checking , @ least code point you've written is, in fact, running.
and code you've posted doesn't compile.
after fixing compile errors discovered have infinite loop:
cudamalloc ( (uchar4 **)&pbuffercurrent, sizetotal + sizeof(size) + size); cudamalloc ( (uchar4 **)&pbuffer, sizetotal + sizeof(size) + size); { if (!pbuffercurrent) { break; } pbuffer = pbuffercurrent; pbuffercurrent += sizetotal; imageget ( pbuffercurrent + sizeof(size), size, devptr); sizetotal += (sizeof(size) + size); } while (a==1);
since a
initialized 1 in loop, , nothing in loop modifies a
, loop never exit based on while
condition. since pbuffercurrent never 0 if it's been set cudamalloc
, break
never taken.
if malloc
or cudamalloc
pointer called pbuffercurrent
, it's hard me imagine under circumstances ever make sense:
pbuffercurrent += sizetotal;
and although legal, don't see how makes sense:
pbuffer = pbuffercurrent;
you created allocation pbuffer
using cudamalloc
, first thing throw away?
Comments
Post a Comment