opengl - C/CUDA - Modifying CUDA/GL interop example to store image in a memory buffer -


i trying store image, generated cuda-opengl interop example in 'cuda-by example' textbook, memory buffer can store images.

i want store 2 images, 1 green "x" , orangish "x", in memory buffer. when render pbuffer opengl, should green "x" image example output, however, black screen. not sure why not getting right output. please tell me what's wrong?

i obtained code memory buffer a memory buffer multiple images

    #include "book.h"     #include "cpu_bitmap.h"     #include "cuda.h"     #include <cuda_gl_interop.h>      pfnglbindbufferarbproc    glbindbuffer     = null;     pfngldeletebuffersarbproc gldeletebuffers  = null;     pfnglgenbuffersarbproc    glgenbuffers     = null;     pfnglbufferdataarbproc    glbufferdata     = null;      #define     dim    512      #define imagesize_max (dim*dim)  // change      gluint  bufferobj;     cudagraphicsresource *resource;      // based on ripple code, uses uchar4 type of data     // graphic inter op uses. see screenshot - basic2.png      __global__ void kernel( uchar4 *ptr1)        {    // map threadidx/blockidx pixel position     int x = threadidx.x + blockidx.x * blockdim.x;     int y = threadidx.y + blockidx.y * blockdim.y;     int offset = x + y * blockdim.x * griddim.x ;      // calculate value @ position     float fx = x/(float)dim - 0.5f;     float fy = y/(float)dim - 0.5f;     unsigned char   green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) );      // accessing uchar4 vs unsigned char*     ptr1[offset].x = 0;     ptr1[offset].y = green;     ptr1[offset].z = 0;     ptr1[offset].w = 255;      }      // code      __global__ void kernel2( uchar4 *ptr2)      {     // map threadidx/blockidx pixel position     int x = threadidx.x + blockidx.x * blockdim.x;     int y = threadidx.y + blockidx.y * blockdim.y;     int offset = x + y * blockdim.x * griddim.x ;      // calculate value @ position     float fx = x/(float)dim - 0.5f;     float fy = y/(float)dim - 0.5f;     unsigned char   green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) );      // accessing uchar4 vs unsigned char*     ptr2[offset].x = 1000;     ptr2[offset].y = green;     ptr2[offset].z = 0;     ptr2[offset].w = 255;     }      __global__ void copy ( uchar4 *pbuffer, uchar4 *ptr, uchar4 *ptr2, size_t size, int )     {      int x = threadidx.x + blockidx.x * blockdim.x;     int y = threadidx.y + blockidx.y * blockdim.y;     int idx = x + y * blockdim.x * griddim.x ;     int bdx = idx;      if (a==1)     {     while ( idx < dim*dim)     {     pbuffer[idx] = ptr[idx] ;     __syncthreads();     if (idx==dim*dim)     {      break;     }     }     }       if (a==2)     {     while ( (idx < dim*dim) && (bdx < dim*dim) )     {     uchar4 temp = ptr2[bdx];     __syncthreads();      pbuffer[idx+4] = temp;     __syncthreads();      if ((idx==dim*dim) && (bdx==dim*dim))     {     break;     }     }       }      }       void key_func( unsigned char key, int x, int y )      {     switch (key)      {     case 27:         // clean opengl , cuda         ( cudagraphicsunregisterresource( resource ) );         glbindbuffer( gl_pixel_unpack_buffer_arb, 0 );         gldeletebuffers( 1, &bufferobj );         exit(0);     }     }      void draw_func( void ) {     // pass 0 last parameter, because out bufferobj     // source, , field switches being pointer     // bitmap mean offset bitmap object     gldrawpixels( dim, dim, gl_rgba, gl_unsigned_byte, 0 );     glutswapbuffers();     }       int main( int argc, char **argv )      {     cudadeviceprop  prop;     int dev;      memset( &prop, 0, sizeof( cudadeviceprop ) );     prop.major = 1;     prop.minor = 0;     ( cudachoosedevice( &dev, &prop ) );      // tell cuda dev using graphic interop     // programming guide:  interoperability opengl     //     requires cuda device specified     //     cudaglsetgldevice() before other runtime calls.      ( cudaglsetgldevice( dev ) );      // these glut calls need made before other opengl     // calls, else seg fault     glutinit( &argc, argv );     glutinitdisplaymode( glut_double | glut_rgba );     glutinitwindowsize( dim, dim );     glutcreatewindow( "bitmap" );      glbindbuffer    = (pfnglbindbufferarbproc)get_proc_address("glbindbuffer");     gldeletebuffers = (pfngldeletebuffersarbproc)get_proc_address("gldeletebuffers");     glgenbuffers    = (pfnglgenbuffersarbproc)get_proc_address("glgenbuffers");     glbufferdata    = (pfnglbufferdataarbproc)get_proc_address("glbufferdata");      // first 3 standard opengl, 4th cuda reg      // of bitmap these calls exist starting in opengl 1.5     glgenbuffers( 1, &bufferobj );     glbindbuffer( gl_pixel_unpack_buffer_arb, bufferobj );     glbufferdata( gl_pixel_unpack_buffer_arb, dim * dim * 4 ,null, gl_dynamic_draw_arb );      // register gl bufferobj , cuda resource     ( cudagraphicsglregisterbuffer( &resource, bufferobj, cudagraphicsmapflagsnone ) );      // work memory dst being on gpu, gotten via mapping     handle_error( cudagraphicsmapresources( 1, &resource, null ) );      // modified code      uchar4 *devptr;      size_t size;     size_t sizetotal = 0;     cudamalloc ( (uchar4 **)&devptr,  size);      uchar4 *devptr2;       cudamalloc ( (uchar4 **)&devptr2,  size);       uchar4 *pbuffer;      (cudamalloc ( (uchar4 **)&pbuffer,  size));      uchar4 *pbuffercurrent;      (cudamalloc ( (uchar4 **)&pbuffercurrent,  size));      uchar4 *pbufferimage;      (cudamalloc ( (uchar4 **)&pbufferimage,  size));      // register c buffer , cuda resource      handle_error(      cudagraphicsresourcegetmappedpointer( (void**)&pbufferimage,                                            &size,                                            resource) );      dim3    grids(dim/16,dim/16);     dim3    threads(16,16);       kernel<<<grids,threads>>>(devptr);     kernel2<<<grids,threads>>>(devptr2);      int = 1;          {      if (a==1)     {     copy<<< grids, threads>>>(pbufferimage, devptr, devptr2, size, a);       }      if(a==2)     {     copy<<< grids, threads>>>(pbufferimage, devptr, devptr2, size, a);     }      a++;      } while (a<=2);       cudagraphicsunmapresources( 1, &resource, null ) );      // set glut , kick off main loop     glutkeyboardfunc( key_func );     glutdisplayfunc( draw_func );     glutmainloop();    } 

start doing proper cuda error checking on cuda api calls (e.g. cudamemcpy, etc.) , kernel calls.

when you'll discover kernels not running successfully. these types of things won't work:

uchar4 *devptr;   // you've created unallocated null host pointer size_t img1_size = imagesize_max;  kernel<<<grids,threads>>>(devptr);  // kernel fail   uchar4 *devptr2;   // you've created unallocated null host pointer size_t img2_size = imagesize_max;  kernel2<<<grids,threads>>>(devptr2);  // kernel fail 

devptr , devptr2 in above code null pointers. haven't allocated storage associated them. furthermore, since passing them device kernels, need allocated cudamalloc or similar api function, in order pointers usable in device code.

since not allocated cudamalloc, try dereference pointers in device code, you'll create kernel fault. evident if error checking, have "unspecified launch failure" or similar report kernels.

i think there number of other problems in code, first should proper cuda error checking , @ least code point you've written is, in fact, running.

and code you've posted doesn't compile.

after fixing compile errors discovered have infinite loop:

cudamalloc ( (uchar4 **)&pbuffercurrent,  sizetotal + sizeof(size) + size);  cudamalloc ( (uchar4 **)&pbuffer,  sizetotal + sizeof(size) + size);   {  if (!pbuffercurrent) { break; }  pbuffer = pbuffercurrent;  pbuffercurrent += sizetotal;  imageget ( pbuffercurrent + sizeof(size), size, devptr);  sizetotal += (sizeof(size) + size);  } while (a==1);  

since a initialized 1 in loop, , nothing in loop modifies a, loop never exit based on while condition. since pbuffercurrent never 0 if it's been set cudamalloc, break never taken.

if malloc or cudamalloc pointer called pbuffercurrent, it's hard me imagine under circumstances ever make sense:

pbuffercurrent += sizetotal; 

and although legal, don't see how makes sense:

pbuffer = pbuffercurrent; 

you created allocation pbuffer using cudamalloc, first thing throw away?


Comments

Popular posts from this blog

basic authentication with http post params android -

vb.net - Virtual Keyboard commands -

css - Firefox for ubuntu renders wrong colors -