@@ -508,58 +508,58 @@ void filterPixelStage2(const float* __restrict__ depth, const float* __restrict_
508508
509509namespace libfreenect2
510510{
511+
/**
 * Frame whose pixel storage is borrowed from an allocator-owned Buffer.
 *
 * The frame allocates nothing itself: `data` aliases the buffer's payload,
 * and the Buffer pointer is stashed in `rawdata` so the destructor can hand
 * the buffer back to the allocator recorded in it.
 */
class CudaFrame: public Frame
{
public:
  /**
   * Wrap `buffer` as a 512x424 frame with 4 bytes per pixel.
   *
   * @param buffer Backing buffer; it is released through buffer->allocator
   *               when the frame is destroyed. A NULL buffer produces a
   *               frame whose `data` is NULL instead of crashing here.
   */
  CudaFrame(Buffer *buffer):
    // NOTE(review): the (unsigned char*)-1 sentinel presumably tells Frame
    // not to allocate its own storage -- confirm against Frame's constructor.
    Frame(512, 424, 4, (unsigned char*)-1)
  {
    data = (buffer != NULL) ? buffer->data : NULL;
    // Keep the Buffer itself (not its payload) so the destructor can free it.
    rawdata = reinterpret_cast<unsigned char *>(buffer);
  }

  /** Return the backing buffer to the allocator recorded in it. */
  virtual ~CudaFrame()
  {
    Buffer *buffer = reinterpret_cast<Buffer *>(rawdata);
    // Guard both pointers: a frame built from a NULL buffer, or a buffer
    // that was never tagged with an allocator, must not be dereferenced.
    if (buffer != NULL && buffer->allocator != NULL)
      buffer->allocator->free(buffer);
    // Cleared so nothing later treats the Buffer pointer as raw pixel
    // memory (presumably the base destructor inspects rawdata -- confirm).
    rawdata = NULL;
  }
};
534529
/**
 * Allocator handing out page-locked (pinned) host buffers obtained with
 * cudaHostAlloc.
 *
 * Buffers created for input are additionally write-combined: they are
 * filled by the CPU and consumed by the device, which is the use case
 * write-combined memory is meant for. Buffers that the CPU reads back
 * (output frames) stay cacheable, because CPU reads from write-combined
 * memory are very slow.
 */
class CudaAllocator: public Allocator
{
private:
  /** True when the buffers are CPU-written input for the device. */
  const bool input;

  /**
   * Allocate `size` bytes of pinned host memory into `b`.
   *
   * @return true on success. NOTE(review): CHECK_CUDA is assumed to return
   *         false from this function when the CUDA call fails -- confirm
   *         against the macro definition.
   */
  bool allocate_cuda(Buffer *b, size_t size)
  {
    unsigned int flags = cudaHostAllocPortable;
    // Write-combined only for CPU-write / device-read buffers.
    if (input)
      flags |= cudaHostAllocWriteCombined;
    CHECK_CUDA(cudaHostAlloc(&b->data, size, flags));
    b->length = 0;
    b->capacity = size;
    return true;
  }

public:
  /**
   * @param input true for buffers written by the host and read by the
   *              device; false for buffers the host reads back.
   */
  CudaAllocator(bool input): input(input) {}

  /**
   * Create a Buffer with `size` bytes of pinned storage.
   *
   * A Buffer object is always returned; if the CUDA allocation fails its
   * `data` is NULL and its length/capacity are zero, so callers must check
   * `data` before use.
   */
  virtual Buffer *allocate(size_t size)
  {
    Buffer *b = new Buffer();
    if (!allocate_cuda(b, size)) {
      // Leave the buffer in a consistent "empty" state on failure.
      b->data = NULL;
      b->length = 0;
      b->capacity = 0;
    }
    return b;
  }

  /**
   * Release a buffer obtained from allocate(). NULL is ignored; a buffer
   * whose allocation failed (NULL data) only has its Buffer object deleted.
   */
  virtual void free(Buffer *b)
  {
    if (b == NULL)
      return;
    if (b->data != NULL)
      CALL_CUDA(cudaFreeHost(b->data));
    delete b;
  }
};
@@ -603,7 +603,9 @@ public:
603603
604604 Frame *ir_frame, *depth_frame;
605605
606- Allocator *allocator;
606+ Allocator *input_allocator;
607+ Allocator *ir_allocator;
608+ Allocator *depth_allocator;
607609
608610 bool good;
609611
@@ -614,23 +616,29 @@ public:
614616 params (),
615617 ir_frame (NULL ),
616618 depth_frame (NULL ),
617- allocator (NULL )
619+ input_allocator (NULL ),
620+ ir_allocator (NULL ),
621+ depth_allocator (NULL )
618622 {
619623 good = initDevice (deviceId);
620624 if (!good)
621625 return ;
622626
627+ input_allocator = new PoolAllocator (new CudaAllocator (true ));
628+ ir_allocator = new PoolAllocator (new CudaAllocator (false ));
629+ depth_allocator = new PoolAllocator (new CudaAllocator (false ));
630+
623631 newIrFrame ();
624632 newDepthFrame ();
625-
626- allocator = new PoolAllocator (new CudaAllocator);
627633 }
628634
/**
 * Tear down in dependency order: frames, then the allocators, then the
 * device-side memory.
 */
~CudaDepthPacketProcessorImpl()
{
  // Frames go first: a CudaFrame's destructor calls back into the allocator
  // recorded in its buffer, so the allocators must still be alive here.
  delete ir_frame;
  delete depth_frame;

  // With no frame holding a buffer any more, the allocators can go.
  delete input_allocator;
  delete ir_allocator;
  delete depth_allocator;

  // `good` is false when initDevice() failed in the constructor, in which
  // case the constructor returned before doing any further setup.
  if (good)
  {
    freeDeviceMemory();
  }
}
@@ -858,12 +866,12 @@ public:
858866
/**
 * Point ir_frame at a fresh CudaFrame backed by a buffer of
 * IMAGE_SIZE floats taken from ir_allocator. The previous ir_frame
 * pointer is overwritten, not deleted.
 */
void newIrFrame()
{
  Buffer *storage = ir_allocator->allocate(IMAGE_SIZE * sizeof(float));
  ir_frame = new CudaFrame(storage);
}
863871
/**
 * Point depth_frame at a fresh CudaFrame backed by a buffer of
 * IMAGE_SIZE floats taken from depth_allocator. The previous depth_frame
 * pointer is overwritten, not deleted.
 */
void newDepthFrame()
{
  Buffer *storage = depth_allocator->allocate(IMAGE_SIZE * sizeof(float));
  depth_frame = new CudaFrame(storage);
}
868876
869877 void fill_trig_table (const protocol::P0TablesResponse *p0table)
@@ -953,6 +961,6 @@ void CudaDepthPacketProcessor::process(const DepthPacket &packet)
953961
/**
 * Expose the implementation's input allocator.
 *
 * @return impl_->input_allocator; it is NULL if device initialisation
 *         failed, since the allocators are only created after initDevice()
 *         succeeds.
 */
Allocator *CudaDepthPacketProcessor::getAllocator()
{
  Allocator *const packet_allocator = impl_->input_allocator;
  return packet_allocator;
}
958966} // namespace libfreenect2
0 commit comments