Skip to content

Commit 4e1bc69

Browse files
committed
cuda: Use memory pooling for frames
1 parent 0394f8b commit 4e1bc69

File tree

1 file changed

+36
-28
lines changed

1 file changed

+36
-28
lines changed

src/cuda_depth_packet_processor.cu

Lines changed: 36 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -508,58 +508,58 @@ void filterPixelStage2(const float* __restrict__ depth, const float* __restrict_
508508

509509
namespace libfreenect2
510510
{
511+
511512
/**
 * Frame whose pixel storage is a Buffer obtained from an Allocator.
 *
 * The frame does not allocate memory itself: it borrows `buffer->data` as its
 * pixel data and keeps the Buffer pointer (stashed in `rawdata`) so the buffer
 * can be handed back to `buffer->allocator` when the frame is destroyed.
 */
class CudaFrame: public Frame
{
public:
  /**
   * @param buffer Buffer backing this frame. May be NULL (e.g. when the
   *               allocator could not provide one); the frame then has
   *               data == NULL and owns nothing.
   */
  CudaFrame(Buffer *buffer):
    // NOTE(review): the (unsigned char*)-1 sentinel presumably tells Frame not
    // to allocate its own storage -- confirm against Frame's constructor.
    Frame(512, 424, 4, (unsigned char*)-1)
  {
    if (buffer == NULL)
    {
      // Nothing to borrow; leave the frame empty instead of dereferencing NULL.
      data = NULL;
      rawdata = NULL;
      return;
    }

    data = buffer->data;
    // rawdata is reused to remember the owning Buffer for the destructor.
    rawdata = reinterpret_cast<unsigned char *>(buffer);
  }

  /** Returns the backing Buffer (if any) to the allocator recorded on it. */
  virtual ~CudaFrame()
  {
    Buffer *buffer = reinterpret_cast<Buffer*>(rawdata);
    if (buffer != NULL)
      buffer->allocator->free(buffer);
    // Clear rawdata so it no longer refers to the released Buffer
    // (presumably the base destructor inspects it -- confirm).
    rawdata = NULL;
  }
};
534529

535530
/**
 * Allocator that hands out Buffers backed by pinned host memory obtained from
 * cudaHostAlloc().
 *
 * `input` selects the allocation flags: buffers flagged as input additionally
 * request write-combined memory.
 */
class CudaAllocator: public Allocator
{
private:
  // true when this allocator serves input buffers (see constructor).
  const bool input;

  /**
   * Allocates `size` bytes of pinned host memory into `b->data`.
   *
   * @return true on success. CHECK_CUDA is defined elsewhere; callers test the
   *         result with `!allocate_cuda(...)`, so it presumably returns false
   *         from this function on a CUDA error.
   */
  bool allocate_cuda(Buffer *b, size_t size)
  {
    unsigned int flags = cudaHostAllocPortable;
    // Write-combined memory is efficient for the host to write and for the
    // device to read, but slow for the host to read back. Request it only for
    // input buffers; output buffers that the host reads stay cacheable.
    if (input)
      flags |= cudaHostAllocWriteCombined;
    CHECK_CUDA(cudaHostAlloc(&b->data, size, flags));
    b->length = 0;
    b->capacity = size;
    return true;
  }

public:
  /** @param input true for an allocator of input buffers, false for output. */
  CudaAllocator(bool input): input(input) {}

  /**
   * Creates a new Buffer with `size` bytes of pinned host memory.
   *
   * A Buffer object is always returned; if the CUDA allocation fails its
   * `data` is NULL and its length/capacity are 0.
   */
  virtual Buffer *allocate(size_t size)
  {
    Buffer *b = new Buffer();
    if (!allocate_cuda(b, size))
    {
      // Leave the Buffer in a well-defined "empty" state on failure.
      b->data = NULL;
      b->length = 0;
      b->capacity = 0;
    }
    return b;
  }

  /** Releases the pinned memory (if any) and deletes the Buffer itself. */
  virtual void free(Buffer *b)
  {
    if (b == NULL)
      return;
    // A Buffer whose allocation failed has data == NULL; only the Buffer
    // object needs to be deleted in that case.
    if (b->data)
      CALL_CUDA(cudaFreeHost(b->data));
    delete b;
  }
};
@@ -603,7 +603,9 @@ public:
603603

604604
Frame *ir_frame, *depth_frame;
605605

606-
Allocator *allocator;
606+
Allocator *input_allocator;
607+
Allocator *ir_allocator;
608+
Allocator *depth_allocator;
607609

608610
bool good;
609611

@@ -614,23 +616,29 @@ public:
614616
params(),
615617
ir_frame(NULL),
616618
depth_frame(NULL),
617-
allocator(NULL)
619+
input_allocator(NULL),
620+
ir_allocator(NULL),
621+
depth_allocator(NULL)
618622
{
619623
good = initDevice(deviceId);
620624
if (!good)
621625
return;
622626

627+
input_allocator = new PoolAllocator(new CudaAllocator(true));
628+
ir_allocator = new PoolAllocator(new CudaAllocator(false));
629+
depth_allocator = new PoolAllocator(new CudaAllocator(false));
630+
623631
newIrFrame();
624632
newDepthFrame();
625-
626-
allocator = new PoolAllocator(new CudaAllocator);
627633
}
628634

629635
/**
 * Tears the processor down: frames first, then the allocators, then the
 * device memory (only when initialisation succeeded).
 */
~CudaDepthPacketProcessorImpl()
{
  // The frames are created from ir_allocator / depth_allocator buffers
  // (see newIrFrame / newDepthFrame), so destroy them before the allocators.
  delete ir_frame;
  delete depth_frame;

  delete input_allocator;
  delete ir_allocator;
  delete depth_allocator;

  if (good)
  {
    freeDeviceMemory();
  }
}
@@ -858,12 +866,12 @@ public:
858866

859867
/** Replaces ir_frame with a new CudaFrame backed by a buffer from ir_allocator. */
void newIrFrame()
{
  // One float per pixel.
  Buffer *ir_buffer = ir_allocator->allocate(IMAGE_SIZE*sizeof(float));
  ir_frame = new CudaFrame(ir_buffer);
}
863871

864872
/** Replaces depth_frame with a new CudaFrame backed by a buffer from depth_allocator. */
void newDepthFrame()
{
  // One float per pixel.
  Buffer *depth_buffer = depth_allocator->allocate(IMAGE_SIZE*sizeof(float));
  depth_frame = new CudaFrame(depth_buffer);
}
868876

869877
void fill_trig_table(const protocol::P0TablesResponse *p0table)
@@ -953,6 +961,6 @@ void CudaDepthPacketProcessor::process(const DepthPacket &packet)
953961

954962
/** Returns the allocator used for input buffers (the implementation's input_allocator). */
Allocator *CudaDepthPacketProcessor::getAllocator()
{
  Allocator *packet_allocator = impl_->input_allocator;
  return packet_allocator;
}
958966
} // namespace libfreenect2

0 commit comments

Comments
 (0)