From 3b86289c8e0b28aef529797c6ed837361d001b21 Mon Sep 17 00:00:00 2001 From: blackzafiro Date: Thu, 2 Mar 2017 17:19:22 -0600 Subject: [PATCH 01/25] Moved cuda registration to its own files, still looking for a way to compile with thrust, since I get an exception due to the use of catch. --- CMakeLists.txt | 4 +- include/libfreenect2/cuda_registration.h | 145 +++++++++++++++++++++++ src/cuda_registration.cu | 27 +++++ 3 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 include/libfreenect2/cuda_registration.h create mode 100644 src/cuda_registration.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f392203f..45672130e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,6 +125,7 @@ SET(SOURCES include/libfreenect2/packet_pipeline.h include/internal/libfreenect2/packet_processor.h include/libfreenect2/registration.h + include/libfreenect2/cuda_registration.h include/internal/libfreenect2/resource.h include/internal/libfreenect2/rgb_packet_processor.h include/internal/libfreenect2/rgb_packet_stream_parser.h @@ -354,7 +355,7 @@ IF(ENABLE_CUDA) ) SET(CUDA_FLAGS -use_fast_math) IF(NOT MSVC) - SET(CUDA_FLAGS "${CUDA_FLAGS} -Xcompiler -fPIC") + SET(CUDA_FLAGS "${CUDA_FLAGS} -Xcompiler -fPIC -D_FORCE_INLINES") ENDIF() IF(HAVE_CXX11 EQUAL yes AND CUDA_VERSION VERSION_GREATER 7.0) SET(CUDA_FLAGS "${CUDA_FLAGS} -std=c++11") @@ -365,6 +366,7 @@ IF(ENABLE_CUDA) CUDA_COMPILE(CUDA_OBJECTS src/cuda_depth_packet_processor.cu src/cuda_kde_depth_packet_processor.cu + src/cuda_registration.cu OPTIONS ${CUDA_FLAGS} ) SET(CMAKE_CXX_FLAGS "${OLD_CMAKE_CXX_FLAGS}") diff --git a/include/libfreenect2/cuda_registration.h b/include/libfreenect2/cuda_registration.h new file mode 100644 index 000000000..c79071ea7 --- /dev/null +++ b/include/libfreenect2/cuda_registration.h @@ -0,0 +1,145 @@ +/* + * This file is part of the OpenKinect Project. http://www.openkinect.org + * + * Copyright (c) 2014 individual OpenKinect contributors. See the CONTRIB file + * for details. + * + * This code is licensed to you under the terms of the Apache License, version + * 2.0, or, at your option, the terms of the GNU General Public License, + * version 2.0. See the APACHE20 and GPL2 files for the text of the licenses, + * or the following URLs: + * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.gnu.org/licenses/gpl-2.0.txt + * + * If you redistribute this file in source form, modified or unmodified, you + * may: + * 1) Leave this header intact and distribute it under the same terms, + * accompanying it with the APACHE20 and GPL20 files, or + * 2) Delete the Apache 2.0 clause and accompany it with the GPL2 file, or + * 3) Delete the GPL v2 clause and accompany it with the APACHE20 file + * In all cases you must keep the copyright notice intact and include a copy + * of the CONTRIB file. + * + * Binary distributions must follow the binary distribution requirements of + * either License. + */ + +/** @file cuda_registration.h Class for merging depth and color frames using cuda. */ + +#ifndef CUDA_REGISTRATION_H_ +#define CUDA_REGISTRATION_H_ + +#include +#include +#include +#include + +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT +#include +#include +#include +#include +#include + +namespace libfreenect2 +{ + +typedef thrust::tuple TupleXYZRGB; + +class CudaRegistrationImpl; + +/** @defgroup registration Registration and Geometry + * Register depth to color, create point clouds. */ + +/** Combine frames of depth and color camera using gpus. @ingroup registration + * Right now this class uses a reverse engineered formula that uses factory + * preset extrinsic parameters the same way the Registration class does. + */ +class LIBFREENECT2_API CudaRegistration +{ +public: + /** + * @param depth_p Depth camera parameters. You can use the factory values, or use your own. + * @param rgb_p Color camera parameters. Probably use the factory values for now. + */ + CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p); + ~CudaRegistration(); + + /** Undistort and register a single depth point to color camera. + * @param dx Distorted depth coordinate x (pixel) + * @param dy Distorted depth coordinate y (pixel) + * @param dz Depth value (millimeter) + * @param[out] cx Undistorted color coordinate x (normalized) + * @param[out] cy Undistorted color coordinate y (normalized) + */ + void apply(int dx, int dy, float dz, float& cx, float &cy) const; + + /** Map color images onto depth images + * @param rgb Color image (1920x1080 BGRX) + * @param depth Depth image (512x424 float) + * @param[out] undistorted Undistorted depth image + * @param[out] registered Color image for the depth image (512x424) + * @param enable_filter Filter out pixels not visible to both cameras. + * @param[out] bigdepth If not `NULL`, return mapping of depth onto colors (1920x1082 float). **1082** not 1080, with a blank top and bottom row. + * @param[out] color_depth_map Index of mapped color pixel for each depth pixel (512x424). + */ + void apply(const Frame* rgb, const Frame* depth, Frame* undistorted, Frame* registered, const bool enable_filter = true, Frame* bigdepth = 0, int* color_depth_map = 0) const; + + /** Undistort depth + * @param depth Depth image (512x424 float) + * @param[out] undistorted Undistorted depth image + */ + void undistortDepth(const Frame* depth, Frame* undistorted) const; + + /** Construct a 3-D point with color in a point cloud. + * @param undistorted Undistorted depth frame from apply(). + * @param registered Registered color frame from apply(). + * @param r Row (y) index in depth image. + * @param c Column (x) index in depth image. + * @param[out] x X coordinate of the 3-D point (meter). + * @param[out] y Y coordinate of the 3-D point (meter). + * @param[out] z Z coordinate of the 3-D point (meter). + * @param[out] rgb Color of the 3-D point (BGRX). To unpack the data, use + * + * const uint8_t *p = reinterpret_cast(&rgb); + * uint8_t b = p[0]; + * uint8_t g = p[1]; + * uint8_t r = p[2]; + */ + void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; + + /** Construct a 3-D point in a point cloud. + * @param undistorted Undistorted depth frame from apply(). + * @param r Row (y) index in depth image. + * @param c Column (x) index in depth image. + * @param[out] x X coordinate of the 3-D point (meter). + * @param[out] y Y coordinate of the 3-D point (meter). + * @param[out] z Z coordinate of the 3-D point (meter). + */ + void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; + + /** + * Construct a point cloud as thrust vector of XYZRGB data as tuples of in device memory, which can be used + * for further processing with CUDA. + * @param undistorted Undistorted depth frame from apply(). + * @param registered Registered color frame from apply(). + * @param[out] cloud_data coordinates of the 3-D point (meter) and color (BGRX). + * To unpack the color data, use + * const uint8_t *p = reinterpret_cast(&rgb); + * uint8_t b = p[0]; + * uint8_t g = p[1]; + * uint8_t r = p[2]; + */ + void getPointXYZRGB(const Frame* undistorted, const Frame* registered, thrust::device_vector& cloud_data) const; + +private: + CudaRegistrationImpl *impl_; + + /* Disable copy and assignment constructors */ + CudaRegistration(const CudaRegistration&); + CudaRegistration& operator=(const CudaRegistration&); +}; +#endif // LIBFREENECT2_WITH_CUDA_SUPPORT + +} /* namespace libfreenect2 */ +#endif /* REGISTRATION_H_ */ diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu new file mode 100644 index 000000000..7459eacc6 --- /dev/null +++ b/src/cuda_registration.cu @@ -0,0 +1,27 @@ +/* + * This file is part of the OpenKinect Project. http://www.openkinect.org + * + * Copyright (c) 2014 individual OpenKinect contributors. See the CONTRIB file + * for details. + * + * This code is licensed to you under the terms of the Apache License, version + * 2.0, or, at your option, the terms of the GNU General Public License, + * version 2.0. See the APACHE20 and GPL2 files for the text of the licenses, + * or the following URLs: + * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.gnu.org/licenses/gpl-2.0.txt + * + * If you redistribute this file in source form, modified or unmodified, you + * may: + * 1) Leave this header intact and distribute it under the same terms, + * accompanying it with the APACHE20 and GPL20 files, or + * 2) Delete the Apache 2.0 clause and accompany it with the GPL2 file, or + * 3) Delete the GPL v2 clause and accompany it with the APACHE20 file + * In all cases you must keep the copyright notice intact and include a copy + * of the CONTRIB file. + * + * Binary distributions must follow the binary distribution requirements of + * either License. + */ + +#include From 7111eec4309b72d7fcdd050d6319dc0d587bfec4 Mon Sep 17 00:00:00 2001 From: blackzafiro Date: Tue, 4 Apr 2017 15:59:39 -0500 Subject: [PATCH 02/25] Will need thrust to create 3D clouds, so -fno-exceptions will be removed if cuda is selected. --- .cproject | 52 ++++++++++++++++ .project | 27 ++++++++ .settings/language.settings.xml | 15 +++++ .settings/org.eclipse.cdt.codan.core.prefs | 71 ++++++++++++++++++++++ CMakeLists.txt | 6 ++ src/cuda_registration.cu | 24 +++++++- 6 files changed, 194 insertions(+), 1 deletion(-) create mode 100644 .cproject create mode 100644 .project create mode 100644 .settings/language.settings.xml create mode 100644 .settings/org.eclipse.cdt.codan.core.prefs diff --git a/.cproject b/.cproject new file mode 100644 index 000000000..d76b773eb --- /dev/null +++ b/.cproject @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.project b/.project new file mode 100644 index 000000000..0ae1eed8b --- /dev/null +++ b/.project @@ -0,0 +1,27 @@ + + + libfreenect2 + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + diff --git a/.settings/language.settings.xml b/.settings/language.settings.xml new file mode 100644 index 000000000..f165e0141 --- /dev/null +++ b/.settings/language.settings.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/.settings/org.eclipse.cdt.codan.core.prefs b/.settings/org.eclipse.cdt.codan.core.prefs new file mode 100644 index 000000000..b5248c620 --- /dev/null +++ b/.settings/org.eclipse.cdt.codan.core.prefs @@ -0,0 +1,71 @@ +eclipse.preferences.version=1 +org.eclipse.cdt.codan.checkers.errnoreturn=Warning +org.eclipse.cdt.codan.checkers.errnoreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"No return\\")",implicit\=>false} +org.eclipse.cdt.codan.checkers.errreturnvalue=Error +org.eclipse.cdt.codan.checkers.errreturnvalue.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused return value\\")"} +org.eclipse.cdt.codan.checkers.nocommentinside=-Error +org.eclipse.cdt.codan.checkers.nocommentinside.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Nesting comments\\")"} +org.eclipse.cdt.codan.checkers.nolinecomment=-Error +org.eclipse.cdt.codan.checkers.nolinecomment.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Line comments\\")"} +org.eclipse.cdt.codan.checkers.noreturn=Error +org.eclipse.cdt.codan.checkers.noreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"No return value\\")",implicit\=>false} +org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation=Error +org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Abstract class cannot be instantiated\\")"} +org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem=Error +org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Ambiguous problem\\")"} +org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem=Warning +org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Assignment in condition\\")"} +org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem=Error +org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Assignment to itself\\")"} +org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem=Warning +org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"No break at end of case\\")",no_break_comment\=>"no break",last_case_param\=>false,empty_case_param\=>false} +org.eclipse.cdt.codan.internal.checkers.CatchByReference=Warning +org.eclipse.cdt.codan.internal.checkers.CatchByReference.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Catching by reference is recommended\\")",unknown\=>false,exceptions\=>()} +org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem=Error +org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Circular inheritance\\")"} +org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization=Warning +org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Class members should be properly initialized\\")",skip\=>true} +org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Field cannot be resolved\\")"} +org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Function cannot be resolved\\")"} +org.eclipse.cdt.codan.internal.checkers.InvalidArguments=Error +org.eclipse.cdt.codan.internal.checkers.InvalidArguments.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid arguments\\")"} +org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem=Error +org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid template argument\\")"} +org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem=Error +org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Label statement not found\\")"} +org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem=Error +org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Member declaration not found\\")"} +org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Method cannot be resolved\\")"} +org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker=-Info +org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Name convention for function\\")",pattern\=>"^[a-z]",macro\=>true,exceptions\=>()} +org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem=Warning +org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Class has a virtual method and non-virtual destructor\\")"} +org.eclipse.cdt.codan.internal.checkers.OverloadProblem=Error +org.eclipse.cdt.codan.internal.checkers.OverloadProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid overload\\")"} +org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem=Error +org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid redeclaration\\")"} +org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem=Error +org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid redefinition\\")"} +org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem=-Warning +org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Return with parenthesis\\")"} +org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem=-Warning +org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Format String Vulnerability\\")"} +org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem=Warning +org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Statement has no effect\\")",macro\=>true,exceptions\=>()} +org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem=Warning +org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Suggested parenthesis around expression\\")",paramNot\=>false} +org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem=Warning +org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Suspicious semicolon\\")",else\=>false,afterelse\=>false} +org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Type cannot be resolved\\")"} +org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem=Warning +org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused function declaration\\")",macro\=>true} +org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem=Warning +org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused static function\\")",macro\=>true} +org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem=Warning +org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused variable declaration in file scope\\")",macro\=>true,exceptions\=>("@(\#)","$Id")} +org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Symbol is not resolved\\")"} diff --git a/CMakeLists.txt b/CMakeLists.txt index 45672130e..4fec8cdb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -363,6 +363,12 @@ IF(ENABLE_CUDA) SET(OLD_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") STRING(REGEX REPLACE "-std=c\\+\\+.." "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + # Thrust requires exceptions. If OpenCL from NVidia is used we don't need this flag. + STRING(REGEX REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + MESSAGE(STATUS "*************") + mESSAGE(STATUS ${CMAKE_CXX_FLAGS}) + MESSAGE(STATUS ${CUDA_FLAGS}) + MESSAGE(STATUS "*************") CUDA_COMPILE(CUDA_OBJECTS src/cuda_depth_packet_processor.cu src/cuda_kde_depth_packet_processor.cu diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index 7459eacc6..d27517b59 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -23,5 +23,27 @@ * Binary distributions must follow the binary distribution requirements of * either License. */ - + +/** @file Implementation of merging depth and color images using cuda. */ + #include + +namespace libfreenect2 +{ + +/* + * The information used here has been taken from libfreenect2::Registration source + * code. + */ +static const float depth_q = 0.01; +static const float color_q = 0.002199; + +CudaRegistration::CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p): + impl_(new RegistrationImpl(depth_p, rgb_p)) {} + +CudaRegistration::~CudaRegistration() +{ + delete impl_; +} + +} /* namespace libfreenect2 */ \ No newline at end of file From a4453bf8528c897e508931b224a7c73deb78d0b3 Mon Sep 17 00:00:00 2001 From: blackzafiro Date: Tue, 7 Mar 2017 21:05:37 -0600 Subject: [PATCH 03/25] Began implementation of CudaRegistration. CudaRegistration is properly initialized, with depth and color maps. --- .gitignore | 5 + src/cuda_registration.cu | 204 ++++++++++++++++++++++++++++++++++++++- src/registration.cpp | 4 +- 3 files changed, 209 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 6926523a6..5f00aeb6c 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,11 @@ ClientBin/ *.pfx *.publishsettings +# Nsight Nvidia Eclipse +.cproject +.project +.settings/ + # RIA/Silverlight projects Generated_Code/ diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index d27517b59..78f571504 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -27,6 +27,92 @@ /** @file Implementation of merging depth and color images using cuda. */ #include +#include "libfreenect2/logging.h" + +#define MONO_ROWS 424 +#define MONO_COLS 512 + + +typedef unsigned char uchar; + +#define CHECK_CUDA(expr) do { cudaError_t err = (expr); if (err != cudaSuccess) { LOG_ERROR << #expr ": " << cudaGetErrorString(err); return false; } } while(0) +#define CALL_CUDA(expr) do { cudaError_t err = (expr); if (err != cudaSuccess) { LOG_ERROR << #expr ": " << cudaGetErrorString(err); } } while(0) + +static __device__ +void distort(int mx, int my, float& d_x, float& d_y, const libfreenect2::Freenect2Device::IrCameraParams& d_depth) +{ + float dx = ((float)mx - d_depth.cx) / d_depth.fx; + float dy = ((float)my - d_depth.cy) / d_depth.fy; + float dx2 = dx * dx; + float dy2 = dy * dy; + float r2 = dx2 + dy2; + float dxdy2 = 2 * dx * dy; + float kr = 1 + ((d_depth.k3 * r2 + d_depth.k2) * r2 + d_depth.k1) * r2; + d_x = d_depth.fx * (dx * kr + d_depth.p2 * (r2 + 2 * dx2) + d_depth.p1 * dxdy2) + d_depth.cx; + d_y = d_depth.fy * (dy * kr + d_depth.p1 * (r2 + 2 * dy2) + d_depth.p2 * dxdy2) + d_depth.cy; +} + +static __device__ +void depth_to_color(float mx, float my, float& d_rx, float& d_ry, + const libfreenect2::Freenect2Device::IrCameraParams& d_depth, + const libfreenect2::Freenect2Device::ColorCameraParams& d_color, + const float depth_q, const float color_q) +{ + mx = (mx - d_depth.cx) * depth_q; + my = (my - d_depth.cy) * depth_q; + + float wx = + (mx * mx * mx * d_color.mx_x3y0) + (my * my * my * d_color.mx_x0y3) + + (mx * mx * my * d_color.mx_x2y1) + (my * my * mx * d_color.mx_x1y2) + + (mx * mx * d_color.mx_x2y0) + (my * my * d_color.mx_x0y2) + (mx * my * d_color.mx_x1y1) + + (mx * d_color.mx_x1y0) + (my * d_color.mx_x0y1) + (d_color.mx_x0y0); + + float wy = + (mx * mx * mx * d_color.my_x3y0) + (my * my * my * d_color.my_x0y3) + + (mx * mx * my * d_color.my_x2y1) + (my * my * mx * d_color.my_x1y2) + + (mx * mx * d_color.my_x2y0) + (my * my * d_color.my_x0y2) + (mx * my * d_color.my_x1y1) + + (mx * d_color.my_x1y0) + (my * d_color.my_x0y1) + (d_color.my_x0y0); + + d_rx = (wx / (d_color.fx * color_q)) - (d_color.shift_m / d_color.shift_d); + d_ry = (wy / color_q) + d_color.cy; +} + +static __global__ +void dInitMaps(int* d_map_dist, float* d_map_x, float* d_map_y, float* d_map_yi, + const libfreenect2::Freenect2Device::IrCameraParams d_depth, + const libfreenect2::Freenect2Device::ColorCameraParams d_color, + const float depth_q, const float color_q) +{ + // Configuration copied from cuda_depth_packet_processor.cu + const uint i = blockIdx.x*blockDim.x + threadIdx.x; + + const uint x = i % MONO_COLS; + const uint y = i / MONO_COLS; + + float mx, my; + int ix, iy, index; + float rx, ry; + + // compute the distorted coordinate for current pixel + distort(x, y, mx, my, d_depth); + + // rounding the values and check if the pixel is inside the image + ix = (int)(mx + 0.5f); + iy = (int)(my + 0.5f); + if(ix < 0 || ix >= 512 || iy < 0 || iy >= 424) + index = -1; + else + // computing the index from the coordinates for faster access to the data + index = iy * 512 + ix; + d_map_dist[i] = index; + + // compute the depth to color mapping entries for the current pixel + depth_to_color(x, y, rx, ry, d_depth, d_color, depth_q, color_q); + d_map_x[i] = rx; + d_map_y[i] = ry; + // compute the y offset to minimize later computations + d_map_yi[i] = (int)(ry + 0.5f); +} namespace libfreenect2 { @@ -38,12 +124,126 @@ namespace libfreenect2 static const float depth_q = 0.01; static const float color_q = 0.002199; +class CudaRegistrationImpl +{ +public: + CudaRegistrationImpl(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p): + depth(depth_p), color(rgb_p), + filter_width_half(2), filter_height_half(1), filter_tolerance(0.01f), + block_size(128), grid_size(MONO_IMAGE_SIZE/block_size) + { + good = setupDevice(); + if (!good) + return; + + good = initMaps(); + if (!good) + return; + } + + ~CudaRegistrationImpl() + { + if (good) + freeDeviceMemory(); + } + + void apply(int dx, int dy, float dz, float& cx, float &cy) const; + void apply(const Frame* rgb, const Frame* depth, Frame* undistorted, Frame* registered, const bool enable_filter, Frame* bigdepth, int* color_depth_map) const; + void undistortDepth(const Frame *depth, Frame *undistorted) const; + void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; + void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; + void distort(int mx, int my, float& dx, float& dy) const; + void depth_to_color(float mx, float my, float& rx, float& ry) const; + +private: + Freenect2Device::IrCameraParams depth; ///< Depth camera parameters. + Freenect2Device::ColorCameraParams color; ///< Color camera parameters. + + const int filter_width_half; + const int filter_height_half; + const float filter_tolerance; + + static const size_t MONO_IMAGE_SIZE = MONO_COLS * MONO_ROWS; + + size_t block_size; + size_t grid_size; + + bool good; // Memory correctly allocated + + // Maps + int* d_distort_map; + float* d_depth_to_color_map_x; + float* d_depth_to_color_map_y; + float* d_depth_to_color_map_yi; + + bool allocateDeviceMemory() + { + CHECK_CUDA(cudaMalloc(&d_distort_map, MONO_IMAGE_SIZE * sizeof(int))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_x, MONO_IMAGE_SIZE * sizeof(float))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_y, MONO_IMAGE_SIZE * sizeof(float))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_yi, MONO_IMAGE_SIZE * sizeof(float))); + + cudaDeviceSynchronize(); + + CHECK_CUDA(cudaGetLastError()); + return true; + } + + bool setupDevice() + { + // Continue to use same device than cuda_depth_packet_processor? + if (!allocateDeviceMemory()) + return false; + + return true; + } + + bool initMaps() + { + dInitMaps<<>>(d_distort_map, d_depth_to_color_map_x, + d_depth_to_color_map_y, d_depth_to_color_map_yi, + depth, color, depth_q, color_q); + + cudaDeviceSynchronize(); + CHECK_CUDA(cudaGetLastError()); + + return true; + } + + void freeDeviceMemory() + { + CALL_CUDA(cudaFree(d_distort_map)); + CALL_CUDA(cudaFree(d_depth_to_color_map_x)); + CALL_CUDA(cudaFree(d_depth_to_color_map_y)); + CALL_CUDA(cudaFree(d_depth_to_color_map_yi)); + } +}; + CudaRegistration::CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p): - impl_(new RegistrationImpl(depth_p, rgb_p)) {} + impl_(new CudaRegistrationImpl(depth_p, rgb_p)) {} CudaRegistration::~CudaRegistration() { delete impl_; } -} /* namespace libfreenect2 */ \ No newline at end of file +void CudaRegistration::apply(const Frame* rgb, const Frame* depth, Frame* undistorted, Frame* registered, const bool enable_filter, Frame* bigdepth, int* color_depth_map) const +{ + impl_->apply(rgb, depth, undistorted, registered, enable_filter, bigdepth, color_depth_map); +} + +void CudaRegistrationImpl::apply(const Frame *rgb, const Frame *depth, Frame *undistorted, Frame *registered, const bool enable_filter, Frame *bigdepth, int *color_depth_map) const +{ + // Check if all frames are valid and have the correct size + if (!rgb || !depth || !undistorted || !registered || + rgb->width != 1920 || rgb->height != 1080 || rgb->bytes_per_pixel != 4 || + depth->width != 512 || depth->height != 424 || depth->bytes_per_pixel != 4 || + undistorted->width != 512 || undistorted->height != 424 || undistorted->bytes_per_pixel != 4 || + registered->width != 512 || registered->height != 424 || registered->bytes_per_pixel != 4) + { + LOG_ERROR << "Not applying" << std::endl; + return; + } +} + +} /* namespace libfreenect2 */ diff --git a/src/registration.cpp b/src/registration.cpp index 49a3b03e0..b4a9ddd07 100644 --- a/src/registration.cpp +++ b/src/registration.cpp @@ -380,7 +380,7 @@ RegistrationImpl::RegistrationImpl(Freenect2Device::IrCameraParams depth_p, Free for (int y = 0; y < 424; y++) { for (int x = 0; x < 512; x++) { - // compute the dirstored coordinate for current pixel + // compute the distorted coordinate for current pixel distort(x,y,mx,my); // rounding the values and check if the pixel is inside the image ix = (int)(mx + 0.5f); @@ -388,7 +388,7 @@ RegistrationImpl::RegistrationImpl(Freenect2Device::IrCameraParams depth_p, Free if(ix < 0 || ix >= 512 || iy < 0 || iy >= 424) index = -1; else - // computing the index from the coordianted for faster access to the data + // computing the index from the coordinates for faster access to the data index = iy * 512 + ix; *map_dist++ = index; From 2199537a58b06020979ee1897fa45ae913fe5e40 Mon Sep 17 00:00:00 2001 From: blackzafiro Date: Tue, 21 Mar 2017 17:17:02 -0600 Subject: [PATCH 04/25] Added frame for cuda device data. Began implementing apply. Apply with cuda, filtering is not ready. Finished registration with cuda, still untested. Added LIBFREENECT2_API to CudaDeviceFrame. CudaRegistration apply is working. --- include/libfreenect2/cuda_registration.h | 19 +- src/cuda_registration.cu | 325 ++++++++++++++++++++++- 2 files changed, 335 insertions(+), 9 deletions(-) diff --git a/include/libfreenect2/cuda_registration.h b/include/libfreenect2/cuda_registration.h index c79071ea7..76c995e0d 100644 --- a/include/libfreenect2/cuda_registration.h +++ b/include/libfreenect2/cuda_registration.h @@ -46,6 +46,23 @@ namespace libfreenect2 typedef thrust::tuple TupleXYZRGB; +/** + * Frame whose data is allocated on device. + */ +class LIBFREENECT2_API CudaDeviceFrame: public Frame +{ +public: + /** Construct a new frame. + * @param width Width in pixel + * @param height Height in pixel + * @param bytes_per_pixel Bytes per pixel + */ + CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel); + virtual ~CudaDeviceFrame(); +private: + bool allocateMemory(); +}; + class CudaRegistrationImpl; /** @defgroup registration Registration and Geometry @@ -83,7 +100,7 @@ class LIBFREENECT2_API CudaRegistration * @param[out] bigdepth If not `NULL`, return mapping of depth onto colors (1920x1082 float). **1082** not 1080, with a blank top and bottom row. * @param[out] color_depth_map Index of mapped color pixel for each depth pixel (512x424). */ - void apply(const Frame* rgb, const Frame* depth, Frame* undistorted, Frame* registered, const bool enable_filter = true, Frame* bigdepth = 0, int* color_depth_map = 0) const; + bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter = true, CudaDeviceFrame* bigdepth = 0, int* color_depth_map = 0) const; /** Undistort depth * @param depth Depth image (512x424 float) diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index 78f571504..7b0079008 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -28,9 +28,12 @@ #include #include "libfreenect2/logging.h" +#include #define MONO_ROWS 424 #define MONO_COLS 512 +#define COLOR_ROWS 1080 +#define COLOR_COLS 1920 typedef unsigned char uchar; @@ -78,7 +81,7 @@ void depth_to_color(float mx, float my, float& d_rx, float& d_ry, } static __global__ -void dInitMaps(int* d_map_dist, float* d_map_x, float* d_map_y, float* d_map_yi, +void dInitMaps(int* d_map_dist, float* d_map_x, float* d_map_y, int* d_map_yi, const libfreenect2::Freenect2Device::IrCameraParams d_depth, const libfreenect2::Freenect2Device::ColorCameraParams d_color, const float depth_q, const float color_q) @@ -114,9 +117,189 @@ void dInitMaps(int* d_map_dist, float* d_map_x, float* d_map_y, float* d_map_yi, d_map_yi[i] = (int)(ry + 0.5f); } +static __global__ +void setFloat(float* devPtr, float value) +{ + // Configuration copied from cuda_depth_packet_processor.cu + const uint i = blockIdx.x * blockDim.x + threadIdx.x; + + devPtr[i] = value; +} + +/** + * Set all values of array of floats devPtr to value. + * This function does not call for synchronization. + * @param devPtr pointer to memory in device + * @param value value to set + * @param size number of float sized elements in array + */ +void cudaMemsetFloat(float* devPtr, float value, size_t size) +{ + size_t numThreads = 512; + size_t numBlocks = size / numThreads; + setFloat<<>>(devPtr, value); +} + +/** + * Compares value at address with val, if val is smaller it + * saves it at address. + */ +__device__ float atomicKeepSmaller(float* address, float val) +{ + // Implementation addapted from http://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions + int* address_as_ull = (int*)address; + int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(val < __int_as_float(assumed) ? val : __int_as_float(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + + +static __global__ +void undistort(int* d_depth_to_c_off, + float* d_undistorted_data, + float* d_filter_map, + const float* d_depth_data, const int* d_map_dist, + const float* d_map_x, const int* d_map_yi, + const libfreenect2::Freenect2Device::IrCameraParams depth, + const libfreenect2::Freenect2Device::ColorCameraParams color, + const int filter_width_half, + const int filter_height_half, + const int offset_filter_map, + const bool enable_filter) +{ + // getting index of distorted depth pixel + const int i = blockIdx.x * blockDim.x + threadIdx.x; + const int index = d_map_dist[i]; + + // check if distorted depth pixel is outside of the depth image + if(index < 0){ + d_depth_to_c_off[i] = -1; + d_undistorted_data[i] = 0; + return; + } + + // getting depth value for current pixel + const float z = d_depth_data[index]; + d_undistorted_data[i] = z; + + // checking for invalid depth value + if(z <= 0.0f){ + d_depth_to_c_off[i] = -1; + return; + } + + // calculating x offset for rgb image based on depth value + const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding + const float rx = (d_map_x[index] + (color.shift_m / z)) * color.fx + color_cx; + const int cx = rx; // same as round for positive numbers (0.5f was already added to color_cx) + // getting y offset for depth image + const int cy = d_map_yi[i]; + // combining offsets + const int c_off = cx + cy * COLOR_COLS; + + // check if c_off is outside of rgb image + // checking rx/cx is not needed because the color image is much wider then the depth image + if(c_off < 0 || c_off >= COLOR_ROWS * COLOR_COLS){ + d_depth_to_c_off[i] = -1; + return; + } + + // saving the offset for later + d_depth_to_c_off[i] = c_off; + + // I am not sure if there won't be race conditions here due to overlap, the atomic operation should help. + if(enable_filter){ + // setting a window around the filter map pixel corresponding to the color pixel with the current z value + int yi = (cy - filter_height_half) * 1920 + cx - filter_width_half; // index of first pixel to set + for(int r = -filter_height_half; r <= filter_height_half; ++r, yi += COLOR_COLS) // index increased by a full row each iteration + { + float *it = d_filter_map + offset_filter_map + yi; + for(int c = -filter_width_half; c <= filter_width_half; ++c, ++it) + { + // only set if the current z is smaller + atomicKeepSmaller(it, z); + } + } + } +} + +/** Construct 'registered' image with filter. + * Filter drops duplicate pixels due to aspect of two cameras. + */ +static __global__ +void registerImageFiltered(unsigned int *d_registered_data, + const unsigned int * d_rgb_data, + const int* d_depth_to_c_off, + const float* d_undistorted_data, + const float *d_p_filter_map, + const float filter_tolerance) +{ + // getting index of distorted depth pixel + const int i = blockIdx.x * blockDim.x + threadIdx.x; + + // run through all registered color pixels and set them based on filter results + const int c_off = d_depth_to_c_off[i]; + + // check if offset is out of image + if(c_off < 0){ + d_registered_data[i] = 0; + return; + } + + const float min_z = d_p_filter_map[c_off]; + const float z = d_undistorted_data[i]; + + // check for allowed depth noise + d_registered_data[i] = (z - min_z) / z > filter_tolerance ? 0 : d_rgb_data[c_off]; + +} + +/** Construct 'registered' image. */ +static __global__ +void registerImage(unsigned int *d_registered_data, + const unsigned int * d_rgb_data, + const int* d_depth_to_c_off) +{ + // getting index of distorted depth pixel + const int i = blockIdx.x * blockDim.x + threadIdx.x; + + // run through all registered color pixels and set them based on c_off + const int c_off = d_depth_to_c_off[i]; + + // check if offset is out of image + d_registered_data[i] = c_off < 0 ? 0 : d_rgb_data[c_off]; +} + namespace libfreenect2 { +CudaDeviceFrame::CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel): + Frame(width, height, bytes_per_pixel, (unsigned char*)-1) +{ + allocateMemory(); +} + +CudaDeviceFrame::~CudaDeviceFrame() +{ + CALL_CUDA(cudaFree(data)); +} + +bool CudaDeviceFrame::allocateMemory() +{ + CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); + + cudaDeviceSynchronize(); + + CHECK_CUDA(cudaGetLastError()); + return true; +} + /* * The information used here has been taken from libfreenect2::Registration source * code. @@ -148,7 +331,7 @@ public: } void apply(int dx, int dy, float dz, float& cx, float &cy) const; - void apply(const Frame* rgb, const Frame* depth, Frame* undistorted, Frame* registered, const bool enable_filter, Frame* bigdepth, int* color_depth_map) const; + bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const; void undistortDepth(const Frame *depth, Frame *undistorted) const; void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; @@ -174,14 +357,14 @@ private: int* d_distort_map; float* d_depth_to_color_map_x; float* d_depth_to_color_map_y; - float* d_depth_to_color_map_yi; + int* d_depth_to_color_map_yi; bool allocateDeviceMemory() { CHECK_CUDA(cudaMalloc(&d_distort_map, MONO_IMAGE_SIZE * sizeof(int))); CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_x, MONO_IMAGE_SIZE * sizeof(float))); CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_y, MONO_IMAGE_SIZE * sizeof(float))); - CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_yi, MONO_IMAGE_SIZE * sizeof(float))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_yi, MONO_IMAGE_SIZE * sizeof(int))); cudaDeviceSynchronize(); @@ -227,12 +410,12 @@ CudaRegistration::~CudaRegistration() delete impl_; } -void CudaRegistration::apply(const Frame* rgb, const Frame* depth, Frame* undistorted, Frame* registered, const bool enable_filter, Frame* bigdepth, int* color_depth_map) const +bool CudaRegistration::apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const { - impl_->apply(rgb, depth, undistorted, registered, enable_filter, bigdepth, color_depth_map); + return impl_->apply(rgb, depth, undistorted, registered, enable_filter, bigdepth, color_depth_map); } -void CudaRegistrationImpl::apply(const Frame *rgb, const Frame *depth, Frame *undistorted, Frame *registered, const bool enable_filter, Frame *bigdepth, int *color_depth_map) const +bool CudaRegistrationImpl::apply(const Frame *rgb, const Frame *depth, CudaDeviceFrame *undistorted, CudaDeviceFrame *registered, const bool enable_filter, CudaDeviceFrame *bigdepth, int *color_depth_map) const { // Check if all frames are valid and have the correct size if (!rgb || !depth || !undistorted || !registered || @@ -242,8 +425,134 @@ void CudaRegistrationImpl::apply(const Frame *rgb, const Frame *depth, Frame *un registered->width != 512 || registered->height != 424 || registered->bytes_per_pixel != 4) { LOG_ERROR << "Not applying" << std::endl; - return; + return false; } + + // Setup memory + + float *d_depth_data; + size_t depth_size = depth->width * depth->height * sizeof(float); + unsigned int *d_rgb_data; + size_t rgb_size = rgb->width * rgb->height * sizeof(unsigned int); + + CHECK_CUDA(cudaMalloc(&d_depth_data, depth_size)); + cudaMemcpy((void*)d_depth_data, + (const void*)depth->data, depth_size, + cudaMemcpyHostToDevice); + + CHECK_CUDA(cudaMalloc(&d_rgb_data, rgb_size)); + cudaMemcpy((void*)d_rgb_data, + (const void*)rgb->data, rgb_size, + cudaMemcpyHostToDevice); + + cudaDeviceSynchronize(); + CHECK_CUDA(cudaGetLastError()); + + float *d_undistorted_data = (float*)undistorted->data; + unsigned int *d_registered_data = (unsigned int*)registered->data; + const int *d_map_dist = d_distort_map; + const float *d_map_x = d_depth_to_color_map_x; + const int *d_map_yi = d_depth_to_color_map_yi; + + + // Setup parameters + + const int size_depth = MONO_ROWS * MONO_COLS; + const int size_color = COLOR_ROWS * COLOR_COLS; + //const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding + + // size of filter map with a border of filter_height_half on top and bottom so that no check for borders is needed. + // since the color image is wide angle no border to the sides is needed. + const int size_filter_map = size_color + COLOR_COLS * filter_height_half * 2; + // offset to the important data + const int offset_filter_map = COLOR_COLS * filter_height_half; + + + // Auxiliary maps + + // map for storing the min z values used for each color pixel + float *d_filter_map = NULL; + // pointer to the beginning of the important data + float *d_p_filter_map = NULL; + + // map for storing the color offset for each depth pixel + int *d_depth_to_c_off; + CHECK_CUDA(cudaMalloc(&d_depth_to_c_off, size_depth * sizeof(int))); + if (color_depth_map) + { + // I don't know where this other color map could be coming from, + // so for the moment I will assume it is in host memory. + cudaMemcpy((void*)d_depth_to_c_off, + (const void*)color_depth_map, size_depth * sizeof(int), + cudaMemcpyHostToDevice); + } + //int *map_c_off = depth_to_c_off; + + // initializing the depth_map with values outside of the Kinect2 range + if(enable_filter){ + if(bigdepth) + { + d_filter_map = (float*)bigdepth->data; + } + else + { + CHECK_CUDA(cudaMalloc(&d_filter_map, size_filter_map * sizeof(float))); + } + d_p_filter_map = d_filter_map + offset_filter_map; // works the same even on device + + cudaMemsetFloat(d_filter_map, std::numeric_limits::infinity(), size_filter_map); + } + + /* Fix depth distortion, and compute pixel to use from 'rgb' based on depth measurement, + * stored as x/y offset in the rgb data. + */ + undistort<<>>(d_depth_to_c_off, + d_undistorted_data, d_filter_map, + d_depth_data, d_map_dist, + d_map_x, d_map_yi, + this->depth, this->color, filter_width_half, filter_height_half, offset_filter_map, enable_filter); + if (enable_filter) + { + registerImageFiltered<<>>(d_registered_data, + d_rgb_data, + d_depth_to_c_off, + d_undistorted_data, + d_p_filter_map, + filter_tolerance); + if (!bigdepth) + { + CALL_CUDA(cudaFree(d_filter_map)); + } + } + else + { + registerImage<<>>(d_registered_data, + d_rgb_data, + d_depth_to_c_off); + } + + // Finish + + // -1 represents Invalid + //undistorted->format = undistorted->Float; + //registered->format = registered->BGRX; + + + if (color_depth_map) + { + // I don't know where this other color map could be coming from, + // so for the moment I will assume it is in host memory. + // Placing it back to where it came from + cudaMemcpy((void*)color_depth_map, + (const void*)d_depth_to_c_off, size_depth * sizeof(int), + cudaMemcpyDeviceToHost); + } + CALL_CUDA(cudaFree(d_depth_to_c_off)); + + CALL_CUDA(cudaFree(d_depth_data)); + CALL_CUDA(cudaFree(d_rgb_data)); + + return true; } } /* namespace libfreenect2 */ From 46fd5ab6144ef325640673c981dcfd876a97b62f Mon Sep 17 00:00:00 2001 From: blackzafiro Date: Tue, 4 Apr 2017 17:02:01 -0500 Subject: [PATCH 05/25] Added diretory 'compile' to .gitignore. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5f00aeb6c..1121c4575 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build +compile # Dependency folders depends/*/ From ba92c419d52b69cceb7b946c11176d8c2c0ec4ec Mon Sep 17 00:00:00 2001 From: blackzafiro Date: Tue, 4 Apr 2017 17:12:56 -0500 Subject: [PATCH 06/25] Removed Eclipse auxiliary files --- .cproject | 52 ---------------- .project | 27 -------- .settings/language.settings.xml | 15 ----- .settings/org.eclipse.cdt.codan.core.prefs | 71 ---------------------- 4 files changed, 165 deletions(-) delete mode 100644 .cproject delete mode 100644 .project delete mode 100644 .settings/language.settings.xml delete mode 100644 .settings/org.eclipse.cdt.codan.core.prefs diff --git a/.cproject b/.cproject deleted file mode 100644 index d76b773eb..000000000 --- a/.cproject +++ /dev/null @@ -1,52 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/.project b/.project deleted file mode 100644 index 0ae1eed8b..000000000 --- a/.project +++ /dev/null @@ -1,27 +0,0 @@ - - - libfreenect2 - - - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - diff --git a/.settings/language.settings.xml b/.settings/language.settings.xml deleted file mode 100644 index f165e0141..000000000 --- a/.settings/language.settings.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/.settings/org.eclipse.cdt.codan.core.prefs b/.settings/org.eclipse.cdt.codan.core.prefs deleted file mode 100644 index b5248c620..000000000 --- a/.settings/org.eclipse.cdt.codan.core.prefs +++ /dev/null @@ -1,71 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.cdt.codan.checkers.errnoreturn=Warning -org.eclipse.cdt.codan.checkers.errnoreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"No return\\")",implicit\=>false} -org.eclipse.cdt.codan.checkers.errreturnvalue=Error -org.eclipse.cdt.codan.checkers.errreturnvalue.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused return value\\")"} -org.eclipse.cdt.codan.checkers.nocommentinside=-Error -org.eclipse.cdt.codan.checkers.nocommentinside.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Nesting comments\\")"} -org.eclipse.cdt.codan.checkers.nolinecomment=-Error -org.eclipse.cdt.codan.checkers.nolinecomment.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Line comments\\")"} -org.eclipse.cdt.codan.checkers.noreturn=Error -org.eclipse.cdt.codan.checkers.noreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"No return value\\")",implicit\=>false} -org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation=Error -org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Abstract class cannot be instantiated\\")"} -org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem=Error -org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Ambiguous problem\\")"} -org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem=Warning -org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Assignment in condition\\")"} -org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem=Error -org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Assignment to itself\\")"} -org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem=Warning -org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"No break at end of case\\")",no_break_comment\=>"no break",last_case_param\=>false,empty_case_param\=>false} -org.eclipse.cdt.codan.internal.checkers.CatchByReference=Warning -org.eclipse.cdt.codan.internal.checkers.CatchByReference.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Catching by reference is recommended\\")",unknown\=>false,exceptions\=>()} -org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem=Error -org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Circular inheritance\\")"} -org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization=Warning -org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Class members should be properly initialized\\")",skip\=>true} -org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem=Error -org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Field cannot be resolved\\")"} -org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem=Error -org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Function cannot be resolved\\")"} -org.eclipse.cdt.codan.internal.checkers.InvalidArguments=Error -org.eclipse.cdt.codan.internal.checkers.InvalidArguments.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid arguments\\")"} -org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem=Error -org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid template argument\\")"} -org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem=Error -org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Label statement not found\\")"} -org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem=Error -org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Member declaration not found\\")"} -org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem=Error -org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Method cannot be resolved\\")"} -org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker=-Info -org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Name convention for function\\")",pattern\=>"^[a-z]",macro\=>true,exceptions\=>()} -org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem=Warning -org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Class has a virtual method and non-virtual destructor\\")"} -org.eclipse.cdt.codan.internal.checkers.OverloadProblem=Error -org.eclipse.cdt.codan.internal.checkers.OverloadProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid overload\\")"} -org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem=Error -org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid redeclaration\\")"} -org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem=Error -org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Invalid redefinition\\")"} -org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem=-Warning -org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Return with parenthesis\\")"} -org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem=-Warning -org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Format String Vulnerability\\")"} -org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem=Warning -org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Statement has no effect\\")",macro\=>true,exceptions\=>()} -org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem=Warning -org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Suggested parenthesis around expression\\")",paramNot\=>false} -org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem=Warning -org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Suspicious semicolon\\")",else\=>false,afterelse\=>false} -org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem=Error -org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Type cannot be resolved\\")"} -org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem=Warning -org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused function declaration\\")",macro\=>true} -org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem=Warning -org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused static function\\")",macro\=>true} -org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem=Warning -org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Unused variable declaration in file scope\\")",macro\=>true,exceptions\=>("@(\#)","$Id")} -org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem=Error -org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},suppression_comment\=>"@suppress(\\"Symbol is not resolved\\")"} From 601df003e216070796b422d33aad5f07e1a70398 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Fri, 23 Aug 2024 11:15:57 -0600 Subject: [PATCH 07/25] Fixed spaces vs tab convention --- src/cuda_registration.cu | 776 +++++++++++++++++++-------------------- 1 file changed, 388 insertions(+), 388 deletions(-) diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index 7b0079008..e0c8872ee 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -44,15 +44,15 @@ typedef unsigned char uchar; static __device__ void distort(int mx, int my, float& d_x, float& d_y, const libfreenect2::Freenect2Device::IrCameraParams& d_depth) { - float dx = ((float)mx - d_depth.cx) / d_depth.fx; - float dy = ((float)my - d_depth.cy) / d_depth.fy; - float dx2 = dx * dx; - float dy2 = dy * dy; - float r2 = dx2 + dy2; - float dxdy2 = 2 * dx * dy; - float kr = 1 + ((d_depth.k3 * r2 + d_depth.k2) * r2 + d_depth.k1) * r2; - d_x = d_depth.fx * (dx * kr + d_depth.p2 * (r2 + 2 * dx2) + d_depth.p1 * dxdy2) + d_depth.cx; - d_y = d_depth.fy * (dy * kr + d_depth.p1 * (r2 + 2 * dy2) + d_depth.p2 * dxdy2) + d_depth.cy; + float dx = ((float)mx - d_depth.cx) / d_depth.fx; + float dy = ((float)my - d_depth.cy) / d_depth.fy; + float dx2 = dx * dx; + float dy2 = dy * dy; + float r2 = dx2 + dy2; + float dxdy2 = 2 * dx * dy; + float kr = 1 + ((d_depth.k3 * r2 + d_depth.k2) * r2 + d_depth.k1) * r2; + d_x = d_depth.fx * (dx * kr + d_depth.p2 * (r2 + 2 * dx2) + d_depth.p1 * dxdy2) + d_depth.cx; + d_y = d_depth.fy * (dy * kr + d_depth.p1 * (r2 + 2 * dy2) + d_depth.p2 * dxdy2) + d_depth.cy; } static __device__ @@ -61,23 +61,23 @@ void depth_to_color(float mx, float my, float& d_rx, float& d_ry, const libfreenect2::Freenect2Device::ColorCameraParams& d_color, const float depth_q, const float color_q) { - mx = (mx - d_depth.cx) * depth_q; - my = (my - d_depth.cy) * depth_q; - - float wx = - (mx * mx * mx * d_color.mx_x3y0) + (my * my * my * d_color.mx_x0y3) + - (mx * mx * my * d_color.mx_x2y1) + (my * my * mx * d_color.mx_x1y2) + - (mx * mx * d_color.mx_x2y0) + (my * my * d_color.mx_x0y2) + (mx * my * d_color.mx_x1y1) + - (mx * d_color.mx_x1y0) + (my * d_color.mx_x0y1) + (d_color.mx_x0y0); - - float wy = - (mx * mx * mx * d_color.my_x3y0) + (my * my * my * d_color.my_x0y3) + - (mx * mx * my * d_color.my_x2y1) + (my * my * mx * d_color.my_x1y2) + - (mx * mx * d_color.my_x2y0) + (my * my * d_color.my_x0y2) + (mx * my * d_color.my_x1y1) + - (mx * d_color.my_x1y0) + (my * d_color.my_x0y1) + (d_color.my_x0y0); - - d_rx = (wx / (d_color.fx * color_q)) - (d_color.shift_m / d_color.shift_d); - d_ry = (wy / color_q) + d_color.cy; + mx = (mx - d_depth.cx) * depth_q; + my = (my - d_depth.cy) * depth_q; + + float wx = + (mx * mx * mx * d_color.mx_x3y0) + (my * my * my * d_color.mx_x0y3) + + (mx * mx * my * d_color.mx_x2y1) + (my * my * mx * d_color.mx_x1y2) + + (mx * mx * d_color.mx_x2y0) + (my * my * d_color.mx_x0y2) + (mx * my * d_color.mx_x1y1) + + (mx * d_color.mx_x1y0) + (my * d_color.mx_x0y1) + (d_color.mx_x0y0); + + float wy = + (mx * mx * mx * d_color.my_x3y0) + (my * my * my * d_color.my_x0y3) + + (mx * mx * my * d_color.my_x2y1) + (my * my * mx * d_color.my_x1y2) + + (mx * mx * d_color.my_x2y0) + (my * my * d_color.my_x0y2) + (mx * my * d_color.my_x1y1) + + (mx * d_color.my_x1y0) + (my * d_color.my_x0y1) + (d_color.my_x0y0); + + d_rx = (wx / (d_color.fx * color_q)) - (d_color.shift_m / d_color.shift_d); + d_ry = (wy / color_q) + d_color.cy; } static __global__ @@ -86,44 +86,44 @@ void dInitMaps(int* d_map_dist, float* d_map_x, float* d_map_y, int* d_map_yi, const libfreenect2::Freenect2Device::ColorCameraParams d_color, const float depth_q, const float color_q) { - // Configuration copied from cuda_depth_packet_processor.cu - const uint i = blockIdx.x*blockDim.x + threadIdx.x; + // Configuration copied from cuda_depth_packet_processor.cu + const uint i = blockIdx.x*blockDim.x + threadIdx.x; - const uint x = i % MONO_COLS; - const uint y = i / MONO_COLS; + const uint x = i % MONO_COLS; + const uint y = i / MONO_COLS; - float mx, my; - int ix, iy, index; - float rx, ry; + float mx, my; + int ix, iy, index; + float rx, ry; - // compute the distorted coordinate for current pixel - distort(x, y, mx, my, d_depth); + // compute the distorted coordinate for current pixel + distort(x, y, mx, my, d_depth); - // rounding the values and check if the pixel is inside the image - ix = (int)(mx + 0.5f); - iy = (int)(my + 0.5f); - if(ix < 0 || ix >= 512 || iy < 0 || iy >= 424) + // rounding the values and check if the pixel is inside the image + ix = (int)(mx + 0.5f); + iy = (int)(my + 0.5f); + if(ix < 0 || ix >= 512 || iy < 0 || iy >= 424) index = -1; - else - // computing the index from the coordinates for faster access to the data - index = iy * 512 + ix; - d_map_dist[i] = index; - - // compute the depth to color mapping entries for the current pixel - depth_to_color(x, y, rx, ry, d_depth, d_color, depth_q, color_q); - d_map_x[i] = rx; - d_map_y[i] = ry; - // compute the y offset to minimize later computations - d_map_yi[i] = (int)(ry + 0.5f); + else + // computing the index from the coordinates for faster access to the data + index = iy * 512 + ix; + d_map_dist[i] = index; + + // compute the depth to color mapping entries for the current pixel + depth_to_color(x, y, rx, ry, d_depth, d_color, depth_q, color_q); + d_map_x[i] = rx; + d_map_y[i] = ry; + // compute the y offset to minimize later computations + d_map_yi[i] = (int)(ry + 0.5f); } static __global__ void setFloat(float* devPtr, float value) { - // Configuration copied from cuda_depth_packet_processor.cu - const uint i = blockIdx.x * blockDim.x + threadIdx.x; + // Configuration copied from cuda_depth_packet_processor.cu + const uint i = blockIdx.x * blockDim.x + threadIdx.x; - devPtr[i] = value; + devPtr[i] = value; } /** @@ -135,9 +135,9 @@ void setFloat(float* devPtr, float value) */ void cudaMemsetFloat(float* devPtr, float value, size_t size) { - size_t numThreads = 512; - size_t numBlocks = size / numThreads; - setFloat<<>>(devPtr, value); + size_t numThreads = 512; + size_t numBlocks = size / numThreads; + setFloat<<>>(devPtr, value); } /** @@ -146,87 +146,87 @@ void cudaMemsetFloat(float* devPtr, float value, size_t size) */ __device__ float atomicKeepSmaller(float* address, float val) { - // Implementation addapted from http://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions - int* address_as_ull = (int*)address; - int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __float_as_int(val < __int_as_float(assumed) ? val : __int_as_float(assumed))); - // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) - } while (assumed != old); - - return __int_as_float(old); + // Implementation addapted from http://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions + int* address_as_ull = (int*)address; + int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(val < __int_as_float(assumed) ? val : __int_as_float(assumed))); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); } static __global__ void undistort(int* d_depth_to_c_off, - float* d_undistorted_data, - float* d_filter_map, - const float* d_depth_data, const int* d_map_dist, - const float* d_map_x, const int* d_map_yi, - const libfreenect2::Freenect2Device::IrCameraParams depth, - const libfreenect2::Freenect2Device::ColorCameraParams color, - const int filter_width_half, - const int filter_height_half, - const int offset_filter_map, - const bool enable_filter) + float* d_undistorted_data, + float* d_filter_map, + const float* d_depth_data, const int* d_map_dist, + const float* d_map_x, const int* d_map_yi, + const libfreenect2::Freenect2Device::IrCameraParams depth, + const libfreenect2::Freenect2Device::ColorCameraParams color, + const int filter_width_half, + const int filter_height_half, + const int offset_filter_map, + const bool enable_filter) { - // getting index of distorted depth pixel - const int i = blockIdx.x * blockDim.x + threadIdx.x; - const int index = d_map_dist[i]; - - // check if distorted depth pixel is outside of the depth image - if(index < 0){ - d_depth_to_c_off[i] = -1; - d_undistorted_data[i] = 0; - return; - } - - // getting depth value for current pixel - const float z = d_depth_data[index]; - d_undistorted_data[i] = z; - - // checking for invalid depth value - if(z <= 0.0f){ - d_depth_to_c_off[i] = -1; - return; - } - - // calculating x offset for rgb image based on depth value - const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding - const float rx = (d_map_x[index] + (color.shift_m / z)) * color.fx + color_cx; - const int cx = rx; // same as round for positive numbers (0.5f was already added to color_cx) - // getting y offset for depth image - const int cy = d_map_yi[i]; - // combining offsets - const int c_off = cx + cy * COLOR_COLS; - - // check if c_off is outside of rgb image - // checking rx/cx is not needed because the color image is much wider then the depth image - if(c_off < 0 || c_off >= COLOR_ROWS * COLOR_COLS){ - d_depth_to_c_off[i] = -1; - return; - } - - // saving the offset for later - d_depth_to_c_off[i] = c_off; - - // I am not sure if there won't be race conditions here due to overlap, the atomic operation should help. - if(enable_filter){ - // setting a window around the filter map pixel corresponding to the color pixel with the current z value - int yi = (cy - filter_height_half) * 1920 + cx - filter_width_half; // index of first pixel to set - for(int r = -filter_height_half; r <= filter_height_half; ++r, yi += COLOR_COLS) // index increased by a full row each iteration - { - float *it = d_filter_map + offset_filter_map + yi; - for(int c = -filter_width_half; c <= filter_width_half; ++c, ++it) - { - // only set if the current z is smaller - atomicKeepSmaller(it, z); - } - } - } + // getting index of distorted depth pixel + const int i = blockIdx.x * blockDim.x + threadIdx.x; + const int index = d_map_dist[i]; + + // check if distorted depth pixel is outside of the depth image + if(index < 0){ + d_depth_to_c_off[i] = -1; + d_undistorted_data[i] = 0; + return; + } + + // getting depth value for current pixel + const float z = d_depth_data[index]; + d_undistorted_data[i] = z; + + // checking for invalid depth value + if(z <= 0.0f){ + d_depth_to_c_off[i] = -1; + return; + } + + // calculating x offset for rgb image based on depth value + const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding + const float rx = (d_map_x[index] + (color.shift_m / z)) * color.fx + color_cx; + const int cx = rx; // same as round for positive numbers (0.5f was already added to color_cx) + // getting y offset for depth image + const int cy = d_map_yi[i]; + // combining offsets + const int c_off = cx + cy * COLOR_COLS; + + // check if c_off is outside of rgb image + // checking rx/cx is not needed because the color image is much wider then the depth image + if(c_off < 0 || c_off >= COLOR_ROWS * COLOR_COLS){ + d_depth_to_c_off[i] = -1; + return; + } + + // saving the offset for later + d_depth_to_c_off[i] = c_off; + + // I am not sure if there won't be race conditions here due to overlap, the atomic operation should help. + if(enable_filter){ + // setting a window around the filter map pixel corresponding to the color pixel with the current z value + int yi = (cy - filter_height_half) * 1920 + cx - filter_width_half; // index of first pixel to set + for(int r = -filter_height_half; r <= filter_height_half; ++r, yi += COLOR_COLS) // index increased by a full row each iteration + { + float *it = d_filter_map + offset_filter_map + yi; + for(int c = -filter_width_half; c <= filter_width_half; ++c, ++it) + { + // only set if the current z is smaller + atomicKeepSmaller(it, z); + } + } + } } /** Construct 'registered' image with filter. @@ -234,46 +234,46 @@ void undistort(int* d_depth_to_c_off, */ static __global__ void registerImageFiltered(unsigned int *d_registered_data, - const unsigned int * d_rgb_data, - const int* d_depth_to_c_off, - const float* d_undistorted_data, - const float *d_p_filter_map, - const float filter_tolerance) + const unsigned int * d_rgb_data, + const int* d_depth_to_c_off, + const float* d_undistorted_data, + const float *d_p_filter_map, + const float filter_tolerance) { - // getting index of distorted depth pixel - const int i = blockIdx.x * blockDim.x + threadIdx.x; + // getting index of distorted depth pixel + const int i = blockIdx.x * blockDim.x + threadIdx.x; - // run through all registered color pixels and set them based on filter results - const int c_off = d_depth_to_c_off[i]; + // run through all registered color pixels and set them based on filter results + const int c_off = d_depth_to_c_off[i]; - // check if offset is out of image - if(c_off < 0){ - d_registered_data[i] = 0; - return; - } + // check if offset is out of image + if(c_off < 0){ + d_registered_data[i] = 0; + return; + } - const float min_z = d_p_filter_map[c_off]; - const float z = d_undistorted_data[i]; + const float min_z = d_p_filter_map[c_off]; + const float z = d_undistorted_data[i]; - // check for allowed depth noise - d_registered_data[i] = (z - min_z) / z > filter_tolerance ? 0 : d_rgb_data[c_off]; + // check for allowed depth noise + d_registered_data[i] = (z - min_z) / z > filter_tolerance ? 0 : d_rgb_data[c_off]; } /** Construct 'registered' image. */ static __global__ void registerImage(unsigned int *d_registered_data, - const unsigned int * d_rgb_data, - const int* d_depth_to_c_off) + const unsigned int * d_rgb_data, + const int* d_depth_to_c_off) { - // getting index of distorted depth pixel - const int i = blockIdx.x * blockDim.x + threadIdx.x; + // getting index of distorted depth pixel + const int i = blockIdx.x * blockDim.x + threadIdx.x; - // run through all registered color pixels and set them based on c_off - const int c_off = d_depth_to_c_off[i]; + // run through all registered color pixels and set them based on c_off + const int c_off = d_depth_to_c_off[i]; - // check if offset is out of image - d_registered_data[i] = c_off < 0 ? 0 : d_rgb_data[c_off]; + // check if offset is out of image + d_registered_data[i] = c_off < 0 ? 0 : d_rgb_data[c_off]; } namespace libfreenect2 @@ -282,22 +282,22 @@ namespace libfreenect2 CudaDeviceFrame::CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel): Frame(width, height, bytes_per_pixel, (unsigned char*)-1) { - allocateMemory(); + allocateMemory(); } CudaDeviceFrame::~CudaDeviceFrame() { - CALL_CUDA(cudaFree(data)); + CALL_CUDA(cudaFree(data)); } bool CudaDeviceFrame::allocateMemory() { - CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); + CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); - CHECK_CUDA(cudaGetLastError()); - return true; + CHECK_CUDA(cudaGetLastError()); + return true; } /* @@ -310,96 +310,96 @@ static const float color_q = 0.002199; class CudaRegistrationImpl { public: - CudaRegistrationImpl(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p): - depth(depth_p), color(rgb_p), - filter_width_half(2), filter_height_half(1), filter_tolerance(0.01f), - block_size(128), grid_size(MONO_IMAGE_SIZE/block_size) - { - good = setupDevice(); - if (!good) - return; - - good = initMaps(); - if (!good) - return; - } - - ~CudaRegistrationImpl() - { - if (good) - freeDeviceMemory(); - } - - void apply(int dx, int dy, float dz, float& cx, float &cy) const; - bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const; - void undistortDepth(const Frame *depth, Frame *undistorted) const; - void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; - void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; - void distort(int mx, int my, float& dx, float& dy) const; - void depth_to_color(float mx, float my, float& rx, float& ry) const; + CudaRegistrationImpl(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p): + depth(depth_p), color(rgb_p), + filter_width_half(2), filter_height_half(1), filter_tolerance(0.01f), + block_size(128), grid_size(MONO_IMAGE_SIZE/block_size) + { + good = setupDevice(); + if (!good) + return; + + good = initMaps(); + if (!good) + return; + } + + ~CudaRegistrationImpl() + { + if (good) + freeDeviceMemory(); + } + + void apply(int dx, int dy, float dz, float& cx, float &cy) const; + bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const; + void undistortDepth(const Frame *depth, Frame *undistorted) const; + void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; + void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; + void distort(int mx, int my, float& dx, float& dy) const; + void depth_to_color(float mx, float my, float& rx, float& ry) const; private: - Freenect2Device::IrCameraParams depth; ///< Depth camera parameters. - Freenect2Device::ColorCameraParams color; ///< Color camera parameters. - - const int filter_width_half; - const int filter_height_half; - const float filter_tolerance; - - static const size_t MONO_IMAGE_SIZE = MONO_COLS * MONO_ROWS; - - size_t block_size; - size_t grid_size; - - bool good; // Memory correctly allocated - - // Maps - int* d_distort_map; - float* d_depth_to_color_map_x; - float* d_depth_to_color_map_y; - int* d_depth_to_color_map_yi; - - bool allocateDeviceMemory() - { - CHECK_CUDA(cudaMalloc(&d_distort_map, MONO_IMAGE_SIZE * sizeof(int))); - CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_x, MONO_IMAGE_SIZE * sizeof(float))); - CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_y, MONO_IMAGE_SIZE * sizeof(float))); - CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_yi, MONO_IMAGE_SIZE * sizeof(int))); - - cudaDeviceSynchronize(); - - CHECK_CUDA(cudaGetLastError()); - return true; - } - - bool setupDevice() - { - // Continue to use same device than cuda_depth_packet_processor? - if (!allocateDeviceMemory()) - return false; - - return true; - } - - bool initMaps() - { - dInitMaps<<>>(d_distort_map, d_depth_to_color_map_x, - d_depth_to_color_map_y, d_depth_to_color_map_yi, - depth, color, depth_q, color_q); - - cudaDeviceSynchronize(); - CHECK_CUDA(cudaGetLastError()); - - return true; - } - - void freeDeviceMemory() - { - CALL_CUDA(cudaFree(d_distort_map)); - CALL_CUDA(cudaFree(d_depth_to_color_map_x)); - CALL_CUDA(cudaFree(d_depth_to_color_map_y)); - CALL_CUDA(cudaFree(d_depth_to_color_map_yi)); - } + Freenect2Device::IrCameraParams depth; ///< Depth camera parameters. + Freenect2Device::ColorCameraParams color; ///< Color camera parameters. + + const int filter_width_half; + const int filter_height_half; + const float filter_tolerance; + + static const size_t MONO_IMAGE_SIZE = MONO_COLS * MONO_ROWS; + + size_t block_size; + size_t grid_size; + + bool good; // Memory correctly allocated + + // Maps + int* d_distort_map; + float* d_depth_to_color_map_x; + float* d_depth_to_color_map_y; + int* d_depth_to_color_map_yi; + + bool allocateDeviceMemory() + { + CHECK_CUDA(cudaMalloc(&d_distort_map, MONO_IMAGE_SIZE * sizeof(int))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_x, MONO_IMAGE_SIZE * sizeof(float))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_y, MONO_IMAGE_SIZE * sizeof(float))); + CHECK_CUDA(cudaMalloc(&d_depth_to_color_map_yi, MONO_IMAGE_SIZE * sizeof(int))); + + cudaDeviceSynchronize(); + + CHECK_CUDA(cudaGetLastError()); + return true; + } + + bool setupDevice() + { + // Continue to use same device than cuda_depth_packet_processor? + if (!allocateDeviceMemory()) + return false; + + return true; + } + + bool initMaps() + { + dInitMaps<<>>(d_distort_map, d_depth_to_color_map_x, + d_depth_to_color_map_y, d_depth_to_color_map_yi, + depth, color, depth_q, color_q); + + cudaDeviceSynchronize(); + CHECK_CUDA(cudaGetLastError()); + + return true; + } + + void freeDeviceMemory() + { + CALL_CUDA(cudaFree(d_distort_map)); + CALL_CUDA(cudaFree(d_depth_to_color_map_x)); + CALL_CUDA(cudaFree(d_depth_to_color_map_y)); + CALL_CUDA(cudaFree(d_depth_to_color_map_yi)); + } }; CudaRegistration::CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p): @@ -412,147 +412,147 @@ CudaRegistration::~CudaRegistration() bool CudaRegistration::apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter, CudaDeviceFrame* bigdepth, int* color_depth_map) const { - return impl_->apply(rgb, depth, undistorted, registered, enable_filter, bigdepth, color_depth_map); + return impl_->apply(rgb, depth, undistorted, registered, enable_filter, bigdepth, color_depth_map); } bool CudaRegistrationImpl::apply(const Frame *rgb, const Frame *depth, CudaDeviceFrame *undistorted, CudaDeviceFrame *registered, const bool enable_filter, CudaDeviceFrame *bigdepth, int *color_depth_map) const { - // Check if all frames are valid and have the correct size - if (!rgb || !depth || !undistorted || !registered || - rgb->width != 1920 || rgb->height != 1080 || rgb->bytes_per_pixel != 4 || - depth->width != 512 || depth->height != 424 || depth->bytes_per_pixel != 4 || - undistorted->width != 512 || undistorted->height != 424 || undistorted->bytes_per_pixel != 4 || - registered->width != 512 || registered->height != 424 || registered->bytes_per_pixel != 4) - { - LOG_ERROR << "Not applying" << std::endl; - return false; - } - - // Setup memory - - float *d_depth_data; - size_t depth_size = depth->width * depth->height * sizeof(float); - unsigned int *d_rgb_data; - size_t rgb_size = rgb->width * rgb->height * sizeof(unsigned int); - - CHECK_CUDA(cudaMalloc(&d_depth_data, depth_size)); - cudaMemcpy((void*)d_depth_data, - (const void*)depth->data, depth_size, - cudaMemcpyHostToDevice); - - CHECK_CUDA(cudaMalloc(&d_rgb_data, rgb_size)); - cudaMemcpy((void*)d_rgb_data, - (const void*)rgb->data, rgb_size, - cudaMemcpyHostToDevice); - - cudaDeviceSynchronize(); - CHECK_CUDA(cudaGetLastError()); - - float *d_undistorted_data = (float*)undistorted->data; - unsigned int *d_registered_data = (unsigned int*)registered->data; - const int *d_map_dist = d_distort_map; - const float *d_map_x = d_depth_to_color_map_x; - const int *d_map_yi = d_depth_to_color_map_yi; - - - // Setup parameters - - const int size_depth = MONO_ROWS * MONO_COLS; - const int size_color = COLOR_ROWS * COLOR_COLS; - //const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding - - // size of filter map with a border of filter_height_half on top and bottom so that no check for borders is needed. - // since the color image is wide angle no border to the sides is needed. - const int size_filter_map = size_color + COLOR_COLS * filter_height_half * 2; - // offset to the important data - const int offset_filter_map = COLOR_COLS * filter_height_half; - - - // Auxiliary maps - - // map for storing the min z values used for each color pixel - float *d_filter_map = NULL; - // pointer to the beginning of the important data - float *d_p_filter_map = NULL; - - // map for storing the color offset for each depth pixel - int *d_depth_to_c_off; - CHECK_CUDA(cudaMalloc(&d_depth_to_c_off, size_depth * sizeof(int))); - if (color_depth_map) - { - // I don't know where this other color map could be coming from, - // so for the moment I will assume it is in host memory. - cudaMemcpy((void*)d_depth_to_c_off, - (const void*)color_depth_map, size_depth * sizeof(int), - cudaMemcpyHostToDevice); - } - //int *map_c_off = depth_to_c_off; - - // initializing the depth_map with values outside of the Kinect2 range - if(enable_filter){ - if(bigdepth) - { - d_filter_map = (float*)bigdepth->data; - } - else - { - CHECK_CUDA(cudaMalloc(&d_filter_map, size_filter_map * sizeof(float))); - } - d_p_filter_map = d_filter_map + offset_filter_map; // works the same even on device - - cudaMemsetFloat(d_filter_map, std::numeric_limits::infinity(), size_filter_map); - } - - /* Fix depth distortion, and compute pixel to use from 'rgb' based on depth measurement, - * stored as x/y offset in the rgb data. - */ - undistort<<>>(d_depth_to_c_off, - d_undistorted_data, d_filter_map, - d_depth_data, d_map_dist, - d_map_x, d_map_yi, - this->depth, this->color, filter_width_half, filter_height_half, offset_filter_map, enable_filter); - if (enable_filter) - { - registerImageFiltered<<>>(d_registered_data, - d_rgb_data, - d_depth_to_c_off, - d_undistorted_data, - d_p_filter_map, - filter_tolerance); - if (!bigdepth) - { - CALL_CUDA(cudaFree(d_filter_map)); - } - } - else - { - registerImage<<>>(d_registered_data, - d_rgb_data, - d_depth_to_c_off); - } - - // Finish - - // -1 represents Invalid - //undistorted->format = undistorted->Float; - //registered->format = registered->BGRX; - - - if (color_depth_map) - { - // I don't know where this other color map could be coming from, - // so for the moment I will assume it is in host memory. - // Placing it back to where it came from - cudaMemcpy((void*)color_depth_map, - (const void*)d_depth_to_c_off, size_depth * sizeof(int), - cudaMemcpyDeviceToHost); - } - CALL_CUDA(cudaFree(d_depth_to_c_off)); - - CALL_CUDA(cudaFree(d_depth_data)); - CALL_CUDA(cudaFree(d_rgb_data)); - - return true; + // Check if all frames are valid and have the correct size + if (!rgb || !depth || !undistorted || !registered || + rgb->width != 1920 || rgb->height != 1080 || rgb->bytes_per_pixel != 4 || + depth->width != 512 || depth->height != 424 || depth->bytes_per_pixel != 4 || + undistorted->width != 512 || undistorted->height != 424 || undistorted->bytes_per_pixel != 4 || + registered->width != 512 || registered->height != 424 || registered->bytes_per_pixel != 4) + { + LOG_ERROR << "Not applying" << std::endl; + return false; + } + + // Setup memory + + float *d_depth_data; + size_t depth_size = depth->width * depth->height * sizeof(float); + unsigned int *d_rgb_data; + size_t rgb_size = rgb->width * rgb->height * sizeof(unsigned int); + + CHECK_CUDA(cudaMalloc(&d_depth_data, depth_size)); + cudaMemcpy((void*)d_depth_data, + (const void*)depth->data, depth_size, + cudaMemcpyHostToDevice); + + CHECK_CUDA(cudaMalloc(&d_rgb_data, rgb_size)); + cudaMemcpy((void*)d_rgb_data, + (const void*)rgb->data, rgb_size, + cudaMemcpyHostToDevice); + + cudaDeviceSynchronize(); + CHECK_CUDA(cudaGetLastError()); + + float *d_undistorted_data = (float*)undistorted->data; + unsigned int *d_registered_data = (unsigned int*)registered->data; + const int *d_map_dist = d_distort_map; + const float *d_map_x = d_depth_to_color_map_x; + const int *d_map_yi = d_depth_to_color_map_yi; + + + // Setup parameters + + const int size_depth = MONO_ROWS * MONO_COLS; + const int size_color = COLOR_ROWS * COLOR_COLS; + //const float color_cx = color.cx + 0.5f; // 0.5f added for later rounding + + // size of filter map with a border of filter_height_half on top and bottom so that no check for borders is needed. + // since the color image is wide angle no border to the sides is needed. + const int size_filter_map = size_color + COLOR_COLS * filter_height_half * 2; + // offset to the important data + const int offset_filter_map = COLOR_COLS * filter_height_half; + + + // Auxiliary maps + + // map for storing the min z values used for each color pixel + float *d_filter_map = NULL; + // pointer to the beginning of the important data + float *d_p_filter_map = NULL; + + // map for storing the color offset for each depth pixel + int *d_depth_to_c_off; + CHECK_CUDA(cudaMalloc(&d_depth_to_c_off, size_depth * sizeof(int))); + if (color_depth_map) + { + // I don't know where this other color map could be coming from, + // so for the moment I will assume it is in host memory. + cudaMemcpy((void*)d_depth_to_c_off, + (const void*)color_depth_map, size_depth * sizeof(int), + cudaMemcpyHostToDevice); + } + //int *map_c_off = depth_to_c_off; + + // initializing the depth_map with values outside of the Kinect2 range + if(enable_filter){ + if(bigdepth) + { + d_filter_map = (float*)bigdepth->data; + } + else + { + CHECK_CUDA(cudaMalloc(&d_filter_map, size_filter_map * sizeof(float))); + } + d_p_filter_map = d_filter_map + offset_filter_map; // works the same even on device + + cudaMemsetFloat(d_filter_map, std::numeric_limits::infinity(), size_filter_map); + } + + /* Fix depth distortion, and compute pixel to use from 'rgb' based on depth measurement, + * stored as x/y offset in the rgb data. + */ + undistort<<>>(d_depth_to_c_off, + d_undistorted_data, d_filter_map, + d_depth_data, d_map_dist, + d_map_x, d_map_yi, + this->depth, this->color, filter_width_half, filter_height_half, offset_filter_map, enable_filter); + if (enable_filter) + { + registerImageFiltered<<>>(d_registered_data, + d_rgb_data, + d_depth_to_c_off, + d_undistorted_data, + d_p_filter_map, + filter_tolerance); + if (!bigdepth) + { + CALL_CUDA(cudaFree(d_filter_map)); + } + } + else + { + registerImage<<>>(d_registered_data, + d_rgb_data, + d_depth_to_c_off); + } + + // Finish + + // -1 represents Invalid + //undistorted->format = undistorted->Float; + //registered->format = registered->BGRX; + + + if (color_depth_map) + { + // I don't know where this other color map could be coming from, + // so for the moment I will assume it is in host memory. + // Placing it back to where it came from + cudaMemcpy((void*)color_depth_map, + (const void*)d_depth_to_c_off, size_depth * sizeof(int), + cudaMemcpyDeviceToHost); + } + CALL_CUDA(cudaFree(d_depth_to_c_off)); + + CALL_CUDA(cudaFree(d_depth_data)); + CALL_CUDA(cudaFree(d_rgb_data)); + + return true; } } /* namespace libfreenect2 */ From 0de07f98a90c415da57dcd595fa1f1b51639d844 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Fri, 23 Aug 2024 14:03:47 -0600 Subject: [PATCH 08/25] Removed OpenGL warning --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index d771b697e..4a363f51d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -267,6 +267,7 @@ ENDIF() SET(HAVE_OpenGL disabled) IF(ENABLE_OPENGL) FIND_PACKAGE(GLFW3) + set(OpenGL_GL_PREFERENCE "GLVND") FIND_PACKAGE(OpenGL) SET(HAVE_OpenGL no) IF(GLFW3_FOUND AND OPENGL_FOUND) From e1e73cfd47adb808d8a44ef0f1e7098f81f4d62f Mon Sep 17 00:00:00 2001 From: veroarriola Date: Mon, 26 Aug 2024 16:00:24 -0600 Subject: [PATCH 09/25] Changing to CUDA as language, not complete. --- .gitignore | 1 + CMakeLists.txt | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 1121c4575..43bfd39e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build +buildcuda compile # Dependency folders diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a363f51d..0721ea483 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12.1) +#CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12.1) +CMAKE_MINIMUM_REQUIRED(VERSION 3.28) SET(PROJECT_VER_MAJOR 0) SET(PROJECT_VER_MINOR 2) @@ -23,6 +24,7 @@ IF(NOT DEFINED CMAKE_BUILD_TYPE) ENDIF() PROJECT(libfreenect2) +INCLUDE(CheckLanguage) IF(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) @@ -45,7 +47,6 @@ OPTION(ENABLE_PROFILING "Collect profiling stats (memory consuming)" OFF) IF(ENABLE_PROFILING) SET(LIBFREENECT2_WITH_PROFILING 1) ENDIF() - IF(MSVC) # suppress several "possible loss of data" warnings, and # "zero-length array in struct" from libusb.h @@ -339,13 +340,19 @@ IF(ENABLE_OPENCL) ENDIF(OpenCL_FOUND) ENDIF(ENABLE_OPENCL) + SET(HAVE_CUDA disabled) IF(ENABLE_CUDA) + CHECK_LANGUAGE(CUDA) +#[[ FIND_PACKAGE(CUDA) +]] SET(HAVE_CUDA no) - IF(CUDA_FOUND AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0) + #IF(CUDA_FOUND AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0) + IF(CMAKE_CUDA_COMPILER AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0) SET(HAVE_CUDA "no (VS2015 not supported)") - ELSEIF(CUDA_FOUND) + #ELSEIF(CUDA_FOUND) + ELSEIF(CMAKE_CUDA_COMPILER) SET(LIBFREENECT2_WITH_CUDA_SUPPORT 1) SET(HAVE_CUDA yes) @@ -391,8 +398,11 @@ IF(ENABLE_CUDA) ${CUDA_LIBRARIES} ) ENDIF() + #ENDIF(CMAKE_CUDA_COMPILER) ENDIF(ENABLE_CUDA) + + # RPATH handling for CUDA 8.0 libOpenCL.so conflict. See #804. IF(HAVE_OpenCL STREQUAL yes AND UNIX AND NOT APPLE) FILE(GLOB CUDA_ld_so_conf /etc/ld.so.conf.d/cuda*.conf) From d279a6087833c186c8cf011dc66173db28192448 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Tue, 27 Aug 2024 00:33:05 -0600 Subject: [PATCH 10/25] Updated to ENABLE_LANGUAGE(CUDA). Old code is still there but commented. --- CMakeLists.txt | 44 +++++++++++++---------- src/opencl_depth_packet_processor.cpp | 2 +- src/opencl_kde_depth_packet_processor.cpp | 3 +- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0721ea483..39bba8717 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ -#CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12.1) -CMAKE_MINIMUM_REQUIRED(VERSION 3.28) +#CMAKE_MINIMUM_REQUIRED(VERSION 3.28.3) +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12.1) SET(PROJECT_VER_MAJOR 0) SET(PROJECT_VER_MINOR 2) @@ -24,7 +24,6 @@ IF(NOT DEFINED CMAKE_BUILD_TYPE) ENDIF() PROJECT(libfreenect2) -INCLUDE(CheckLanguage) IF(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) @@ -129,7 +128,6 @@ SET(SOURCES include/libfreenect2/packet_pipeline.h include/internal/libfreenect2/packet_processor.h include/libfreenect2/registration.h - include/libfreenect2/cuda_registration.h include/internal/libfreenect2/resource.h include/internal/libfreenect2/rgb_packet_processor.h include/internal/libfreenect2/rgb_packet_stream_parser.h @@ -280,7 +278,8 @@ IF(ENABLE_OPENGL) LIST(APPEND LIBFREENECT2_DLLS ${GLFW3_DLL}) LIST(APPEND LIBRARIES ${GLFW3_LIBRARIES} - ${OPENGL_gl_LIBRARY} + ${OPENGL_GL_LIBRARY} + ${OPENGL_LIBRARIES} ) LIST(APPEND SOURCES src/flextGL.cpp @@ -343,10 +342,9 @@ ENDIF(ENABLE_OPENCL) SET(HAVE_CUDA disabled) IF(ENABLE_CUDA) + INCLUDE(CheckLanguage) CHECK_LANGUAGE(CUDA) -#[[ - FIND_PACKAGE(CUDA) -]] +# FIND_PACKAGE(CUDA) SET(HAVE_CUDA no) #IF(CUDA_FOUND AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0) IF(CMAKE_CUDA_COMPILER AND MSVC14 AND CUDA_VERSION VERSION_LESS 8.0) @@ -355,10 +353,13 @@ IF(ENABLE_CUDA) ELSEIF(CMAKE_CUDA_COMPILER) SET(LIBFREENECT2_WITH_CUDA_SUPPORT 1) SET(HAVE_CUDA yes) + ENABLE_LANGUAGE(CUDA) + #FIND_PACKAGE(CUDAToolkit) STRING(REPLACE "\\" "/" NVCUDASAMPLES_ROOT "$ENV{NVCUDASAMPLES_ROOT}") STRING(REPLACE "\\" "/" NVCUDASAMPLES8_0_ROOT "$ENV{NVCUDASAMPLES8_0_ROOT}") - CUDA_INCLUDE_DIRECTORIES( + #CUDA_INCLUDE_DIRECTORIES( + INCLUDE_DIRECTORIES( "${MY_DIR}/include/" "${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc" "${NVCUDASAMPLES_ROOT}/common/inc" @@ -377,26 +378,33 @@ IF(ENABLE_CUDA) # Thrust requires exceptions. If OpenCL from NVidia is used we don't need this flag. STRING(REGEX REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") MESSAGE(STATUS "*************") - mESSAGE(STATUS ${CMAKE_CXX_FLAGS}) + MESSAGE(STATUS ${CMAKE_CXX_FLAGS}) MESSAGE(STATUS ${CUDA_FLAGS}) + MESSAGE(STATUS ${CUDA_INCLUDE_DIRS}) + MESSAGE(STATUS ${CUDA_LIBRARIES}) + MESSAGE(STATUS ${CUDA_OBJECTS}) MESSAGE(STATUS "*************") - CUDA_COMPILE(CUDA_OBJECTS + #CUDA_COMPILE(CUDA_OBJECTS + LIST(APPEND SOURCES + #include/libfreenect2/cuda_registration.h src/cuda_depth_packet_processor.cu src/cuda_kde_depth_packet_processor.cu src/cuda_registration.cu - OPTIONS ${CUDA_FLAGS} + #OPTIONS ${CUDA_FLAGS} ) SET(CMAKE_CXX_FLAGS "${OLD_CMAKE_CXX_FLAGS}") INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) - LIST(APPEND SOURCES - ${CUDA_OBJECTS} - ) + #LIST(APPEND SOURCES + # ${CUDA_OBJECTS} + #) - LIST(APPEND LIBRARIES - ${CUDA_LIBRARIES} - ) + #LIST(APPEND LIBRARIES + # ${CUDA_LIBRARIES} + #) + #set_target_properties(freenect2 PROPERTIES CUDA_ARCHITECTURES "35;50;75") + #SET_PROPERTY(TARGET freenect2 PROPERTY CUDA_ARCHITECTURES OFF) ENDIF() #ENDIF(CMAKE_CUDA_COMPILER) ENDIF(ENABLE_CUDA) diff --git a/src/opencl_depth_packet_processor.cpp b/src/opencl_depth_packet_processor.cpp index d8db14d3c..c7f2a6953 100644 --- a/src/opencl_depth_packet_processor.cpp +++ b/src/opencl_depth_packet_processor.cpp @@ -251,7 +251,7 @@ class OpenCLDepthPacketProcessorImpl: public WithPerfLogging newIrFrame(); newDepthFrame(); - const int CL_ICDL_VERSION = 2; + //const int CL_ICDL_VERSION = 2; // Commented out because it is defined somewhere else typedef cl_int (*icdloader_func)(int, size_t, void*, size_t*); #ifdef _MSC_VER #pragma warning(push) diff --git a/src/opencl_kde_depth_packet_processor.cpp b/src/opencl_kde_depth_packet_processor.cpp index 98dbdff54..97765f415 100644 --- a/src/opencl_kde_depth_packet_processor.cpp +++ b/src/opencl_kde_depth_packet_processor.cpp @@ -259,7 +259,7 @@ class OpenCLKdeDepthPacketProcessorImpl: public WithPerfLogging newIrFrame(); newDepthFrame(); - const int CL_ICDL_VERSION = 2; + //const int cl_icdl_version = 2; // CL_ICDL_VERSION is defined somewhere else typedef cl_int (*icdloader_func)(int, size_t, void*, size_t*); #ifdef _MSC_VER #pragma warning(push) @@ -277,6 +277,7 @@ class OpenCLKdeDepthPacketProcessorImpl: public WithPerfLogging if (clGetICDLoaderInfoOCLICD != NULL) { char buf[16]; + //if (clGetICDLoaderInfoOCLICD(cl_icdl_version, sizeof(buf), buf, NULL) == CL_SUCCESS) if (clGetICDLoaderInfoOCLICD(CL_ICDL_VERSION, sizeof(buf), buf, NULL) == CL_SUCCESS) { if (strcmp(buf, "2.2.4") < 0) From 3d23d10147c0a329a9976f1eab5e11c9b4fcb4f8 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Wed, 4 Sep 2024 12:57:31 -0600 Subject: [PATCH 11/25] Fussing as it should have been from the beginning. --- doc/sequence.svg | 126 +++++++++++++++++++++++++++++++++++++++++ examples/Protonect.cpp | 21 +++++-- 2 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 doc/sequence.svg diff --git a/doc/sequence.svg b/doc/sequence.svg new file mode 100644 index 000000000..68b831fa2 --- /dev/null +++ b/doc/sequence.svg @@ -0,0 +1,126 @@ + + + + + + + + + + + + + + Protonect + main + freenect2*dev = 0*pipeline = 0 + libfreenect2::CudaAccessPacketPipeline(deviceId) + + + diff --git a/examples/Protonect.cpp b/examples/Protonect.cpp index 2fbf4a67b..c34ec8c3a 100644 --- a/examples/Protonect.cpp +++ b/examples/Protonect.cpp @@ -101,9 +101,13 @@ class MyFileLogger: public libfreenect2::Logger * Main application entry point. * * Accepted argumemnts: - * - cpu Perform depth processing with the CPU. - * - gl Perform depth processing with OpenGL. - * - cl Perform depth processing with OpenCL. + * - cpu Perform depth processing with the CPU. + * - gl Perform depth processing with OpenGL. + * - cl Perform depth processing with OpenCL. + * - clkde + * - cuda + * - cudakde + * - cudaccess Does not send data to CPU. * - Serial number of the device to open. * - -noviewer Disable viewer window. */ @@ -113,7 +117,7 @@ int main(int argc, char *argv[]) std::string program_path(argv[0]); std::cerr << "Version: " << LIBFREENECT2_VERSION << std::endl; std::cerr << "Environment variables: LOGFILE=" << std::endl; - std::cerr << "Usage: " << program_path << " [-gpu=] [gl | cl | clkde | cuda | cudakde | cpu] []" << std::endl; + std::cerr << "Usage: " << program_path << " [-gpu=] [gl | cl | clkde | cuda | cudakde | cudaccess | cpu] []" << std::endl; std::cerr << " [-noviewer] [-norgb | -nodepth] [-help] [-version]" << std::endl; std::cerr << " [-frames ]" << std::endl; std::cerr << "To pause and unpause: pkill -USR1 Protonect" << std::endl; @@ -225,6 +229,15 @@ int main(int argc, char *argv[]) pipeline = new libfreenect2::CudaKdePacketPipeline(deviceId); #else std::cout << "CUDA pipeline is not supported!" << std::endl; +#endif + } + else if(arg == "cudaccess") + { +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT + if(!pipeline) + pipeline = new libfreenect2::CudaAccessPacketPipeline(deviceId); +#else + std::cout << "CUDA pipeline is not supported!" << std::endl; #endif } else if(arg.find_first_not_of("0123456789") == std::string::npos) //check if parameter could be a serial number From 4a71a371a2e82e6bf4edf2fa4dddc7c1448fb39f Mon Sep 17 00:00:00 2001 From: veroarriola Date: Fri, 6 Sep 2024 16:34:30 -0600 Subject: [PATCH 12/25] Main --- doc/sequence.svg | 244 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 209 insertions(+), 35 deletions(-) diff --git a/doc/sequence.svg b/doc/sequence.svg index 68b831fa2..233831d41 100644 --- a/doc/sequence.svg +++ b/doc/sequence.svg @@ -25,8 +25,8 @@ inkscape:document-units="mm" showgrid="true" inkscape:zoom="2.0838024" - inkscape:cx="262.021" - inkscape:cy="171.56137" + inkscape:cx="532.68007" + inkscape:cy="172.04126" inkscape:window-width="1920" inkscape:window-height="1008" inkscape:window-x="0" @@ -41,7 +41,7 @@ id="defs2"> + + + + + libfreenect2::Freenect2 freenect2; libfreenect2::Freenect2Device *dev = 0; libfreenect2::PacketPipeline *pipeline = 0; + + Protonect + x="3.6607749" + y="8.4984169">Protonect.cpp main + x="8.9905415" + y="31.71825">main freenect2libfreenect2::CudaAccessPacketPipeline(deviceId) + + *dev = 0bool protonect_shutdown = false;*pipeline = 0 + style="fill:#000000;stroke:none;stroke-width:0.665" + x="6.3447084" + y="20.949709" + id="tspan4887">bool protonect_paused = false;libfreenect2::Freenect2Device *devtopause; + + + std::string serial = ""; bool viewer_enabled = true; bool enable_rgb = true; bool enable_depth = true; int deviceId = -1; size_t framemax = -1; + + freenect2.getDefaultDeviceSerialNumber(); + + libfreenect2::CudaAccessPacketPipeline(deviceId) + id="tspan22180" + style="stroke-width:0.365" + x="165.36458" + y="42.333332">freenect2.openDevice(serial, pipeline); + + From 8f23dedc249aa6df59175dde325d9dbbd38d8ff8 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Mon, 9 Sep 2024 15:52:59 -0600 Subject: [PATCH 13/25] Sequence diagram for dev, start. --- doc/sequence.svg | 161 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 147 insertions(+), 14 deletions(-) diff --git a/doc/sequence.svg b/doc/sequence.svg index 233831d41..351c09b09 100644 --- a/doc/sequence.svg +++ b/doc/sequence.svg @@ -24,9 +24,9 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="2.0838024" - inkscape:cx="532.68007" - inkscape:cy="172.04126" + inkscape:zoom="1.4734708" + inkscape:cx="375.98302" + inkscape:cy="586.03129" inkscape:window-width="1920" inkscape:window-height="1008" inkscape:window-x="0" @@ -88,6 +88,27 @@ inkscape:label="Capa 1" inkscape:groupmode="layer" id="layer1"> + int types = 0;libfreenect2::SyncMultiFrameListener listenerlibfreenect2::FrameMap frames; @@ -219,7 +240,7 @@ style="stroke-width:0.365" x="6.6145835" y="55.5625" - id="tspan13101">freenect2.getDefaultDeviceSerialNumber(); freenect2.openDevice(serial, pipeline); + + libfreenect2::Frame::Color | libfreenect2::Frame::Ir | libfreenect2::Frame::Depth + + ( ) + + dev + setColorFrameListener(&listener) + setIrAndDepthFrameListener(&listener); + + + + + start() + From 6bef13a8d9af01f992656193cd086961b59a33b9 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Thu, 30 Jan 2025 13:44:52 -0600 Subject: [PATCH 14/25] Sequence diagram with frame production in Protonect complete. --- doc/sequence.svg | 335 +++++++++++++++++- .../.cuda_registration.h.kate-swp | Bin 0 -> 76 bytes 2 files changed, 322 insertions(+), 13 deletions(-) create mode 100644 include/libfreenect2/.cuda_registration.h.kate-swp diff --git a/doc/sequence.svg b/doc/sequence.svg index 351c09b09..6a03f6136 100644 --- a/doc/sequence.svg +++ b/doc/sequence.svg @@ -25,11 +25,11 @@ inkscape:document-units="mm" showgrid="true" inkscape:zoom="1.4734708" - inkscape:cx="375.98302" - inkscape:cy="586.03129" + inkscape:cx="436.72396" + inkscape:cy="563.29586" inkscape:window-width="1920" - inkscape:window-height="1008" - inkscape:window-x="0" + inkscape:window-height="1044" + inkscape:window-x="1920" inkscape:window-y="0" inkscape:window-maximized="1" inkscape:current-layer="layer1"> @@ -305,21 +305,21 @@ sodipodi:nodetypes="cc" /> ( ) setIrAndDepthFrameListener(&listener); + y="148.16667">setIrAndDepthFrameListener(&listener) start() + y="155.57501">start() + libfreenect2::Registration* registrationlibfreenect2::Frame undistorted(512,424,4), registered(512,424,4); + libfreenect2::Registration(dev->getIrCameraParams(), dev->getColorCameraParams()) + + listener + + + waitForNewFrame(frames, 10*1000) // 10 seconds + + release(frames) + + stop() + + close() + + registration + + + delete + + *rgb + + *ir + + *depth + + + + + frames[libfreenect2::Frame::Color] + frames[libfreenect2::Frame::Ir] + frames[libfreenect2::Frame::Depth] + apply(rgb, depth, &undistorted, &registered) + + + + diff --git a/include/libfreenect2/.cuda_registration.h.kate-swp b/include/libfreenect2/.cuda_registration.h.kate-swp new file mode 100644 index 0000000000000000000000000000000000000000..7d0d75db69238fa26be49a2cfe9db9d0f28c5b81 GIT binary patch literal 76 zcmZQzU=Z?7EJ;-eE>A2_aLdd|RWQ;sU|?VnDLXm!!M&izhc#B`W|Tar-B_z_Ar%}1 Llz{<8S2QjFPiqo8 literal 0 HcmV?d00001 From 828cfa0cc408ce3d522934a1ada892b83b58db60 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Thu, 30 Jan 2025 16:18:47 -0600 Subject: [PATCH 15/25] Testing cuda_registration in Protonect. Need to debug. --- .gitignore | 1 + examples/Protonect.cpp | 46 ++++- .../.cuda_registration.h.kate-swp | Bin 76 -> 0 bytes include/libfreenect2/cuda_registration.h | 162 ------------------ include/libfreenect2/frame_listener.hpp | 22 +++ include/libfreenect2/registration.h | 88 ++++++++++ src/cuda_registration.cu | 14 +- 7 files changed, 165 insertions(+), 168 deletions(-) delete mode 100644 include/libfreenect2/.cuda_registration.h.kate-swp delete mode 100644 include/libfreenect2/cuda_registration.h diff --git a/.gitignore b/.gitignore index 43bfd39e2..d9a7d1086 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ build buildcuda +buildcudadev compile # Dependency folders diff --git a/examples/Protonect.cpp b/examples/Protonect.cpp index c34ec8c3a..eb59fc123 100644 --- a/examples/Protonect.cpp +++ b/examples/Protonect.cpp @@ -160,6 +160,7 @@ int main(int argc, char *argv[]) bool enable_depth = true; int deviceId = -1; size_t framemax = -1; + bool use_cuda_registration = false; for(int argI = 1; argI < argc; ++argI) { @@ -234,8 +235,10 @@ int main(int argc, char *argv[]) else if(arg == "cudaccess") { #ifdef LIBFREENECT2_WITH_CUDA_SUPPORT + use_cuda_registration = true; if(!pipeline) - pipeline = new libfreenect2::CudaAccessPacketPipeline(deviceId); + //pipeline = new libfreenect2::CudaAccessPacketPipeline(deviceId); + pipeline = new libfreenect2::CudaPacketPipeline(deviceId); #else std::cout << "CUDA pipeline is not supported!" << std::endl; #endif @@ -347,6 +350,18 @@ int main(int argc, char *argv[]) /// [registration setup] libfreenect2::Registration* registration = new libfreenect2::Registration(dev->getIrCameraParams(), dev->getColorCameraParams()); libfreenect2::Frame undistorted(512, 424, 4), registered(512, 424, 4); + +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT + libfreenect2::CudaRegistration* cudaRegistration = NULL; + libfreenect2::CudaDeviceFrame device_undistorted(512, 424, 4), device_registered(512, 424, 4); + + if(use_cuda_registration) + { + registration = NULL; + libfreenect2::CudaRegistration* cudaRegistration = new libfreenect2::CudaRegistration(dev->getIrCameraParams(), dev->getColorCameraParams()); + } +#endif + /// [registration setup] size_t framecount = 0; @@ -361,7 +376,7 @@ int main(int argc, char *argv[]) /// [loop start] while(!protonect_shutdown && (framemax == (size_t)-1 || framecount < framemax)) { - if (!listener.waitForNewFrame(frames, 10*1000)) // 10 sconds + if (!listener.waitForNewFrame(frames, 10*1000)) // 10 sconds // CUDA: Wait! Don't we need them in cuda??? { std::cout << "timeout!" << std::endl; return -1; @@ -374,7 +389,14 @@ int main(int argc, char *argv[]) if (enable_rgb && enable_depth) { /// [registration] - registration->apply(rgb, depth, &undistorted, ®istered); + if(use_cuda_registration) + { + cudaRegistration->apply(rgb, depth, &device_undistorted, &device_registered); + } + else + { + registration->apply(rgb, depth, &undistorted, ®istered); + } /// [registration] } @@ -399,6 +421,12 @@ int main(int argc, char *argv[]) } if (enable_rgb && enable_depth) { +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT + if (use_cuda_registration) + { + device_registered.toHostFrame(registered); + } +#endif viewer.addFrame("registered", ®istered); } @@ -417,8 +445,16 @@ int main(int argc, char *argv[]) dev->stop(); dev->close(); /// [stop] - - delete registration; +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT + if (use_cuda_registration) + { + delete cudaRegistration; + } +#endif + if (registration) + { + delete registration; + } return 0; } diff --git a/include/libfreenect2/.cuda_registration.h.kate-swp b/include/libfreenect2/.cuda_registration.h.kate-swp deleted file mode 100644 index 7d0d75db69238fa26be49a2cfe9db9d0f28c5b81..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 76 zcmZQzU=Z?7EJ;-eE>A2_aLdd|RWQ;sU|?VnDLXm!!M&izhc#B`W|Tar-B_z_Ar%}1 Llz{<8S2QjFPiqo8 diff --git a/include/libfreenect2/cuda_registration.h b/include/libfreenect2/cuda_registration.h deleted file mode 100644 index 76c995e0d..000000000 --- a/include/libfreenect2/cuda_registration.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * This file is part of the OpenKinect Project. http://www.openkinect.org - * - * Copyright (c) 2014 individual OpenKinect contributors. See the CONTRIB file - * for details. - * - * This code is licensed to you under the terms of the Apache License, version - * 2.0, or, at your option, the terms of the GNU General Public License, - * version 2.0. See the APACHE20 and GPL2 files for the text of the licenses, - * or the following URLs: - * http://www.apache.org/licenses/LICENSE-2.0 - * http://www.gnu.org/licenses/gpl-2.0.txt - * - * If you redistribute this file in source form, modified or unmodified, you - * may: - * 1) Leave this header intact and distribute it under the same terms, - * accompanying it with the APACHE20 and GPL20 files, or - * 2) Delete the Apache 2.0 clause and accompany it with the GPL2 file, or - * 3) Delete the GPL v2 clause and accompany it with the APACHE20 file - * In all cases you must keep the copyright notice intact and include a copy - * of the CONTRIB file. - * - * Binary distributions must follow the binary distribution requirements of - * either License. - */ - -/** @file cuda_registration.h Class for merging depth and color frames using cuda. */ - -#ifndef CUDA_REGISTRATION_H_ -#define CUDA_REGISTRATION_H_ - -#include -#include -#include -#include - -#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT -#include -#include -#include -#include -#include - -namespace libfreenect2 -{ - -typedef thrust::tuple TupleXYZRGB; - -/** - * Frame whose data is allocated on device. - */ -class LIBFREENECT2_API CudaDeviceFrame: public Frame -{ -public: - /** Construct a new frame. - * @param width Width in pixel - * @param height Height in pixel - * @param bytes_per_pixel Bytes per pixel - */ - CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel); - virtual ~CudaDeviceFrame(); -private: - bool allocateMemory(); -}; - -class CudaRegistrationImpl; - -/** @defgroup registration Registration and Geometry - * Register depth to color, create point clouds. */ - -/** Combine frames of depth and color camera using gpus. @ingroup registration - * Right now this class uses a reverse engineered formula that uses factory - * preset extrinsic parameters the same way the Registration class does. - */ -class LIBFREENECT2_API CudaRegistration -{ -public: - /** - * @param depth_p Depth camera parameters. You can use the factory values, or use your own. - * @param rgb_p Color camera parameters. Probably use the factory values for now. - */ - CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p); - ~CudaRegistration(); - - /** Undistort and register a single depth point to color camera. - * @param dx Distorted depth coordinate x (pixel) - * @param dy Distorted depth coordinate y (pixel) - * @param dz Depth value (millimeter) - * @param[out] cx Undistorted color coordinate x (normalized) - * @param[out] cy Undistorted color coordinate y (normalized) - */ - void apply(int dx, int dy, float dz, float& cx, float &cy) const; - - /** Map color images onto depth images - * @param rgb Color image (1920x1080 BGRX) - * @param depth Depth image (512x424 float) - * @param[out] undistorted Undistorted depth image - * @param[out] registered Color image for the depth image (512x424) - * @param enable_filter Filter out pixels not visible to both cameras. - * @param[out] bigdepth If not `NULL`, return mapping of depth onto colors (1920x1082 float). **1082** not 1080, with a blank top and bottom row. - * @param[out] color_depth_map Index of mapped color pixel for each depth pixel (512x424). - */ - bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter = true, CudaDeviceFrame* bigdepth = 0, int* color_depth_map = 0) const; - - /** Undistort depth - * @param depth Depth image (512x424 float) - * @param[out] undistorted Undistorted depth image - */ - void undistortDepth(const Frame* depth, Frame* undistorted) const; - - /** Construct a 3-D point with color in a point cloud. - * @param undistorted Undistorted depth frame from apply(). - * @param registered Registered color frame from apply(). - * @param r Row (y) index in depth image. - * @param c Column (x) index in depth image. - * @param[out] x X coordinate of the 3-D point (meter). - * @param[out] y Y coordinate of the 3-D point (meter). - * @param[out] z Z coordinate of the 3-D point (meter). - * @param[out] rgb Color of the 3-D point (BGRX). To unpack the data, use - * - * const uint8_t *p = reinterpret_cast(&rgb); - * uint8_t b = p[0]; - * uint8_t g = p[1]; - * uint8_t r = p[2]; - */ - void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; - - /** Construct a 3-D point in a point cloud. - * @param undistorted Undistorted depth frame from apply(). - * @param r Row (y) index in depth image. - * @param c Column (x) index in depth image. - * @param[out] x X coordinate of the 3-D point (meter). - * @param[out] y Y coordinate of the 3-D point (meter). - * @param[out] z Z coordinate of the 3-D point (meter). - */ - void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; - - /** - * Construct a point cloud as thrust vector of XYZRGB data as tuples of in device memory, which can be used - * for further processing with CUDA. - * @param undistorted Undistorted depth frame from apply(). - * @param registered Registered color frame from apply(). - * @param[out] cloud_data coordinates of the 3-D point (meter) and color (BGRX). - * To unpack the color data, use - * const uint8_t *p = reinterpret_cast(&rgb); - * uint8_t b = p[0]; - * uint8_t g = p[1]; - * uint8_t r = p[2]; - */ - void getPointXYZRGB(const Frame* undistorted, const Frame* registered, thrust::device_vector& cloud_data) const; - -private: - CudaRegistrationImpl *impl_; - - /* Disable copy and assignment constructors */ - CudaRegistration(const CudaRegistration&); - CudaRegistration& operator=(const CudaRegistration&); -}; -#endif // LIBFREENECT2_WITH_CUDA_SUPPORT - -} /* namespace libfreenect2 */ -#endif /* REGISTRATION_H_ */ diff --git a/include/libfreenect2/frame_listener.hpp b/include/libfreenect2/frame_listener.hpp index 1b0bda134..5e048b248 100644 --- a/include/libfreenect2/frame_listener.hpp +++ b/include/libfreenect2/frame_listener.hpp @@ -88,6 +88,28 @@ class LIBFREENECT2_API Frame unsigned char* rawdata; ///< Unaligned start of #data. }; + +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT +/** + * Frame whose data is allocated on device. + */ +class LIBFREENECT2_API CudaDeviceFrame: public Frame +{ +public: + /** Construct a new frame. + * @param width Width in pixel + * @param height Height in pixel + * @param bytes_per_pixel Bytes per pixel + */ + CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_pixel); + virtual ~CudaDeviceFrame(); + bool toHostFrame(Frame& frame); +private: + bool allocateMemory(); +}; +#endif + + /** Callback interface to receive new frames. @ingroup frame * You can inherit from FrameListener and define your own listener. */ diff --git a/include/libfreenect2/registration.h b/include/libfreenect2/registration.h index 66389196c..1445e139d 100644 --- a/include/libfreenect2/registration.h +++ b/include/libfreenect2/registration.h @@ -122,5 +122,93 @@ class LIBFREENECT2_API Registration Registration& operator=(const Registration&); }; + + + +// Is it enough with cuda_depth_packet_processor's CudaFrame? +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT + +class CudaRegistrationImpl; + +/** @defgroup registration Registration and Geometry + * Register depth to color, create point clouds. */ + +/** Combine frames of depth and color camera using gpus. @ingroup registration + * Right now this class uses a reverse engineered formula that uses factory + * preset extrinsic parameters the same way the Registration class does. + */ +class LIBFREENECT2_API CudaRegistration +{ +public: + /** + * @param depth_p Depth camera parameters. You can use the factory values, or use your own. + * @param rgb_p Color camera parameters. Probably use the factory values for now. + */ + CudaRegistration(Freenect2Device::IrCameraParams depth_p, Freenect2Device::ColorCameraParams rgb_p); + ~CudaRegistration(); + + /** Undistort and register a single depth point to color camera. + * @param dx Distorted depth coordinate x (pixel) + * @param dy Distorted depth coordinate y (pixel) + * @param dz Depth value (millimeter) + * @param[out] cx Undistorted color coordinate x (normalized) + * @param[out] cy Undistorted color coordinate y (normalized) + */ + void apply(int dx, int dy, float dz, float& cx, float &cy) const; + + /** Map color images onto depth images + * @param rgb Color image (1920x1080 BGRX) + * @param depth Depth image (512x424 float) + * @param[out] undistorted Undistorted depth image + * @param[out] registered Color image for the depth image (512x424) + * @param enable_filter Filter out pixels not visible to both cameras. + * @param[out] bigdepth If not `NULL`, return mapping of depth onto colors (1920x1082 float). **1082** not 1080, with a blank top and bottom row. + * @param[out] color_depth_map Index of mapped color pixel for each depth pixel (512x424). + */ + bool apply(const Frame* rgb, const Frame* depth, CudaDeviceFrame* undistorted, CudaDeviceFrame* registered, const bool enable_filter = true, CudaDeviceFrame* bigdepth = 0, int* color_depth_map = 0) const; + + /** Undistort depth + * @param depth Depth image (512x424 float) + * @param[out] undistorted Undistorted depth image + */ + void undistortDepth(const Frame* depth, Frame* undistorted) const; + + /** Construct a 3-D point with color in a point cloud. + * @param undistorted Undistorted depth frame from apply(). + * @param registered Registered color frame from apply(). + * @param r Row (y) index in depth image. + * @param c Column (x) index in depth image. + * @param[out] x X coordinate of the 3-D point (meter). + * @param[out] y Y coordinate of the 3-D point (meter). + * @param[out] z Z coordinate of the 3-D point (meter). + * @param[out] rgb Color of the 3-D point (BGRX). To unpack the data, use + * + * const uint8_t *p = reinterpret_cast(&rgb); + * uint8_t b = p[0]; + * uint8_t g = p[1]; + * uint8_t r = p[2]; + */ + void getPointXYZRGB (const Frame* undistorted, const Frame* registered, int r, int c, float& x, float& y, float& z, float& rgb) const; + + /** Construct a 3-D point in a point cloud. + * @param undistorted Undistorted depth frame from apply(). + * @param r Row (y) index in depth image. + * @param c Column (x) index in depth image. + * @param[out] x X coordinate of the 3-D point (meter). + * @param[out] y Y coordinate of the 3-D point (meter). + * @param[out] z Z coordinate of the 3-D point (meter). + */ + void getPointXYZ (const Frame* undistorted, int r, int c, float& x, float& y, float& z) const; + +private: + CudaRegistrationImpl *impl_; + + /* Disable copy and assignment constructors */ + CudaRegistration(const CudaRegistration&); + CudaRegistration& operator=(const CudaRegistration&); +}; +#endif // LIBFREENECT2_WITH_CUDA_SUPPORT + + } /* namespace libfreenect2 */ #endif /* REGISTRATION_H_ */ diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index e0c8872ee..5d3e1b97b 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -26,7 +26,7 @@ /** @file Implementation of merging depth and color images using cuda. */ -#include +#include #include "libfreenect2/logging.h" #include @@ -300,6 +300,18 @@ bool CudaDeviceFrame::allocateMemory() return true; } +bool CudaDeviceFrame::toHostFrame(Frame& frame) +{ + size_t frame_size = frame.width * frame.height * frame.bytes_per_pixel; + + cudaMemcpyAsync(data, frame.data, frame_size, cudaMemcpyDeviceToHost); + + cudaDeviceSynchronize(); + + CHECK_CUDA(cudaGetLastError()); + return true; +} + /* * The information used here has been taken from libfreenect2::Registration source * code. From d634183b40cc861d18cb6f6662914acba815d155 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Thu, 30 Jan 2025 17:47:57 -0600 Subject: [PATCH 16/25] Debugging lines... --- examples/Protonect.cpp | 2 +- src/cuda_registration.cu | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/examples/Protonect.cpp b/examples/Protonect.cpp index eb59fc123..bf281c6ff 100644 --- a/examples/Protonect.cpp +++ b/examples/Protonect.cpp @@ -358,7 +358,7 @@ int main(int argc, char *argv[]) if(use_cuda_registration) { registration = NULL; - libfreenect2::CudaRegistration* cudaRegistration = new libfreenect2::CudaRegistration(dev->getIrCameraParams(), dev->getColorCameraParams()); + cudaRegistration = new libfreenect2::CudaRegistration(dev->getIrCameraParams(), dev->getColorCameraParams()); } #endif diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index 5d3e1b97b..1a36f10df 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -292,23 +292,43 @@ CudaDeviceFrame::~CudaDeviceFrame() bool CudaDeviceFrame::allocateMemory() { + LOG_ERROR << "Alloc " << sizeof(unsigned char) << std::endl; CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); + //CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel)); cudaDeviceSynchronize(); CHECK_CUDA(cudaGetLastError()); + LOG_ERROR << "Alloced " << sizeof(unsigned char) << std::endl; return true; } bool CudaDeviceFrame::toHostFrame(Frame& frame) { - size_t frame_size = frame.width * frame.height * frame.bytes_per_pixel; + LOG_ERROR << "Copy " << frame.bytes_per_pixel << " what happened to data?" << std::endl; + /*size_t frame_size = width * height * bytes_per_pixel * sizeof(unsigned char);*/ + size_t frame_size = width * height * bytes_per_pixel; cudaMemcpyAsync(data, frame.data, frame_size, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); CHECK_CUDA(cudaGetLastError()); + /* Fill with horizontal gray stripes */ + /* + for(int c = 0; c < 4; c++) + { + for(int i = 0; i < width; i++) + { + for(int j = 0; j < height; j++) + { + frame.data[(i * height + j) * sizeof(unsigned char) + + c * (height * width)] = 20 * c; + } + } + } + */ + LOG_ERROR << "Copied " << sizeof(unsigned char) << std::endl; return true; } From 9e70e6f9ff160965f9df57a380d73236f8ff1b51 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Fri, 31 Jan 2025 12:54:05 -0600 Subject: [PATCH 17/25] Debugging... --- src/cuda_registration.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index 1a36f10df..6a1272fd3 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -288,18 +288,18 @@ CudaDeviceFrame::CudaDeviceFrame(size_t width, size_t height, size_t bytes_per_p CudaDeviceFrame::~CudaDeviceFrame() { CALL_CUDA(cudaFree(data)); + data = NULL; } bool CudaDeviceFrame::allocateMemory() { - LOG_ERROR << "Alloc " << sizeof(unsigned char) << std::endl; - CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); - //CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel)); + //LOG_ERROR << "Alloc " << sizeof(unsigned char) << std::endl; + CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); // sizeof(unsigned char) is 1 cudaDeviceSynchronize(); CHECK_CUDA(cudaGetLastError()); - LOG_ERROR << "Alloced " << sizeof(unsigned char) << std::endl; + //LOG_ERROR << "Alloced " << sizeof(unsigned char) << std::endl; return true; } From 29e2b6ee482044c745fe73bbe75f43fbbbec61f0 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Tue, 4 Feb 2025 18:04:47 -0600 Subject: [PATCH 18/25] Bug gone. I had dst and src inverted in cudadMemcpyAsync. --- doc/packet_pipeline.svg | 366 +++++++++++++++++++++++++++++++++++++++ doc/sequence.svg | 39 ++++- src/cuda_registration.cu | 8 +- 3 files changed, 402 insertions(+), 11 deletions(-) create mode 100644 doc/packet_pipeline.svg diff --git a/doc/packet_pipeline.svg b/doc/packet_pipeline.svg new file mode 100644 index 000000000..582fe0ff2 --- /dev/null +++ b/doc/packet_pipeline.svg @@ -0,0 +1,366 @@ + + + + + + + + + + + + + + + + + + + + CudaPacketPipeline + PacketPipeline + PacketPipelineComponents *comp_; + virtual PacketParser *getRgbPacketParser() const;virtual PacketParser *getIrPacketParser() const;virtual RgbPacketProcessor *getRgbPacketProcessor() const;virtual DepthPacketProcessor *getDepthPacketProcessor() const; + + + + + CudaPacketPipeline(const int deviceId) + + + CudaPacketPipeline + CudaDepthPacketProcessor + + + + + + + ~PacketPipelineComponents();void initialize(RgbPacketProcessor *rgb, DepthPacketProcessor *depth); + PacketPipelineComponents + RgbPacketStreamParser *rgb_parser_;DepthPacketStreamParser *depth_parser_;RgbPacketProcessor *rgb_processor_;BaseRgbPacketProcessor *async_rgb_processor_;DepthPacketProcessor *depth_processor_;BaseDepthPacketProcessor *async_depth_processor_; + + + + + + diff --git a/doc/sequence.svg b/doc/sequence.svg index 6a03f6136..28b8a7970 100644 --- a/doc/sequence.svg +++ b/doc/sequence.svg @@ -24,9 +24,9 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="1.4734708" - inkscape:cx="436.72396" - inkscape:cy="563.29586" + inkscape:zoom="2.0838024" + inkscape:cx="375.99535" + inkscape:cy="126.21158" inkscape:window-width="1920" inkscape:window-height="1044" inkscape:window-x="1920" @@ -217,12 +217,12 @@ style="white-space:pre;shape-inside:url(#rect6525);display:inline;fill:#2ca02c;stroke:#2ca02c;stroke-width:2.51339" /> setColorFrameListener(&listener) + y="141.28749">setColorFrameListener(&listener) + freenect2.openDevice(serial, pipeline); + + diff --git a/src/cuda_registration.cu b/src/cuda_registration.cu index 6a1272fd3..da1c1df41 100644 --- a/src/cuda_registration.cu +++ b/src/cuda_registration.cu @@ -295,6 +295,8 @@ bool CudaDeviceFrame::allocateMemory() { //LOG_ERROR << "Alloc " << sizeof(unsigned char) << std::endl; CHECK_CUDA(cudaMalloc(&data, width * height * bytes_per_pixel * sizeof(unsigned char))); // sizeof(unsigned char) is 1 + //rawdata = reinterpret_cast(buffer);IMAGE_SIZE*sizeof(float); IMAGE_SIZE = width * height + //data = rawdata; cudaDeviceSynchronize(); @@ -305,11 +307,11 @@ bool CudaDeviceFrame::allocateMemory() bool CudaDeviceFrame::toHostFrame(Frame& frame) { - LOG_ERROR << "Copy " << frame.bytes_per_pixel << " what happened to data?" << std::endl; + //LOG_ERROR << "Copy " << frame.bytes_per_pixel << " what happened to data?" << std::endl; /*size_t frame_size = width * height * bytes_per_pixel * sizeof(unsigned char);*/ size_t frame_size = width * height * bytes_per_pixel; - cudaMemcpyAsync(data, frame.data, frame_size, cudaMemcpyDeviceToHost); + cudaMemcpyAsync(frame.data, data, frame_size, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); @@ -328,7 +330,7 @@ bool CudaDeviceFrame::toHostFrame(Frame& frame) } } */ - LOG_ERROR << "Copied " << sizeof(unsigned char) << std::endl; + //LOG_ERROR << "Copied " << sizeof(unsigned char) << std::endl; return true; } From 53c6156cb2ba262cab8121768cbedd4090fcc4e0 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Thu, 13 Feb 2025 14:29:21 -0600 Subject: [PATCH 19/25] Set CMKAE_CUDA_ARCHITECTURES --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39bba8717..21a10ff9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -354,6 +354,9 @@ IF(ENABLE_CUDA) SET(LIBFREENECT2_WITH_CUDA_SUPPORT 1) SET(HAVE_CUDA yes) ENABLE_LANGUAGE(CUDA) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 52 75) + endif() #FIND_PACKAGE(CUDAToolkit) STRING(REPLACE "\\" "/" NVCUDASAMPLES_ROOT "$ENV{NVCUDASAMPLES_ROOT}") From 885b9df8c801c68f08202477cf9617c9b36c7248 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Thu, 13 Mar 2025 13:37:38 -0600 Subject: [PATCH 20/25] CUDA 50 for our laptop. --- CMakeLists.txt | 2 +- examples/Protonect.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21a10ff9b..5d0013271 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,7 +355,7 @@ IF(ENABLE_CUDA) SET(HAVE_CUDA yes) ENABLE_LANGUAGE(CUDA) if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 52 75) + set(CMAKE_CUDA_ARCHITECTURES 50 52 75) endif() #FIND_PACKAGE(CUDAToolkit) diff --git a/examples/Protonect.cpp b/examples/Protonect.cpp index bf281c6ff..ce3d10b85 100644 --- a/examples/Protonect.cpp +++ b/examples/Protonect.cpp @@ -389,14 +389,18 @@ int main(int argc, char *argv[]) if (enable_rgb && enable_depth) { /// [registration] +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT if(use_cuda_registration) { cudaRegistration->apply(rgb, depth, &device_undistorted, &device_registered); } else { +#endif registration->apply(rgb, depth, &undistorted, ®istered); +#ifdef LIBFREENECT2_WITH_CUDA_SUPPORT } +#endif /// [registration] } From 0ab35693e117c762ddbe16d671ebef40dc81842a Mon Sep 17 00:00:00 2001 From: veroarriola Date: Thu, 13 Mar 2025 15:12:21 -0600 Subject: [PATCH 21/25] Arquitectura para 2a laptop --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d0013271..709504fc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -355,7 +355,7 @@ IF(ENABLE_CUDA) SET(HAVE_CUDA yes) ENABLE_LANGUAGE(CUDA) if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 50 52 75) + set(CMAKE_CUDA_ARCHITECTURES 50 52 75 89) endif() #FIND_PACKAGE(CUDAToolkit) From 6b789020e2110c44c3204b0572b376faf3f62f95 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Wed, 19 Mar 2025 17:50:41 -0600 Subject: [PATCH 22/25] RgbPacketProcessor creation --- doc/packet_pipeline.svg | 80 +++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/doc/packet_pipeline.svg b/doc/packet_pipeline.svg index 582fe0ff2..501989f32 100644 --- a/doc/packet_pipeline.svg +++ b/doc/packet_pipeline.svg @@ -24,9 +24,9 @@ inkscape:deskcolor="#d1d1d1" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="1.4467452" - inkscape:cx="397.78946" - inkscape:cy="245.37838" + inkscape:zoom="2.0460067" + inkscape:cx="293.00979" + inkscape:cy="312.80445" inkscape:window-width="1920" inkscape:window-height="1044" inkscape:window-x="1920" @@ -258,7 +258,7 @@ y="88.370827" /> ~PacketPipelineComponents();void initialize(RgbPacketProcessor *rgb, DepthPacketProcessor *depth); + TurboJpegRgbPacketProcessor + TegraJpegRgbPacketProcessor + VaapiRgbPacketProcessor + + + + RgbPacketProcessor *getDefaultRgbPacketProcessor() From b1a66ec73abcd064bafe39eb6b3a7d60cb6367bb Mon Sep 17 00:00:00 2001 From: veroarriola Date: Wed, 26 Mar 2025 16:11:26 -0600 Subject: [PATCH 23/25] Build instructions for development. --- doc/README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 doc/README.md diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 000000000..45bcc3ba5 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,40 @@ +# Compile and execute development version + +## With CUDA +* Edit ```CMakeLists.txt``` to assign ```CMAKE_CUDA_ARCHITECTURES``` in accordance with NVidia card. + +``` +mkdir Programs && cd Programs +git clone https://github.com/NVIDIA/cuda-samples.git +export CUDA_SAMPLES_HOME=./cuda-samples + +cd ~/Programs/ +mkdir buildcudadev && cd buildcudadev +cmake .. -DCMAKE_CUDA_FLAGS=-I\ $CUDA_SAMPLES_HOME/Common -DCMAKE_INSTALL_PREFIX=$HOME/freenect2cudadev +``` + +To find the libraries: + +``` +export FREENECT_INSTALL_DIR=$HOME/freenect2cudadev +export FREENECT_COMPILE_DIR=$HOME/Programs/libfreenect2/buildcudadev +export LD_LIBRARY_PATH=$FREENECT_INSTALL_DIR/lib:$LD_LIBRARY_PATH +export PATH=$FREENECT_COMPILE_DIR/bin:$PATH +``` + +Test cuda buffer access with: + +``` +Protonect cudaccess +``` + + +## Permissions for Kinect + +``` +sudo cp ../platform/linux/udev/90-kinect2.rules /etc/udev/rules.d/ +``` + +Replug Kinect after copying file. + + From e93b15e69208a07c9306610d6b47b26803634f71 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Wed, 26 Mar 2025 16:40:15 -0600 Subject: [PATCH 24/25] Corrected error in directories. --- doc/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/README.md b/doc/README.md index 45bcc3ba5..0fa2381a6 100644 --- a/doc/README.md +++ b/doc/README.md @@ -4,11 +4,13 @@ * Edit ```CMakeLists.txt``` to assign ```CMAKE_CUDA_ARCHITECTURES``` in accordance with NVidia card. ``` +cd mkdir Programs && cd Programs git clone https://github.com/NVIDIA/cuda-samples.git -export CUDA_SAMPLES_HOME=./cuda-samples +export CUDA_SAMPLES_HOME=$HOME/Programs/cuda-samples -cd ~/Programs/ +git clone https://github.com/blackzafiro/libfreenect2.git +cd ~/Programs/libfreenect2 mkdir buildcudadev && cd buildcudadev cmake .. -DCMAKE_CUDA_FLAGS=-I\ $CUDA_SAMPLES_HOME/Common -DCMAKE_INSTALL_PREFIX=$HOME/freenect2cudadev ``` From 4a785b28a8c64e1429445f48f8e618422ed1e2b4 Mon Sep 17 00:00:00 2001 From: veroarriola Date: Tue, 1 Apr 2025 12:24:11 -0600 Subject: [PATCH 25/25] Indications to avoid VAAPI --- doc/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/README.md b/doc/README.md index 0fa2381a6..47c5049ab 100644 --- a/doc/README.md +++ b/doc/README.md @@ -15,6 +15,11 @@ mkdir buildcudadev && cd buildcudadev cmake .. -DCMAKE_CUDA_FLAGS=-I\ $CUDA_SAMPLES_HOME/Common -DCMAKE_INSTALL_PREFIX=$HOME/freenect2cudadev ``` +Para la Lenovo ideapad Y700 hay que apagar VAAPI: +``` +cmake .. -DENABLE_VAAPPI=OFF -DCMAKE_CUDA_FLAGS=-I\ $CUDA_SAMPLES_HOME/Common -DCMAKE_INSTALL_PREFIX=$HOME/freenect2cudadev +``` + To find the libraries: ```