From 4a9c0d5bfd77cd3cb384958941fcbc278e316678 Mon Sep 17 00:00:00 2001 From: Ingo Wald Date: Sat, 13 Dec 2025 13:57:14 -0700 Subject: [PATCH 1/2] - bugfix to shrinknigRayQuery: node sorting was wrongly disabled - added 'twoLevel::' namespace (and corresponding abstraction) to shrinking radius query. Twolevle traversal has addtl 'enterBlas()' and 'leaveBlas()' lambdas that can modify ray and bvh node ptr when reaching tlas leaves --- cuBQL/traversal/rayQueries.h | 290 +++++++++++++++++++++++++++++------ 1 file changed, 243 insertions(+), 47 deletions(-) diff --git a/cuBQL/traversal/rayQueries.h b/cuBQL/traversal/rayQueries.h index 1fccf42..2b9710b 100644 --- a/cuBQL/traversal/rayQueries.h +++ b/cuBQL/traversal/rayQueries.h @@ -9,13 +9,18 @@ #include "cuBQL/traversal/fixedBoxQuery.h" namespace cuBQL { - namespace fixedRayQuery { - // ****************************************************************** - // INTERFACE - // (which functions this header file provides) - // ****************************************************************** - + // ****************************************************************** + // INTERFACE + // (which functions this header file provides) + // ****************************************************************** + + /*! \namespace fixedRayQuery *Fixed* ray queries are queries on rays + for whom the query interval [ray.tMax,ray.tMax] can never change + during traversal. The per-prim/per-leaf lambdas can at any point + _terminate_ a traveral, but ordering child nodes is not required + because ordering shouldn't matter */ + namespace fixedRayQuery { template inline __cubql_both void forEachLeaf(const Lambda &lambdaToExecuteForEachCandidate, @@ -29,14 +34,14 @@ namespace cuBQL { cuBQL::bvh3f bvh, cuBQL::ray3f ray, bool dbg=false); - + /*! traverse BVH with given fixed-length, axis-aligned ray, and call lambda for each prim encounterd. - + Traversal is UNORDERED (meaning it will NOT try to traverse front-to-back) and FIXED-SHAPE (ray will not shrink during traversal). - + Lambda is expected to return CUBQL_{CONTINUE|TERMINATE}_TRAVERSAL */ template @@ -48,11 +53,11 @@ namespace cuBQL { /*! traverse BVH with given fixed-length, axis-aligned ray, and call lambda for each prim encounterd. - + Traversal is UNORDERED (meaning it will NOT try to traverse front-to-back) and FIXED-SHAPE (ray will not shrink during traversal). - + Lambda is expected to return CUBQL_{CONTINUE|TERMINATE}_TRAVERSAL */ template @@ -63,7 +68,15 @@ namespace cuBQL { bool dbg=false); } + /*! \namespace shrinkingRayQuery *Shrinking* ray queries are queries + where ray.tMax can shrink during traversal, so hits found in one + subtree can shrink ray.tmax such that other subtrees may then + later get skipped */ namespace shrinkingRayQuery { + + /*! single level BVH ray traversal, provided lambda covers what + happens when a ray wants to intersect a given prim within that + bvh */ template inline __cubql_both float forEachLeaf(const Lambda &lambdaToCallOnEachLeaf, @@ -71,15 +84,71 @@ namespace cuBQL { ray_t ray, bool dbg=false); + /*! single level BVH ray traversal, provided lambda covers what + happens when a ray wants to intersect a given prim within that + bvh */ template inline __cubql_both void forEachPrim(const Lambda &lambdaToExecuteForEachCandidate, bvh_t bvh, ray_t &ray, bool dbg=false); + + namespace twoLevel { + /*! two-level BVH ray traversal, where the BVH is made up of a + "TLAS" (top-level acceleration structure) that itself contains + objects with "BLAS"es (bottom-level acceleration + structures). One of the lambdas describes what happens when a + ray enters a leaf in a BLAS (just like in the single-level BVH + traversal; the other describes what happens when a ray needs + to transition from TLAS to BLAS. That second lambda can modify + the current ray's org and dir to transform it into the BLASes + coordinate frame where required (transforming back is not + required, cubql will save/restore the original ray as + required), and is supposed to return a new bvh_t to be + traversed in the BLAS */ + template + inline __cubql_both + void forEachLeaf(const EnterBlasLambda &enterBlas, + const LeaveBlasLambda &leaveBlas, + const ProcessLeafLambda &processLeaf, + bvh_t bvh, + /*! REFERENCE to a ray, so 'enterBlas()' can modify it */ + ray_t &ray, + bool dbg=false); + + /*! two-level BVH ray traversal, where the BVH is made up of a + "TLAS" (top-level acceleration structure) that itself contains + objects with "BLAS"es (bottom-level acceleration + structures). One of the lambdas describes what happens when a + ray enters a leaf in a BLAS (just like in the single-level BVH + traversal; the other describes what happens when a ray needs + to transition from TLAS to BLAS. That second lambda can modify + the current ray's org and dir to transform it into the BLASes + coordinate frame where required (transforming back is not + required, cubql will save/restore the original ray as + required), and is supposed to return a new bvh_t to be + traversed in the BLAS */ + template + inline __cubql_both + void forEachPrim(const EnterBlasLambda &enterBlas, + const LeaveBlasLambda &leaveBlas, + const IntersectPrimLambda &intersectPrim, + bvh_t bvh, + /*! REFERENCE to a ray, so 'enterBlas()' can modify it */ + ray_t &ray, + bool dbg=false); + } + } // ::cuBQL::shrinkingRayQuery - - + + // ============================================================================= // *** IMPLEMENTATION *** // ============================================================================= @@ -176,7 +245,6 @@ namespace cuBQL { // ------------------------------------------------------------------ // traverse until there's nothing left to traverse: // ------------------------------------------------------------------ - // if (dbg) dout << "fixedBoxQuery::traverse" << endl; while (true) { // ------------------------------------------------------------------ @@ -196,14 +264,6 @@ namespace cuBQL { bvh3f::node_t n1 = bvh.nodes[n1Idx]; bool o0 = rayIntersectsBox(ray,n0.bounds); bool o1 = rayIntersectsBox(ray,n1.bounds); - - // if (dbg) { - // dout << "at node " << node.offset << endl; - // dout << "w/ query box " << queryBox << endl; - // dout << " " << n0.bounds << " -> " << (int)o0 << endl; - // dout << " " << n1.bounds << " -> " << (int)o1 << endl; - // } - if (o0) { if (o1) { *stackPtr++ = n1.admin; @@ -221,15 +281,11 @@ namespace cuBQL { } } - // if (dbg) - // dout << "at leaf ofs " << (int)node.offset << " cnt " << node.count << endl; if (node.count != 0) { // we're at a valid leaf: call the lambda and see if that gave // us a enw, closer cull radius int leafResult = lambdaToCallOnEachLeaf(bvh.primIDs+node.offset,node.count); - // if (dbg) - // dout << "leaf returned " << leafResult << endl; if (leafResult == CUBQL_TERMINATE_TRAVERSAL) return; } @@ -237,7 +293,6 @@ namespace cuBQL { // pop next un-traversed node from stack, discarding any nodes // that are more distant than whatever query radius we now have // ------------------------------------------------------------------ - // if (dbg) dout << "rem stack depth " << (stackPtr-traversalStack) << endl; if (stackPtr == traversalStack) return; node = *--stackPtr; @@ -286,15 +341,14 @@ namespace cuBQL { typename node_t::Admin traversalStack[64], *stackPtr = traversalStack; typename node_t::Admin node = bvh.nodes[0].admin; - if (ray.direction.x == 0.f) ray.direction.x = T(1e-20); - if (ray.direction.y == 0.f) ray.direction.y = T(1e-20); - if (ray.direction.z == 0.f) ray.direction.z = T(1e-20); + if (ray.direction.x == (T)0) ray.direction.x = T(1e-20); + if (ray.direction.y == (T)0) ray.direction.y = T(1e-20); + if (ray.direction.z == (T)0) ray.direction.z = T(1e-20); vec_t rcp_dir = rcp(ray.direction); // ------------------------------------------------------------------ // traverse until there's nothing left to traverse: // ------------------------------------------------------------------ - // if (dbg) dout << "fixedBoxQuery::traverse" << endl; while (true) { // ------------------------------------------------------------------ @@ -303,7 +357,6 @@ namespace cuBQL { // at which we need to pop // ------------------------------------------------------------------ while (true) { - // if (dbg) printf("node %i.%i\n",(int)node.offset,(int)node.count); if (node.count != 0) // it's a boy! - seriously: this is not a inner node, step // out of down-travesal and let leaf code pop in. @@ -317,22 +370,10 @@ namespace cuBQL { bool o0 = rayIntersectsBox(node_t0,ray,rcp_dir,n0.bounds); bool o1 = rayIntersectsBox(node_t1,ray,rcp_dir,n1.bounds); - // if (dbg) { - // dout << "at node " << node.offset << endl; - // dout << "w/ query box " << queryBox << endl; - // dout << " " << n0.bounds << " -> " << (int)o0 << endl; - // dout << " " << n1.bounds << " -> " << (int)o1 << endl; - // } - if (o0) { if (o1) { -#if 1 - *stackPtr++ = n1.admin; - node = n0.admin; -#else *stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin; node = (node_t0 < node_t1) ? n0.admin : n1.admin; -#endif } else { node = n0.admin; } @@ -347,8 +388,6 @@ namespace cuBQL { } } - // if (dbg) - // dout << "at leaf ofs " << (int)node.offset << " cnt " << node.count << endl; if (node.count != 0) { // we're at a valid leaf: call the lambda and see if that gave // us a enw, closer cull radius @@ -359,7 +398,6 @@ namespace cuBQL { // pop next un-traversed node from stack, discarding any nodes // that are more distant than whatever query radius we now have // ------------------------------------------------------------------ - // if (dbg) dout << "rem stack depth " << (stackPtr-traversalStack) << endl; if (stackPtr == traversalStack) return ray.tMax; node = *--stackPtr; @@ -381,5 +419,163 @@ namespace cuBQL { }; shrinkingRayQuery::forEachLeaf(perLeaf,bvh,ray,dbg); } + + + + /*! two-level BVH ray traversal, where the BVH is made up of a + "TLAS" (top-level acceleration structure) that itself contains + objects with "BLAS"es (bottom-level acceleration + structures). One of the lambdas describes what happens when a + ray enters a leaf in a BLAS (just like in the single-level BVH + traversal; the other describes what happens when a ray needs + to transition from TLAS to BLAS. That second lambda can modify + the current ray's org and dir to transform it into the BLASes + coordinate frame where required (transforming back is not + required, cubql will save/restore the original ray as + required), and is supposed to return a new bvh_t to be + traversed in the BLAS */ + template + inline __cubql_both + void shrinkingRayQuery::twoLevel:: + forEachLeaf(const EnterBlasLambda &enterBlas, + const LeaveBlasLambda &leaveBlas, + const ProcessLeafLambda &processLeaf, + bvh_t bvh, + /*! REFERENCE to a ray, so 'enterBlas()' can modify it */ + ray_t &ray, + bool dbg) + { + using node_t = typename bvh_t::node_t; + using T = typename bvh_t::scalar_t; + struct StackEntry { + uint32_t idx; + }; + typename node_t::Admin + traversalStack[64], + *stackPtr = traversalStack, + *blasStackBase = nullptr; + typename node_t::Admin node = bvh.nodes[0].admin; + + node_t *tlasSavedNodePtr = 0; + if (ray.direction.x == (T)0) ray.direction.x = T(1e-20); + if (ray.direction.y == (T)0) ray.direction.y = T(1e-20); + if (ray.direction.z == (T)0) ray.direction.z = T(1e-20); + vec_t rcp_dir = rcp(ray.direction); + + // ------------------------------------------------------------------ + // traverse until there's nothing left to traverse: + // ------------------------------------------------------------------ + // if (dbg) dout << "fixedBoxQuery::traverse" << endl; + while (true) { + + // ------------------------------------------------------------------ + // traverse INNER nodes downward; breaking out if we either find + // a leaf within the current search radius, or found a dead-end + // at which we need to pop + // ------------------------------------------------------------------ + while (true) { + // if (dbg) printf("node %i.%i\n",(int)node.offset,(int)node.count); + if (node.count != 0) + // it's a boy! - seriously: this is not a inner node, step + // out of down-travesal and let leaf code pop in. + break; + + uint32_t n0Idx = (uint32_t)node.offset+0; + uint32_t n1Idx = (uint32_t)node.offset+1; + node_t n0 = bvh.nodes[n0Idx]; + node_t n1 = bvh.nodes[n1Idx]; + float node_t0 = 0.f, node_t1 = 0.f; + bool o0 = rayIntersectsBox(node_t0,ray,rcp_dir,n0.bounds); + bool o1 = rayIntersectsBox(node_t1,ray,rcp_dir,n1.bounds); + + if (o0) { + if (o1) { + *stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin; + node = (node_t0 < node_t1) ? n0.admin : n1.admin; + } else { + node = n0.admin; + } + } else { + if (o1) { + node = n1.admin; + } else { + // both children are too far away; this is a dead end + node.count = 0; + break; + } + } + } + + if (node.count != 0) { + if (blasStackBase == nullptr) { + // we are _not_ in a BLAS, yet - let's enter + tlasSavedNodePtr = bvh.nodes; + bvh_t blas = enterBlas(ray,/*instID:*/node.offset); + bvh.nodes = blas.nodes; + blasStackBase = stackPtr; + } else { + // we're at a valid leaf: call the lambda and see if that gave + // us a new, closer cull radius + ray.tMax + = processLeaf(bvh.primIDs+node.offset,node.count); + } + } + // ------------------------------------------------------------------ + // pop next un-traversed node from stack, discarding any nodes + // that are more distant than whatever query radius we now have + // ------------------------------------------------------------------ + if (stackPtr == blasStackBase) { + leaveBlas(ray); + blasStackBase = nullptr; + bvh.nodes = tlasSavedNodePtr; + } + if (stackPtr == traversalStack) + return;// ray.tMax; + node = *--stackPtr; + } + } + + /*! two-level BVH ray traversal, where the BVH is made up of a + "TLAS" (top-level acceleration structure) that itself contains + objects with "BLAS"es (bottom-level acceleration + structures). One of the lambdas describes what happens when a + ray enters a leaf in a BLAS (just like in the single-level BVH + traversal; the other describes what happens when a ray needs + to transition from TLAS to BLAS. That second lambda can modify + the current ray's org and dir to transform it into the BLASes + coordinate frame where required (transforming back is not + required, cubql will save/restore the original ray as + required), and is supposed to return a new bvh_t to be + traversed in the BLAS */ + template + inline __cubql_both + void shrinkingRayQuery::twoLevel:: + forEachPrim(const EnterBlasLambda &enterBlas, + const LeaveBlasLambda &leaveBlas, + const IntersectPrimLambda &intersectPrim, + bvh_t bvh, + /*! REFERENCE to a ray, so 'enterBlas()' can modify it */ + ray_t &ray, + bool dbg) + { + auto perLeaf = [dbg,bvh,&ray, + enterBlas, + leaveBlas, + intersectPrim] + (const uint32_t *leaf, int count) { + for (int i=0;i Date: Sun, 14 Dec 2025 17:44:07 -0700 Subject: [PATCH 2/2] - added shrinkingRayQuery::twoLevel:: traversals for binarybvh. twoLevel variants get additional enterBlas() and leaveBlas() lambdas so traversal can traverse two nested BVHes at the same time. - unified Triangle3f and triangle_t<>; former is now just triangle_t. Same for TriangleIntersection and TriangleIntersection::compute() - various smaller clanups and warning fixes --- CMakeLists.txt | 4 - cuBQL/math/Ray.h | 6 +- cuBQL/math/affine.h | 2 +- cuBQL/queries/triangleData/Triangle.h | 84 ++++++++----- .../math/rayTriangleIntersections.h | 114 ++++++++--------- cuBQL/traversal/rayQueries.h | 115 +++++++++++++----- 6 files changed, 199 insertions(+), 126 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ae424ec..1ffa826 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,10 +133,6 @@ endif() message("#cuBQL: compiling with CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") add_subdirectory(cuBQL) -option(CUBQL_ENABLE_TESTING "Enable Testing?" OFF) if (NOT CUBQL_IS_SUBPROJECT) add_subdirectory(samples) - if (CUBQL_ENABLE_TESTING) - add_subdirectory(testing) - endif() endif() diff --git a/cuBQL/math/Ray.h b/cuBQL/math/Ray.h index 2ea8dd2..ae77c86 100644 --- a/cuBQL/math/Ray.h +++ b/cuBQL/math/Ray.h @@ -26,7 +26,7 @@ namespace cuBQL { using ray3f = ray_t; using ray3d = ray_t; - using Ray = ray_t; + using Ray = ray_t; template struct AxisAlignedRay { @@ -44,9 +44,9 @@ namespace cuBQL { inline __cubql_both bool rayIntersectsBox(ray_t ray, box_t box); - // ============================================================================= + // ======================================================================== // *** IMPLEMENTATION *** - // ============================================================================= + // ======================================================================== template inline __cubql_both ray_t::ray_t(typename ray_t::vec3 org, diff --git a/cuBQL/math/affine.h b/cuBQL/math/affine.h index e649039..1a24ca5 100644 --- a/cuBQL/math/affine.h +++ b/cuBQL/math/affine.h @@ -30,7 +30,7 @@ namespace cuBQL { p(ZeroTy()) {} - inline __cubql_both AffineSpaceT(const AffineSpaceT &other) = default; + inline AffineSpaceT(const AffineSpaceT &other) = default; inline __cubql_both AffineSpaceT(const L &other) { diff --git a/cuBQL/queries/triangleData/Triangle.h b/cuBQL/queries/triangleData/Triangle.h index 4fced27..c8fb104 100644 --- a/cuBQL/queries/triangleData/Triangle.h +++ b/cuBQL/queries/triangleData/Triangle.h @@ -11,24 +11,41 @@ namespace cuBQL { - // ============================================================================= + // ========================================================================= // *** INTERFACE *** - // ============================================================================= + // ========================================================================= - /*! a simple triangle consisting of three vertices. In order to not - overload this class with too many functions the actual - operations on triangles - such as intersectin with a ray, - computing distance to a point, etc - will be defined in the - respective queries */ - struct Triangle { - /*! returns an axis aligned bounding box enclosing this triangle */ - inline __cubql_both box3f bounds() const; - inline __cubql_both vec3f sample(float u, float v) const; - inline __cubql_both vec3f normal() const; + // /*! a simple triangle consisting of three vertices. In order to not + // overload this class with too many functions the actual + // operations on triangles - such as intersectin with a ray, + // computing distance to a point, etc - will be defined in the + // respective queries */ + // struct Triangle { + // /*! returns an axis aligned bounding box enclosing this triangle */ + // inline __cubql_both box3f bounds() const; + // inline __cubql_both vec3f sample(float u, float v) const; + // inline __cubql_both vec3f normal() const; - vec3f a, b, c; + // vec3f a, b, c; + // }; + + template + struct triangle_t + { + using vec3 = vec_t; + using box3 = box_t; + + inline __cubql_both box3 bounds() const; + inline __cubql_both vec3 sample(float u, float v) const; + inline __cubql_both vec3 normal() const; + + vec3 a; + vec3 b; + vec3 c; }; + using Triangle = triangle_t; + /*! a typical triangle mesh, with array of vertices and indices. This class will NOT do any allocation/deallocation, not use smart pointers - it's just a 'view' on what whoever else @@ -51,18 +68,10 @@ namespace cuBQL { int numIndices; }; - template - struct triangle_t - { - using vec3 = vec_t; - vec3 a; - vec3 b; - vec3 c; - }; - // ============================================================================= + // ======================================================================== // *** IMPLEMENTATION *** - // ============================================================================= + // ======================================================================== // ---------------------- TriangleMesh ---------------------- inline __cubql_both Triangle TriangleMesh::getTriangle(int i) const @@ -72,23 +81,38 @@ namespace cuBQL { } // ---------------------- Triangle ---------------------- - inline __cubql_both vec3f Triangle::normal() const + template + inline __cubql_both vec_t triangle_t::normal() const { return cross(b-a,c-a); } - inline __cubql_both box3f Triangle::bounds() const - { return box3f().including(a).including(b).including(c); } + template + inline __cubql_both box_t triangle_t::bounds() const + { return box_t().including(a).including(b).including(c); } - inline __cubql_both float area(Triangle tri) + template + inline __cubql_both float area(triangle_t tri) { return length(cross(tri.b-tri.a,tri.c-tri.a)); } - inline __cubql_both vec3f Triangle::sample(float u, float v) const + template + inline __cubql_both vec_t + triangle_t::sample(float u, float v) const { if (u+v >= 1.f) { u = 1.f-u; v = 1.f-v; } return (1.f-u-v)*a + u * b + v * c; } + // inline __cubql_both vec3f Triangle::sample(float u, float v) const + // { + // if (u+v >= 1.f) { u = 1.f-u; v = 1.f-v; } + // return (1.f-u-v)*a + u * b + v * c; + // } - inline __cubql_both dbgout operator<<(dbgout o, const Triangle &triangle) - { o << "{" << triangle.a << "," << triangle.b << "," << triangle.c << "}"; return o; } + template + inline __cubql_both + dbgout operator<<(dbgout o, const triangle_t &triangle) + { + o << "{" << triangle.a << "," << triangle.b << "," << triangle.c << "}"; + return o; + } } // ::cuBQL diff --git a/cuBQL/queries/triangleData/math/rayTriangleIntersections.h b/cuBQL/queries/triangleData/math/rayTriangleIntersections.h index 0f49e6f..863e4d9 100644 --- a/cuBQL/queries/triangleData/math/rayTriangleIntersections.h +++ b/cuBQL/queries/triangleData/math/rayTriangleIntersections.h @@ -8,45 +8,49 @@ namespace cuBQL { - // ============================================================================= + // ======================================================================== // *** INTERFACE *** - // ============================================================================= + // ======================================================================== - struct RayTriangleIntersection { - vec3f N; - float t,u,v; + // struct RayTriangleIntersection { + // vec3f N; + // float t,u,v; - inline __cubql_both bool compute(Ray ray, Triangle tri); - }; + // inline __cubql_both bool compute(Ray ray, Triangle tri); + // }; template struct RayTriangleIntersection_t { using vec3 = vec_t; T t=0,u=0,v=0; + vec3 N; inline __cubql_both bool compute(const ray_t &ray, - const triangle_t &tri); + const triangle_t &tri, + bool dbg=false); }; + using RayTriangleIntersection = RayTriangleIntersection_t; - // ============================================================================= + // ======================================================================== // *** IMPLEMENTATION *** - // ============================================================================= + // ======================================================================== template inline __cubql_both bool RayTriangleIntersection_t::compute(const ray_t &ray, - const triangle_t &tri) + const triangle_t &tri, + bool dbg) { using vec3 = vec_t; const vec3 v0(tri.a); const vec3 v1(tri.b); const vec3 v2(tri.c); - + const vec3 e1 = v1-v0; const vec3 e2 = v2-v0; - vec3 N = cross(e1,e2); + N = cross(e1,e2); if (N == vec3(T(0))) return false; @@ -59,7 +63,7 @@ namespace cuBQL { // t*dot(d,N) = -dot(o-v0,N) // t = -dot(o-v0,N)/dot(d,N) t = -dot(ray.origin-v0,N)/dot(ray.direction,N); - if (t < ray.tMin || t > ray.tMax) return false; + if (t <= ray.tMin || t >= ray.tMax) return false; vec3 P = (ray.origin - v0) + t*ray.direction; @@ -82,7 +86,7 @@ namespace cuBQL { // (P-v0) = [e1,e2]*(u,v,h) if (det(e1u,e1v,e2u,e2v) == T(0)) return false; -#if 1 +#if 0 T den = det(e1u,e2u,e1v,e2v); T sign = den < T(0) ? T(-1):T(1); den *= sign; @@ -103,60 +107,50 @@ namespace cuBQL { return true; } - inline __cubql_both - bool RayTriangleIntersection::compute(Ray ray, Triangle tri) - { - const vec3f v0 = tri.a; - const vec3f v1 = tri.b; - const vec3f v2 = tri.c; + // inline __cubql_both + // bool RayTriangleIntersection::compute(Ray ray, Triangle tri) + // { + // const vec3f v0 = tri.a; + // const vec3f v1 = tri.b; + // const vec3f v2 = tri.c; - const vec3f e1 = v1-v0; - const vec3f e2 = v2-v0; - - N = cross(e1,e2); - if (N == vec3f(0.f)) return false; + // const vec3f e1 = v1-v0; + // const vec3f e2 = v2-v0; - // N = normalize(N); - if (fabsf(dot(ray.direction,N)) < 1e-12f) return false; + // vec3f N = cross(e1,e2); + // if (fabsf(dot(ray.direction,N)) < 1e-12f) return false; - // P = o+td - // dot(P-v0,N) = 0 - // dot(o+td-v0,N) = 0 - // dot(td,N)+dot(o-v0,N)=0 - // t*dot(d,N) = -dot(o-v0,N) - // t = -dot(o-v0,N)/dot(d,N) - t = -dot(ray.origin-v0,N)/dot(ray.direction,N); + // t = -dot(ray.origin-v0,N)/dot(ray.direction,N); - if (t < ray.tMin || t > ray.tMax) return false; + // if (t <= 0.f || t >= ray.tMax) return false; - vec3f P = (ray.origin - v0) + t*ray.direction; + // vec3f P = ray.origin - v0 + t*ray.direction; - float e1u,e2u,Pu; - float e1v,e2v,Pv; - if (fabsf(N.x) >= max(fabsf(N.y),fabsf(N.z))) { - e1u = e1.y; e2u = e2.y; Pu = P.y; - e1v = e1.z; e2v = e2.z; Pv = P.z; - } else if (fabsf(N.y) > fabsf(N.z)) { - e1u = e1.x; e2u = e2.x; Pu = P.x; - e1v = e1.z; e2v = e2.z; Pv = P.z; - } else { - e1u = e1.x; e2u = e2.x; Pu = P.x; - e1v = e1.y; e2v = e2.y; Pv = P.y; - } - auto det = [](float a, float b, float c, float d) -> float - { return a*d - c*b; }; + // float e1u,e2u,Pu; + // float e1v,e2v,Pv; + // if (fabsf(N.x) >= max(fabsf(N.y),fabsf(N.z))) { + // e1u = e1.y; e2u = e2.y; Pu = P.y; + // e1v = e1.z; e2v = e2.z; Pv = P.z; + // } else if (fabsf(N.y) > fabsf(N.z)) { + // e1u = e1.x; e2u = e2.x; Pu = P.x; + // e1v = e1.z; e2v = e2.z; Pv = P.z; + // } else { + // e1u = e1.x; e2u = e2.x; Pu = P.x; + // e1v = e1.y; e2v = e2.y; Pv = P.y; + // } + // auto det = [](float a, float b, float c, float d) -> float + // { return a*d - c*b; }; - // P = v0 + u * e1 + v * e2 + h * N - // (P-v0) = [e1,e2]*(u,v,h) - if (det(e1u,e1v,e2u,e2v) == 0.f) return false; + // // P = v0 + u * e1 + v * e2 + h * N + // // (P-v0) = [e1,e2]*(u,v,h) + // if (det(e1u,e1v,e2u,e2v) == 0.f) return false; - u = det(Pu,e2u,Pv,e2v)/det(e1u,e2u,e1v,e2v); - v = det(e1u,Pu,e1v,Pv)/det(e1u,e2u,e1v,e2v); - - if ((u < 0.f) || (v < 0.f) || ((u+v) > 1.f)) return false; + // u = det(Pu,e2u,Pv,e2v)/det(e1u,e2u,e1v,e2v); + // v = det(e1u,Pu,e1v,Pv)/det(e1u,e2u,e1v,e2v); + // if ((u < 0.f) || (v < 0.f) || ((u+v) >= 1.f)) return false; - return true; - } + // return true; + // } diff --git a/cuBQL/traversal/rayQueries.h b/cuBQL/traversal/rayQueries.h index 2b9710b..9ee0067 100644 --- a/cuBQL/traversal/rayQueries.h +++ b/cuBQL/traversal/rayQueries.h @@ -149,9 +149,9 @@ namespace cuBQL { } // ::cuBQL::shrinkingRayQuery - // ============================================================================= + // ========================================================================= // *** IMPLEMENTATION *** - // ============================================================================= + // ========================================================================= template inline __cubql_both @@ -453,13 +453,16 @@ namespace cuBQL { struct StackEntry { uint32_t idx; }; + enum { STACK_DEPTH=128 }; typename node_t::Admin - traversalStack[64], + traversalStack[STACK_DEPTH], *stackPtr = traversalStack, *blasStackBase = nullptr; typename node_t::Admin node = bvh.nodes[0].admin; - node_t *tlasSavedNodePtr = 0; + node_t *tlasSavedNodePtr = 0; + uint32_t *tlasSavedPrimIDs = 0; + vec3f saved_dir, saved_org; if (ray.direction.x == (T)0) ray.direction.x = T(1e-20); if (ray.direction.y == (T)0) ray.direction.y = T(1e-20); @@ -478,11 +481,47 @@ namespace cuBQL { // at which we need to pop // ------------------------------------------------------------------ while (true) { - // if (dbg) printf("node %i.%i\n",(int)node.offset,(int)node.count); - if (node.count != 0) - // it's a boy! - seriously: this is not a inner node, step - // out of down-travesal and let leaf code pop in. - break; + if (dbg) printf("node %i.%i\n",(int)node.offset,(int)node.count); + if (node.count != 0) { + // it's a boy! - seriously: this is not a inner node; so + // we're either at a final leaf, or at an instance node + if (blasStackBase != nullptr) + // it's a real leaf, in a blas; break out here and let + // leaf code trigger. + break; + // it's not a real leaf, so this must be a instance node + tlasSavedNodePtr = bvh.nodes; + tlasSavedPrimIDs = bvh.primIDs; + if (node.count != 1) + printf("TWO-LEVEL BVH MUST BE BUILT WITH 1 PRIM PER LEAF!\n"); + if (dbg) + printf("inner-leaf primIDs %p ofs %i count %i\n", + bvh.primIDs, + (int)node.offset, + (int)node.count); + + int instID + = bvh.primIDs + ? bvh.primIDs[node.offset] + : node.offset; + + saved_dir = ray.direction; + saved_org = ray.origin; + bvh_t blas; + ray_t transformed_ray = ray; + enterBlas(transformed_ray,blas,instID); + ray.origin = transformed_ray.origin; + ray.direction = transformed_ray.direction; + rcp_dir = rcp(ray.direction); + bvh.nodes = blas.nodes; + bvh.primIDs = blas.primIDs; + blasStackBase = stackPtr; + node = bvh.nodes[0].admin; + // now check if those blas root node is _also_ a leaf: + if (node.count != 0) + break; + if (dbg) printf("new node %i.%i\n",(int)node.offset,(int)node.count); + } uint32_t n0Idx = (uint32_t)node.offset+0; uint32_t n1Idx = (uint32_t)node.offset+1; @@ -492,8 +531,16 @@ namespace cuBQL { bool o0 = rayIntersectsBox(node_t0,ray,rcp_dir,n0.bounds); bool o1 = rayIntersectsBox(node_t1,ray,rcp_dir,n1.bounds); + if (dbg) + printf("children L hit %i dist %f R hit %i dist %f\n", + int(o0),node_t0, + int(o1),node_t1); if (o0) { if (o1) { + if (stackPtr-traversalStack >= STACK_DEPTH) { + return; + } + *stackPtr++ = (node_t0 < node_t1) ? n1.admin : n0.admin; node = (node_t0 < node_t1) ? n0.admin : n1.admin; } else { @@ -510,28 +557,27 @@ namespace cuBQL { } } - if (node.count != 0) { - if (blasStackBase == nullptr) { - // we are _not_ in a BLAS, yet - let's enter - tlasSavedNodePtr = bvh.nodes; - bvh_t blas = enterBlas(ray,/*instID:*/node.offset); - bvh.nodes = blas.nodes; - blasStackBase = stackPtr; - } else { - // we're at a valid leaf: call the lambda and see if that gave - // us a new, closer cull radius - ray.tMax - = processLeaf(bvh.primIDs+node.offset,node.count); - } + if (node.count != 0 && blasStackBase != nullptr) { + // we're at a valid leaf: call the lambda and see if that gave + // us a new, closer cull radius + if (dbg) + printf("trav leaf-leaf primIDs %p offset %i count %i\n", + bvh.primIDs,(int)node.offset,(int)node.count); + ray.tMax + = processLeaf(bvh.primIDs,(int)node.offset,(int)node.count); } // ------------------------------------------------------------------ // pop next un-traversed node from stack, discarding any nodes // that are more distant than whatever query radius we now have // ------------------------------------------------------------------ if (stackPtr == blasStackBase) { - leaveBlas(ray); + leaveBlas(); + ray.direction = saved_dir; + ray.origin = saved_org; + rcp_dir = rcp(ray.direction); blasStackBase = nullptr; - bvh.nodes = tlasSavedNodePtr; + bvh.nodes = tlasSavedNodePtr; + bvh.primIDs = tlasSavedPrimIDs; } if (stackPtr == traversalStack) return;// ray.tMax; @@ -565,13 +611,26 @@ namespace cuBQL { ray_t &ray, bool dbg) { - auto perLeaf = [dbg,bvh,&ray, + auto perLeaf = [dbg,&bvh,&ray, enterBlas, leaveBlas, intersectPrim] - (const uint32_t *leaf, int count) { - for (int i=0;i