Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,7 @@
],
extra_compile_args={
"cxx": cxx_flags,
"nvcc": nvcc_flags + [
# The following definitions must be undefined
# since we need half-precision operation.
"nvcc": nvcc_flags if IS_HIP else nvcc_flags + [
"--extended-lambda",
"-U__CUDA_NO_HALF_OPERATORS__",
"-U__CUDA_NO_HALF_CONVERSIONS__",
Expand Down
6 changes: 1 addition & 5 deletions src/atlas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,7 @@ static void get_chart_connectivity(
));
CUDA_CHECK(cudaFree(cu_raw_lengths));

#if CUDART_VERSION >= 12090
auto reduce_op = ::cuda::std::plus();
#else
auto reduce_op = cub::Sum();
#endif
auto reduce_op = cub::Sum();


// 1.3 Reduce By Key (Aggregate duplicate chart pairs by summing lengths)
Expand Down
11 changes: 11 additions & 0 deletions src/clean_up.cu
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,24 @@ static __global__ void select_first_in_each_group_kernel(
}


#if defined(__HIP_PLATFORM_AMD__)
#include <rocprim/types/tuple.hpp>
// Functor that exposes the three integer components of an int3 key as a
// rocprim tuple of references (x, y, z order), so the caller can access each
// component of the original key in place.
struct int3_decomposer
{
    __host__ __device__ auto operator()(int3& v) const
        -> ::rocprim::tuple<int&, int&, int&>
    {
        // Bind references to the key's own members; nothing is copied.
        return ::rocprim::tuple<int&, int&, int&>{v.x, v.y, v.z};
    }
};
#else
// Functor that exposes the three integer components of an int3 key as a
// libcu++ tuple of references (x, y, z order), so the caller can access each
// component of the original key in place.
struct int3_decomposer
{
    __host__ __device__ auto operator()(int3& v) const
        -> ::cuda::std::tuple<int&, int&, int&>
    {
        // Bind references to the key's own members; nothing is copied.
        return ::cuda::std::tuple<int&, int&, int&>{v.x, v.y, v.z};
    }
};
#endif


void CuMesh::remove_duplicate_faces() {
Expand Down
108 changes: 54 additions & 54 deletions src/dtypes.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,24 @@ namespace cumesh {
// Three-component float vector, declared with 16-byte alignment.
// NOTE(review): this span is diff text. Every member below appears twice:
// first with the previous `__device__`-only qualifier, then with the new
// `__host__ __device__` qualifier. Only one set can exist in the real header.
struct __align__(16) Vec3f {
float x, y, z;

// Previous revision: device-only members.
__device__ __forceinline__ Vec3f();
__device__ __forceinline__ Vec3f(float x, float y, float z);
__device__ __forceinline__ Vec3f(float3 v);
__device__ __forceinline__ Vec3f operator+(const Vec3f& o) const;
__device__ __forceinline__ Vec3f& operator+=(const Vec3f& o);
__device__ __forceinline__ Vec3f operator-(const Vec3f& o) const;
__device__ __forceinline__ Vec3f& operator-=(const Vec3f& o);
__device__ __forceinline__ Vec3f operator*(float s) const;
__device__ __forceinline__ Vec3f& operator*=(float s);
__device__ __forceinline__ Vec3f operator/(float s) const;
__device__ __forceinline__ Vec3f& operator/=(float s);
__device__ __forceinline__ float dot(const Vec3f& o) const;
__device__ __forceinline__ float norm() const;
__device__ __forceinline__ float norm2() const;
__device__ __forceinline__ Vec3f normalized() const;
__device__ __forceinline__ void normalize();
__device__ __forceinline__ Vec3f cross(const Vec3f& o) const;
__device__ __forceinline__ Vec3f slerp(const Vec3f& o, float t) const;
// New revision: the same members, qualified for both host and device.
__host__ __device__ __forceinline__ Vec3f();
__host__ __device__ __forceinline__ Vec3f(float x, float y, float z);
__host__ __device__ __forceinline__ Vec3f(float3 v);
__host__ __device__ __forceinline__ Vec3f operator+(const Vec3f& o) const;
__host__ __device__ __forceinline__ Vec3f& operator+=(const Vec3f& o);
__host__ __device__ __forceinline__ Vec3f operator-(const Vec3f& o) const;
__host__ __device__ __forceinline__ Vec3f& operator-=(const Vec3f& o);
__host__ __device__ __forceinline__ Vec3f operator*(float s) const;
__host__ __device__ __forceinline__ Vec3f& operator*=(float s);
__host__ __device__ __forceinline__ Vec3f operator/(float s) const;
__host__ __device__ __forceinline__ Vec3f& operator/=(float s);
__host__ __device__ __forceinline__ float dot(const Vec3f& o) const;
__host__ __device__ __forceinline__ float norm() const;
__host__ __device__ __forceinline__ float norm2() const;
__host__ __device__ __forceinline__ Vec3f normalized() const;
__host__ __device__ __forceinline__ void normalize();
__host__ __device__ __forceinline__ Vec3f cross(const Vec3f& o) const;
__host__ __device__ __forceinline__ Vec3f slerp(const Vec3f& o, float t) const;
};


Expand All @@ -43,124 +43,124 @@ struct __align__(16) QEM
// e = [ 00, 01, 02, 03, 11, 12, 13, 22, 23, 33 ]
float e[10];

__device__ __forceinline__ QEM();
__device__ __forceinline__ QEM operator+(const QEM& o) const;
__device__ __forceinline__ QEM& operator+=(const QEM& o);
__device__ __forceinline__ QEM operator-(const QEM& o) const;
__device__ __forceinline__ QEM& operator-=(const QEM& o);
__device__ __forceinline__ void zero();
__device__ __forceinline__ void add_plane(float4 p);
__device__ __forceinline__ float evaluate(const Vec3f& p) const;
__device__ __forceinline__ bool solve_optimal(float3 &out, float &err) const;
__host__ __device__ __forceinline__ QEM();
__host__ __device__ __forceinline__ QEM operator+(const QEM& o) const;
__host__ __device__ __forceinline__ QEM& operator+=(const QEM& o);
__host__ __device__ __forceinline__ QEM operator-(const QEM& o) const;
__host__ __device__ __forceinline__ QEM& operator-=(const QEM& o);
__host__ __device__ __forceinline__ void zero();
__host__ __device__ __forceinline__ void add_plane(float4 p);
__host__ __device__ __forceinline__ float evaluate(const Vec3f& p) const;
__host__ __device__ __forceinline__ bool solve_optimal(float3 &out, float &err) const;
};


// Default constructor: sets x, y and z to 0.0f.
__device__ __forceinline__ Vec3f::Vec3f() {
__host__ __device__ __forceinline__ Vec3f::Vec3f() {
x = 0.0f;
y = 0.0f;
z = 0.0f;
}

// Constructs a vector from three scalar components.
// The parameters shadow the members, hence the explicit `this->`.
__device__ __forceinline__ Vec3f::Vec3f(float x, float y, float z) {
__host__ __device__ __forceinline__ Vec3f::Vec3f(float x, float y, float z) {
this->x = x;
this->y = y;
this->z = z;
}

// Converting constructor: copies the x, y and z fields of a float3.
__device__ __forceinline__ Vec3f::Vec3f(float3 v) {
__host__ __device__ __forceinline__ Vec3f::Vec3f(float3 v) {
x = v.x;
y = v.y;
z = v.z;
}


// Component-wise sum; returns a new vector and leaves both operands unchanged.
__device__ __forceinline__ Vec3f Vec3f::operator+(const Vec3f& o) const {
__host__ __device__ __forceinline__ Vec3f Vec3f::operator+(const Vec3f& o) const {
return Vec3f(x + o.x, y + o.y, z + o.z);
}


// In-place component-wise addition of `o`; returns *this to allow chaining.
__device__ __forceinline__ Vec3f& Vec3f::operator+=(const Vec3f& o) {
__host__ __device__ __forceinline__ Vec3f& Vec3f::operator+=(const Vec3f& o) {
x += o.x;
y += o.y;
z += o.z;
return *this;
}


// Component-wise difference (this - o); returns a new vector.
__device__ __forceinline__ Vec3f Vec3f::operator-(const Vec3f& o) const {
__host__ __device__ __forceinline__ Vec3f Vec3f::operator-(const Vec3f& o) const {
return Vec3f(x - o.x, y - o.y, z - o.z);
}


// In-place component-wise subtraction of `o`; returns *this to allow chaining.
__device__ __forceinline__ Vec3f& Vec3f::operator-=(const Vec3f& o) {
__host__ __device__ __forceinline__ Vec3f& Vec3f::operator-=(const Vec3f& o) {
x -= o.x;
y -= o.y;
z -= o.z;
return *this;
}


// Scales every component by the scalar `s`; returns a new vector.
__device__ __forceinline__ Vec3f Vec3f::operator*(float s) const {
__host__ __device__ __forceinline__ Vec3f Vec3f::operator*(float s) const {
return Vec3f(x * s, y * s, z * s);
}


// In-place multiplication of every component by `s`; returns *this.
__device__ __forceinline__ Vec3f& Vec3f::operator*=(float s) {
__host__ __device__ __forceinline__ Vec3f& Vec3f::operator*=(float s) {
x *= s;
y *= s;
z *= s;
return *this;
}


// Divides every component by the scalar `s`; returns a new vector.
// `s` is not checked against zero here.
__device__ __forceinline__ Vec3f Vec3f::operator/(float s) const {
__host__ __device__ __forceinline__ Vec3f Vec3f::operator/(float s) const {
return Vec3f(x / s, y / s, z / s);
}


// In-place division of every component by `s`; returns *this.
// `s` is not checked against zero here.
__device__ __forceinline__ Vec3f& Vec3f::operator/=(float s) {
__host__ __device__ __forceinline__ Vec3f& Vec3f::operator/=(float s) {
x /= s;
y /= s;
z /= s;
return *this;
}


// Dot product of this vector with `o`.
__device__ __forceinline__ float Vec3f::dot(const Vec3f& o) const {
__host__ __device__ __forceinline__ float Vec3f::dot(const Vec3f& o) const {
return x * o.x + y * o.y + z * o.z;
}


// Euclidean length: square root of the sum of squared components.
__device__ __forceinline__ float Vec3f::norm() const {
__host__ __device__ __forceinline__ float Vec3f::norm() const {
return sqrtf(x * x + y * y + z * z);
}


// Squared Euclidean length (sum of squared components, no square root).
__device__ __forceinline__ float Vec3f::norm2() const {
__host__ __device__ __forceinline__ float Vec3f::norm2() const {
return x * x + y * y + z * z;
}


// Returns a copy scaled by the reciprocal square root of the squared length.
// There is no guard for a zero-length vector: the squared length is passed
// to rsqrtf unchecked.
__device__ __forceinline__ Vec3f Vec3f::normalized() const {
__host__ __device__ __forceinline__ Vec3f Vec3f::normalized() const {
float inv_norm = rsqrtf(x * x + y * y + z * z);
return Vec3f(x * inv_norm, y * inv_norm, z * inv_norm);
}


// In-place variant of normalized(): multiplies each component by the
// reciprocal square root of the squared length. There is no guard for a
// zero-length vector.
__device__ __forceinline__ void Vec3f::normalize() {
__host__ __device__ __forceinline__ void Vec3f::normalize() {
float inv_norm = rsqrtf(x * x + y * y + z * z);
x *= inv_norm;
y *= inv_norm;
z *= inv_norm;
}


// Cross product (this x o); returns a new vector.
__device__ __forceinline__ Vec3f Vec3f::cross(const Vec3f& o) const {
__host__ __device__ __forceinline__ Vec3f Vec3f::cross(const Vec3f& o) const {
return Vec3f(y * o.z - z * o.y, z * o.x - x * o.z, x * o.y - y * o.x);
}


__device__ __forceinline__ Vec3f Vec3f::slerp(const Vec3f& o, float t) const {
__host__ __device__ __forceinline__ Vec3f Vec3f::slerp(const Vec3f& o, float t) const {
float dot_prod = this->dot(o);
dot_prod = fmaxf(fminf(dot_prod, 1.0f), -1.0f); // Clamp to [-1, 1]
float theta = acosf(dot_prod) * t;
Expand All @@ -169,48 +169,48 @@ __device__ __forceinline__ Vec3f Vec3f::slerp(const Vec3f& o, float t) const {
}


// Default constructor: delegates to zero() so the object starts with all
// coefficients cleared.
__device__ __forceinline__ QEM::QEM() {
__host__ __device__ __forceinline__ QEM::QEM() {
zero();
}


// Element-wise sum of the 10 stored coefficients; returns a new QEM.
__device__ __forceinline__ QEM QEM::operator+(const QEM& o) const {
__host__ __device__ __forceinline__ QEM QEM::operator+(const QEM& o) const {
QEM res;
#pragma unroll
for (int i = 0; i < 10; ++i) res.e[i] = e[i] + o.e[i];
return res;
}


// In-place element-wise addition of `o`'s 10 coefficients; returns *this.
__device__ __forceinline__ QEM& QEM::operator+=(const QEM& o) {
__host__ __device__ __forceinline__ QEM& QEM::operator+=(const QEM& o) {
#pragma unroll
for (int i = 0; i < 10; ++i) e[i] += o.e[i];
return *this;
}


// Element-wise difference (this - o) of the 10 stored coefficients; returns
// a new QEM.
__device__ __forceinline__ QEM QEM::operator-(const QEM& o) const {
__host__ __device__ __forceinline__ QEM QEM::operator-(const QEM& o) const {
QEM res;
#pragma unroll
for (int i = 0; i < 10; ++i) res.e[i] = e[i] - o.e[i];
return res;
}


// In-place element-wise subtraction of `o`'s 10 coefficients; returns *this.
__device__ __forceinline__ QEM& QEM::operator-=(const QEM& o) {
__host__ __device__ __forceinline__ QEM& QEM::operator-=(const QEM& o) {
#pragma unroll
for (int i = 0; i < 10; ++i) e[i] -= o.e[i];
return *this;
}

// Resets all 10 stored coefficients to 0.0f.
__device__ __forceinline__ void QEM::zero() {
__host__ __device__ __forceinline__ void QEM::zero() {
#pragma unroll
for (int i = 0; i < 10; ++i) e[i] = 0.0f;
}


// Add plane p = (a,b,c,d) as outer product p * p^T
__device__ __forceinline__ void QEM::add_plane(float4 p) {
__host__ __device__ __forceinline__ void QEM::add_plane(float4 p) {
// upper triangle indices mapping:
// (0,0)->e[0]
// (0,1)->e[1]
Expand All @@ -237,7 +237,7 @@ __device__ __forceinline__ void QEM::add_plane(float4 p) {


// Evaluate v^T * Q * v for v = (x,y,z,1)
__device__ __forceinline__ float QEM::evaluate(const Vec3f& p) const {
__host__ __device__ __forceinline__ float QEM::evaluate(const Vec3f& p) const {
// compute v = [x,y,z,1]
float x = p.x, y = p.y, z = p.z, w = 1.0f;
// expand symmetric multiplication using stored upper triangular
Expand Down Expand Up @@ -271,7 +271,7 @@ __device__ __forceinline__ float QEM::evaluate(const Vec3f& p) const {
// Solve the linear system: A * [x y z]^T = -b, where
// A = top-left 3x3 of Q, b = [e03, e13, e23] (note signs)
// Return true if solved (matrix invertible), false otherwise. err returns the error at the solution.
__device__ __forceinline__ bool QEM::solve_optimal(float3 &out, float &err) const {
__host__ __device__ __forceinline__ bool QEM::solve_optimal(float3 &out, float &err) const {
// Build A (symmetric)
float A00 = e[0];
float A01 = e[1];
Expand Down