From f1fdff1b40170231a31ddce7f1357eeb070b5066 Mon Sep 17 00:00:00 2001 From: Matin Ghavami Date: Tue, 18 Nov 2025 13:26:57 -0500 Subject: [PATCH 01/19] initial implementation of geometry methods --- genmetaballs/src/cuda/core/geometry.cu | 34 +++++++++-- genmetaballs/src/cuda/core/geometry.cuh | 80 +++++++++++++++++++++---- 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/genmetaballs/src/cuda/core/geometry.cu b/genmetaballs/src/cuda/core/geometry.cu index 9e5c886..f78d317 100644 --- a/genmetaballs/src/cuda/core/geometry.cu +++ b/genmetaballs/src/cuda/core/geometry.cu @@ -1,9 +1,35 @@ #include "geometry.cuh" -Vec3D operator+(const Vec3D a, const Vec3D b) { - return {a.x + b.x, a.y + b.y, a.z + b.z}; + +__host__ __device__ Vec3D Rotation::apply(const Vec3D vec) const +{ + // v' = q * v * q^(-1) for unit quaternions + // where q^(-1) = (-x, -y, -z, w) + Vec3D q = {unit_quat_.x, unit_quat_.y, unit_quat_.z}; + float w = unit_quat_.w; + + // v' = 2*(q·v)*q + (w²-|q|²)*v + 2*w*(q×v) + float d = dot(q, vec); + Vec3D c = cross(q, vec); + + return 2.0f * d * q + (w * w - dot(q, q)) * vec + 2.0f * w * c; +} + +__host__ __device__ Rotation Rotation::compose(const Rotation& rot) const +{ + // Quaternion multiplication: q1 * q2 + float4 q1 = unit_quat_; + float4 q2 = rot.unit_quat_; + + return Rotation{ + {q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y, + q1.w * q2.y - q1.x * q2.z + q1.y * q2.w + q1.z * q2.x, + q1.w * q2.z + q1.x * q2.y - q1.y * q2.x + q1.z * q2.w, + q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z}}; } -Vec3D operator-(const Vec3D a, const Vec3D b) { - return {a.x - b.x, a.y - b.y, a.z - b.z}; +__host__ __device__ Rotation Rotation::inv() const +{ + // For unit quaternions, inverse = conjugate: (-x, -y, -z, w) + return Rotation{{-unit_quat_.x, -unit_quat_.y, -unit_quat_.z, unit_quat_.w}}; } diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index e1f6334..a3eee34 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -4,28 +4,86 @@ using Vec3D = float3; -Vec3D operator+(const Vec3D a, const Vec3D b); -Vec3D operator-(const Vec3D a, const Vec3D b); +inline Vec3D operator-(const Vec3D a) +{ + return {-a.x, -a.y, -a.z}; +} -class Rotation { +inline Vec3D operator+(const Vec3D a, const Vec3D b) +{ + return {a.x + b.x, a.y + b.y, a.z + b.z}; +} + +inline Vec3D operator-(const Vec3D a, const Vec3D b) +{ + return {a.x - b.x, a.y - b.y, a.z - b.z}; +} + +inline Vec3D operator*(const float scalar, const Vec3D a) +{ + return {a.x * scalar, a.y * scalar, a.z * scalar}; +} + +inline Vec3D operator*(const Vec3D a, const float scalar) +{ + return {a.x * scalar, a.y * scalar, a.z * scalar}; +} + +inline Vec3D operator/(const Vec3D a, const float scalar) +{ + return {a.x / scalar, a.y / scalar, a.z / scalar}; +} + +inline float dot(const Vec3D a, const Vec3D b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +inline Vec3D cross(const Vec3D a, const Vec3D b) +{ + return {a.y * b.z - a.z * b.y, + a.z * b.x - a.x * b.z, + a.x * b.y - a.y * b.x}; +} +class Rotation { private: - // ... - float rotmat_[9]; + float4 unit_quat_; public: - Vec3D apply(const Vec3D vec) const; - Rotation compose(const Rotation& rot) const; - Rotation inv() const; + Rotation() unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; + + __host__ __device__ Vec3D apply(const Vec3D vec) const; + + __host__ __device__ Rotation compose(const Rotation& rot) const; + + __host__ __device__ Rotation inv() const; }; struct Pose { Rotation rot; Vec3D tran; - Vec3D apply(const Vec3D vec) const; - Pose compose(const Pose& pose) const; - Pose inv() const; + __host__ __device__ inline Vec3D apply(const Vec3D vec) const + { + return tran + rot.apply(vec); + } + + __host__ __device__ inline Pose compose(const Pose &pose) const + { + /* + * If $A_i$ is the matrix corresponding to pose object `p_i`, then + * $A_1A_2$ is the matrix corresponding to the pose object + * `p_1.compose(p2)`. + */ + return {rot.compose(pose.rot), rot.apply(pose.tran) + tran}; + } + + __host__ __device__ inline Pose inv() const + { + auto rotinv = rot.inv(); + return {rotinv, -rotinv.apply(tran)}; + } }; struct Ray { From f143eb9419eee2b2cb6353069f6fc87d1c718276 Mon Sep 17 00:00:00 2001 From: Matin Ghavami Date: Tue, 18 Nov 2025 16:07:56 -0500 Subject: [PATCH 02/19] add bindings --- genmetaballs/src/cuda/bindings.cu | 63 ++++++++++++++++++++----- genmetaballs/src/cuda/core/geometry.cuh | 30 ++++++------ genmetaballs/src/cuda/core/utils.cuh | 2 +- tests/python_tests/test_geometry.py | 10 ++-- 4 files changed, 73 insertions(+), 32 deletions(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index f75a322..b74f891 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -20,19 +20,54 @@ NB_MODULE(_genmetaballs_bindings, m) { m.def("gpu_add", &gpu_add, "Add two lists elementwise on the GPU", nb::arg("a"), nb::arg("b")); - // exposing Vec3D - nb::class_(m, "Vec3D") + /* + * Geometry module bindings + */ + + nb::module_ geometry = m.def_submodule("geometry", "Geometry helpers for GenMetaballs"); + + nb::class_(geometry, "Vec3D") .def(nb::init<>()) .def(nb::init()) - .def_rw("x", &Vec3D::x) - .def_rw("y", &Vec3D::y) - .def_rw("z", &Vec3D::z) - .def(nb::self + nb::self) - .def(nb::self - nb::self) - .def("__repr__", - [](const Vec3D& v) { return nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z); }); - - // confidence submodule + .def_ro("x", &Vec3D::x) + .def_ro("y", &Vec3D::y) + .def_ro("z", &Vec3D::z) + .def("__add__", static_cast(&operator+)) + .def("__sub__", static_cast(&operator-)) + .def("__neg__", static_cast(&operator-)) + .def("__mul__", static_cast(&operator*)) + .def("__rmul__", static_cast(&operator*)) + .def("__truediv__", static_cast(&operator/)) + .def("__repr__", [](const Vec3D& v) { + return nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z); + }); + + geometry.def("dot", &dot, "Dot product of two `Vec3D`s", nb::arg("a"), nb::arg("b")); + geometry.def("cross", &cross, "Cross product of two `Vec3D`s", nb::arg("a"), nb::arg("b")); + + nb::class_(geometry, "Rotation") + .def(nb::init<>()) + .def("apply", &Rotation::apply, "Apply rotation to vector", nb::arg("vec")) + .def("compose", &Rotation::compose, "Compose with another rotation", nb::arg("rot")) + .def("inv", &Rotation::inv, "Inverse rotation"); + + nb::class_(geometry, "Pose") + .def(nb::init<>()) + .def_rw("rot", &Pose::rot) + .def_rw("tran", &Pose::tran) + .def("apply", &Pose::apply, "Apply pose to vector", nb::arg("vec")) + .def("compose", &Pose::compose, "Compose with another pose", nb::arg("pose")) + .def("inv", &Pose::inv, "Inverse pose"); + + nb::class_(geometry, "Ray") + .def(nb::init<>()) + .def_rw("start", &Ray::start) + .def_rw("direction", &Ray::direction); + + /* + * Confidence module bindings + */ + nb::module_ confidence = m.def_submodule("confidence"); nb::class_(confidence, "ZeroParameterConfidence") .def(nb::init<>()) @@ -42,8 +77,12 @@ NB_MODULE(_genmetaballs_bindings, m) { .def(nb::init()) .def("get_confidence", &TwoParameterConfidence::get_confidence); - // utils submodule + /* + * Utils module bindings + */ + nb::module_ utils = m.def_submodule("utils"); utils.def("sigmoid", sigmoid, nb::arg("x"), "Compute the sigmoid function: 1 / (1 + exp(-x))"); } // NB_MODULE(_genmetaballs_bindings) + diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index a3eee34..b890956 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -2,44 +2,46 @@ #include +#include "utils.cuh" + using Vec3D = float3; -inline Vec3D operator-(const Vec3D a) +CUDA_CALLABLE inline Vec3D operator-(const Vec3D a) { return {-a.x, -a.y, -a.z}; } -inline Vec3D operator+(const Vec3D a, const Vec3D b) +CUDA_CALLABLE inline Vec3D operator+(const Vec3D a, const Vec3D b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; } -inline Vec3D operator-(const Vec3D a, const Vec3D b) +CUDA_CALLABLE inline Vec3D operator-(const Vec3D a, const Vec3D b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; } -inline Vec3D operator*(const float scalar, const Vec3D a) +CUDA_CALLABLE inline Vec3D operator*(const float scalar, const Vec3D a) { return {a.x * scalar, a.y * scalar, a.z * scalar}; } -inline Vec3D operator*(const Vec3D a, const float scalar) +CUDA_CALLABLE inline Vec3D operator*(const Vec3D a, const float scalar) { return {a.x * scalar, a.y * scalar, a.z * scalar}; } -inline Vec3D operator/(const Vec3D a, const float scalar) +CUDA_CALLABLE inline Vec3D operator/(const Vec3D a, const float scalar) { return {a.x / scalar, a.y / scalar, a.z / scalar}; } -inline float dot(const Vec3D a, const Vec3D b) +CUDA_CALLABLE inline float dot(const Vec3D a, const Vec3D b) { return a.x * b.x + a.y * b.y + a.z * b.z; } -inline Vec3D cross(const Vec3D a, const Vec3D b) +CUDA_CALLABLE inline Vec3D cross(const Vec3D a, const Vec3D b) { return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, @@ -53,23 +55,23 @@ private: public: Rotation() unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; - __host__ __device__ Vec3D apply(const Vec3D vec) const; + CUDA_CALLABLE Vec3D apply(const Vec3D vec) const; - __host__ __device__ Rotation compose(const Rotation& rot) const; + CUDA_CALLABLE Rotation compose(const Rotation& rot) const; - __host__ __device__ Rotation inv() const; + CUDA_CALLABLE Rotation inv() const; }; struct Pose { Rotation rot; Vec3D tran; - __host__ __device__ inline Vec3D apply(const Vec3D vec) const + CUDA_CALLABLE inline Vec3D apply(const Vec3D vec) const { return tran + rot.apply(vec); } - __host__ __device__ inline Pose compose(const Pose &pose) const + CUDA_CALLABLE inline Pose compose(const Pose &pose) const { /* * If $A_i$ is the matrix corresponding to pose object `p_i`, then @@ -79,7 +81,7 @@ struct Pose { return {rot.compose(pose.rot), rot.apply(pose.tran) + tran}; } - __host__ __device__ inline Pose inv() const + CUDA_CALLABLE inline Pose inv() const { auto rotinv = rot.inv(); return {rotinv, -rotinv.apply(tran)}; diff --git a/genmetaballs/src/cuda/core/utils.cuh b/genmetaballs/src/cuda/core/utils.cuh index 1dcdf36..060bffd 100644 --- a/genmetaballs/src/cuda/core/utils.cuh +++ b/genmetaballs/src/cuda/core/utils.cuh @@ -55,4 +55,4 @@ public: __host__ __device__ constexpr auto size() const noexcept { return data_view_.size(); } -}; +}; // class Array2D diff --git a/tests/python_tests/test_geometry.py b/tests/python_tests/test_geometry.py index 6403ef2..40dc48e 100644 --- a/tests/python_tests/test_geometry.py +++ b/tests/python_tests/test_geometry.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from genmetaballs import _genmetaballs_bindings as _gmbb +from genmetaballs import _genmetaballs_bindings.geometry as geometry @pytest.fixture @@ -10,11 +10,11 @@ def rng() -> np.random.Generator: def test_vec3d_smoke() -> None: - _gmbb.Vec3D(0, 0, 0) + geometry.Vec3D(0, 0, 0) def test_vec3d_repr_returns_valid_string() -> None: - v = _gmbb.Vec3D(1.0, 2.0, 3.0) + v = geometry.Vec3D(1.0, 2.0, 3.0) repr_str = repr(v) assert isinstance(repr_str, str) assert repr_str == "Vec3D(1.0, 2.0, 3.0)" @@ -22,7 +22,7 @@ def test_vec3d_repr_returns_valid_string() -> None: def test_vec3d_add(rng: np.random.Generator) -> None: _a, _b = rng.uniform(size=3), rng.uniform(size=3) - a, b = _gmbb.Vec3D(*_a), _gmbb.Vec3D(*_b) + a, b = geometry.Vec3D(*_a), geometry.Vec3D(*_b) c = a + b _c = _a + _b assert all([np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])]) @@ -30,7 +30,7 @@ def test_vec3d_add(rng: np.random.Generator) -> None: def test_vec3d_sub(rng: np.random.Generator) -> None: _a, _b = rng.uniform(size=3), rng.uniform(size=3) - a, b = _gmbb.Vec3D(*_a), _gmbb.Vec3D(*_b) + a, b = geometry.Vec3D(*_a), geometry.Vec3D(*_b) c = a - b _c = _a - _b assert all([np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])]) From 827b5b1ae1ddbf06a13eab5840239e9426cd2fab Mon Sep 17 00:00:00 2001 From: mugamma Date: Wed, 19 Nov 2025 01:52:09 +0000 Subject: [PATCH 03/19] build working --- genmetaballs/src/cuda/core/geometry.cu | 14 +++++++++++--- genmetaballs/src/cuda/core/geometry.cuh | 6 +++++- tests/python_tests/test_geometry.py | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/genmetaballs/src/cuda/core/geometry.cu b/genmetaballs/src/cuda/core/geometry.cu index f78d317..35d4e50 100644 --- a/genmetaballs/src/cuda/core/geometry.cu +++ b/genmetaballs/src/cuda/core/geometry.cu @@ -1,7 +1,15 @@ +#include + #include "geometry.cuh" +CUDA_CALLABLE Rotation Rotation::from_quat(float x, float y, float z, float w) +{ + auto modulus = std::sqrt(x*x + y*y + z*z + w*w); + return Rotation{{x/modulus, y/modulus, z/modulus, w/modulus}}; +} + -__host__ __device__ Vec3D Rotation::apply(const Vec3D vec) const +CUDA_CALLABLE Vec3D Rotation::apply(const Vec3D vec) const { // v' = q * v * q^(-1) for unit quaternions // where q^(-1) = (-x, -y, -z, w) @@ -15,7 +23,7 @@ __host__ __device__ Vec3D Rotation::apply(const Vec3D vec) const return 2.0f * d * q + (w * w - dot(q, q)) * vec + 2.0f * w * c; } -__host__ __device__ Rotation Rotation::compose(const Rotation& rot) const +CUDA_CALLABLE Rotation Rotation::compose(const Rotation& rot) const { // Quaternion multiplication: q1 * q2 float4 q1 = unit_quat_; @@ -28,7 +36,7 @@ __host__ __device__ Rotation Rotation::compose(const Rotation& rot) const q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z}}; } -__host__ __device__ Rotation Rotation::inv() const +CUDA_CALLABLE Rotation Rotation::inv() const { // For unit quaternions, inverse = conjugate: (-x, -y, -z, w) return Rotation{{-unit_quat_.x, -unit_quat_.y, -unit_quat_.z, unit_quat_.w}}; diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index b890956..d3dd53f 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -52,8 +52,12 @@ class Rotation { private: float4 unit_quat_; + Rotation(float4 unit_quat): unit_quat_{unit_quat} {}; + public: - Rotation() unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; + Rotation(): unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; + + static Rotation from_quat(float x, float y, float z, float w); CUDA_CALLABLE Vec3D apply(const Vec3D vec) const; diff --git a/tests/python_tests/test_geometry.py b/tests/python_tests/test_geometry.py index 40dc48e..33a1acd 100644 --- a/tests/python_tests/test_geometry.py +++ b/tests/python_tests/test_geometry.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from genmetaballs import _genmetaballs_bindings.geometry as geometry +from genmetaballs._genmetaballs_bindings import geometry as geometry @pytest.fixture From e5876f6c1a882c2edd31c1825ff0695962eaa703 Mon Sep 17 00:00:00 2001 From: mugamma Date: Wed, 19 Nov 2025 02:39:50 +0000 Subject: [PATCH 04/19] add basic rotation tests --- genmetaballs/src/cuda/bindings.cu | 2 + tests/python_tests/test_geometry.py | 88 ++++++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index b74f891..73f636e 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -47,6 +47,8 @@ NB_MODULE(_genmetaballs_bindings, m) { nb::class_(geometry, "Rotation") .def(nb::init<>()) + .def_static("from_quat", &Rotation::from_quat, "Create rotation from quaternion", + nb::arg("x"), nb::arg("y"), nb::arg("z"), nb::arg("w")) .def("apply", &Rotation::apply, "Apply rotation to vector", nb::arg("vec")) .def("compose", &Rotation::compose, "Compose with another rotation", nb::arg("rot")) .def("inv", &Rotation::inv, "Inverse rotation"); diff --git a/tests/python_tests/test_geometry.py b/tests/python_tests/test_geometry.py index 33a1acd..7fbd4bb 100644 --- a/tests/python_tests/test_geometry.py +++ b/tests/python_tests/test_geometry.py @@ -1,5 +1,8 @@ +import operator as op + import numpy as np import pytest +from scipy.spatial.transform import Rotation as Rot from genmetaballs._genmetaballs_bindings import geometry as geometry @@ -20,17 +23,84 @@ def test_vec3d_repr_returns_valid_string() -> None: assert repr_str == "Vec3D(1.0, 2.0, 3.0)" -def test_vec3d_add(rng: np.random.Generator) -> None: +@pytest.mark.parametrize( + "np_op,vec3d_op", + [ + (np.add, op.add), + (np.subtract, op.sub), + (np.cross, geometry.cross), + ] + ) +def test_vec3d_ops(rng: np.random.Generator, np_op, vec3d_op) -> None: _a, _b = rng.uniform(size=3), rng.uniform(size=3) a, b = geometry.Vec3D(*_a), geometry.Vec3D(*_b) - c = a + b - _c = _a + _b - assert all([np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])]) - + c = vec3d_op(a, b) + _c = np_op(_a, _b) + assert np.allclose(_c, np.array([c.x, c.y, c.z])) -def test_vec3d_sub(rng: np.random.Generator) -> None: +def test_vec3d_dot(rng: np.random.Generator) -> None: _a, _b = rng.uniform(size=3), rng.uniform(size=3) a, b = geometry.Vec3D(*_a), geometry.Vec3D(*_b) - c = a - b - _c = _a - _b - assert all([np.isclose(c.x, _c[0]), np.isclose(c.y, _c[1]), np.isclose(c.z, _c[2])]) + assert np.allclose(np.dot(_a, _b), geometry.dot(a, b)) + +def test_rotation_apply(rng: np.random.Generator) -> None: + quats = rng.uniform(-1, 1, size=(100, 4)) + quats /= np.linalg.norm(quats, axis=1, keepdims=True) + vecs = rng.uniform(size=(100, 3)) + + for i in range(100): + rot_scipy = Rot.from_quat(quats[i]) + rot_geom = geometry.Rotation.from_quat(*quats[i]) + vec_geom = geometry.Vec3D(*vecs[i]) + + result_scipy = rot_scipy.apply(vecs[i]) + result_geom = rot_geom.apply(vec_geom) + + assert np.allclose(result_scipy, np.array([result_geom.x, result_geom.y, result_geom.z])) + +def test_rotation_compose(rng: np.random.Generator) -> None: + quats1 = rng.uniform(-1, 1, size=(100, 4)) + quats1 /= np.linalg.norm(quats1, axis=1, keepdims=True) + quats2 = rng.uniform(-1, 1, size=(100, 4)) + quats2 /= np.linalg.norm(quats2, axis=1, keepdims=True) + vecs = rng.uniform(size=(50, 3)) + + for i in range(100): + rot1_scipy = Rot.from_quat(quats1[i]) + rot2_scipy = Rot.from_quat(quats2[i]) + composed_scipy = rot1_scipy * rot2_scipy + + rot1_geom = geometry.Rotation.from_quat(*quats1[i]) + rot2_geom = geometry.Rotation.from_quat(*quats2[i]) + composed_geom = rot1_geom.compose(rot2_geom) + + for j in range(50): + vec_geom = geometry.Vec3D(*vecs[j]) + result_scipy = composed_scipy.apply(vecs[j]) + result_geom = composed_geom.apply(vec_geom) + assert np.allclose( + result_scipy, + np.array([result_geom.x, result_geom.y, result_geom.z]), + rtol=1e-5, + atol=1e-6, + ) + +def test_rotation_inv(rng: np.random.Generator) -> None: + quats = rng.uniform(-1, 1, size=(100, 4)) + quats /= np.linalg.norm(quats, axis=1, keepdims=True) + vecs = rng.uniform(size=(50, 3)) + + for i in range(100): + rot = geometry.Rotation.from_quat(*quats[i]) + rotinv = rot.inv() + composed = rot.compose(rotinv) + + for j in range(50): + vec = geometry.Vec3D(*vecs[j]) + vec_ = composed.apply(vec) + assert np.allclose( + np.array([vec.x, vec.y, vec.z]), + np.array([vec_.x, vec_.y, vec_.z]), + rtol=1e-5, + atol=1e-6, + ) From b6de85f0aeeaf09586ebbdf690c512de3c4d777c Mon Sep 17 00:00:00 2001 From: mugamma Date: Thu, 20 Nov 2025 15:56:32 +0000 Subject: [PATCH 05/19] add proper constructors for pose --- genmetaballs/src/cuda/bindings.cu | 7 +++- genmetaballs/src/cuda/core/geometry.cuh | 49 ++++++++++++++++++------- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index 73f636e..c999364 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -55,8 +55,11 @@ NB_MODULE(_genmetaballs_bindings, m) { nb::class_(geometry, "Pose") .def(nb::init<>()) - .def_rw("rot", &Pose::rot) - .def_rw("tran", &Pose::tran) + .def_static("from_components", &Pose::from_components, + "Create rotation from a rotation and a translation", + nb::arg("rot"), nb::arg("tran")) + .def_prop_ro("rot", &Pose::get_rot, "get the rotation component") + .def_prop_ro("tran", &Pose::get_tran, "get the translation component") .def("apply", &Pose::apply, "Apply pose to vector", nb::arg("vec")) .def("compose", &Pose::compose, "Compose with another pose", nb::arg("pose")) .def("inv", &Pose::inv, "Inverse pose"); diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index d3dd53f..464d602 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -52,12 +52,12 @@ class Rotation { private: float4 unit_quat_; - Rotation(float4 unit_quat): unit_quat_{unit_quat} {}; + CUDA_CALLABLE Rotation(float4 unit_quat): unit_quat_{unit_quat} {}; public: - Rotation(): unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; + CUDA_CALLABLE Rotation(): unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; - static Rotation from_quat(float x, float y, float z, float w); + static CUDA_CALLABLE Rotation from_quat(float x, float y, float z, float w); CUDA_CALLABLE Vec3D apply(const Vec3D vec) const; @@ -66,29 +66,52 @@ public: CUDA_CALLABLE Rotation inv() const; }; -struct Pose { - Rotation rot; - Vec3D tran; +class Pose { +private: + Rotation rot_; + Vec3D tran_; + + CUDA_CALLABLE Pose(const Rotation rot, const Vec3D tran): rot_{rot}, tran_{tran} {} + +public: + //these member functions are defined in class body to allow for possible inlining + + CUDA_CALLABLE Pose(): rot_{Rotation()}, tran_{0.0f, 0.0f, 0.0f} {} + + static CUDA_CALLABLE Pose from_components(const Rotation rot, const Vec3D tran) + { + return {rot, tran}; + } + + CUDA_CALLABLE Rotation get_rot() const + { + return rot_; + } + + CUDA_CALLABLE Vec3D get_tran() const + { + return tran_; + } - CUDA_CALLABLE inline Vec3D apply(const Vec3D vec) const + CUDA_CALLABLE Vec3D apply(const Vec3D vec) const { - return tran + rot.apply(vec); + return tran_ + rot_.apply(vec); } - CUDA_CALLABLE inline Pose compose(const Pose &pose) const + CUDA_CALLABLE Pose compose(const Pose &pose) const { /* * If $A_i$ is the matrix corresponding to pose object `p_i`, then * $A_1A_2$ is the matrix corresponding to the pose object * `p_1.compose(p2)`. */ - return {rot.compose(pose.rot), rot.apply(pose.tran) + tran}; + return {rot_.compose(pose.rot_), rot_.apply(pose.tran_) + tran_}; } - CUDA_CALLABLE inline Pose inv() const + CUDA_CALLABLE Pose inv() const { - auto rotinv = rot.inv(); - return {rotinv, -rotinv.apply(tran)}; + auto rotinv = rot_.inv(); + return {rotinv, -rotinv.apply(tran_)}; } }; From 17ff36c088eb15f4db8b91029ad7e608d2661142 Mon Sep 17 00:00:00 2001 From: mugamma Date: Thu, 20 Nov 2025 16:00:49 +0000 Subject: [PATCH 06/19] add pose correctness tests --- tests/python_tests/test_geometry.py | 92 ++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 8 deletions(-) diff --git a/tests/python_tests/test_geometry.py b/tests/python_tests/test_geometry.py index 7fbd4bb..a1c9161 100644 --- a/tests/python_tests/test_geometry.py +++ b/tests/python_tests/test_geometry.py @@ -3,6 +3,7 @@ import numpy as np import pytest from scipy.spatial.transform import Rotation as Rot +from scipy.spatial.transform import RigidTransform as Rigid from genmetaballs._genmetaballs_bindings import geometry as geometry @@ -32,16 +33,18 @@ def test_vec3d_repr_returns_valid_string() -> None: ] ) def test_vec3d_ops(rng: np.random.Generator, np_op, vec3d_op) -> None: - _a, _b = rng.uniform(size=3), rng.uniform(size=3) - a, b = geometry.Vec3D(*_a), geometry.Vec3D(*_b) - c = vec3d_op(a, b) - _c = np_op(_a, _b) - assert np.allclose(_c, np.array([c.x, c.y, c.z])) + _a, _b = rng.uniform(size=(100, 3)), rng.uniform(size=(100, 3)) + for i in range(100): + a, b = geometry.Vec3D(*_a[i]), geometry.Vec3D(*_b[i]) + c = vec3d_op(a, b) + _c = np_op(_a[i], _b[i]) + assert np.allclose(_c, np.array([c.x, c.y, c.z])) def test_vec3d_dot(rng: np.random.Generator) -> None: - _a, _b = rng.uniform(size=3), rng.uniform(size=3) - a, b = geometry.Vec3D(*_a), geometry.Vec3D(*_b) - assert np.allclose(np.dot(_a, _b), geometry.dot(a, b)) + _a, _b = rng.uniform(size=(100, 3)), rng.uniform(size=(100, 3)) + for i in range(100): + a, b = geometry.Vec3D(*_a[i]), geometry.Vec3D(*_b[i]) + assert np.allclose(np.dot(_a[i], _b[i]), geometry.dot(a, b)) def test_rotation_apply(rng: np.random.Generator) -> None: quats = rng.uniform(-1, 1, size=(100, 4)) @@ -104,3 +107,76 @@ def test_rotation_inv(rng: np.random.Generator) -> None: rtol=1e-5, atol=1e-6, ) + +def test_pose_apply(rng: np.random.Generator) -> None: + exp_coords = rng.uniform(size=(100, 6)) + vecs = rng.uniform(size=(100, 3)) + + for i in range(100): + pose_scipy = Rigid.from_exp_coords(exp_coords[i]) + + pose_geom = geometry.Pose.from_components( + rot=geometry.Rotation.from_quat(*pose_scipy.rotation.as_quat()), + tran=geometry.Vec3D(*pose_scipy.translation), + ) + vec_geom = geometry.Vec3D(*vecs[i]) + + result_scipy = pose_scipy.apply(vecs[i]) + result_geom = pose_geom.apply(vec_geom) + + assert np.allclose(result_scipy, np.array([result_geom.x, result_geom.y, result_geom.z])) + +def test_pose_compose(rng: np.random.Generator) -> None: + exp_coords1 = rng.uniform(size=(100, 6)) + exp_coords2 = rng.uniform(size=(100, 6)) + vecs = rng.uniform(size=(50, 3)) + + for i in range(100): + pose1_scipy = Rigid.from_exp_coords(exp_coords1[i]) + pose2_scipy = Rigid.from_exp_coords(exp_coords2[i]) + composed_scipy = pose1_scipy * pose2_scipy + + pose1_geom = geometry.Pose.from_components( + rot=geometry.Rotation.from_quat(*pose1_scipy.rotation.as_quat()), + tran=geometry.Vec3D(*pose1_scipy.translation), + ) + pose2_geom = geometry.Pose.from_components( + rot=geometry.Rotation.from_quat(*pose2_scipy.rotation.as_quat()), + tran=geometry.Vec3D(*pose2_scipy.translation), + ) + composed_geom = pose1_geom.compose(pose2_geom) + + for j in range(50): + vec_geom = geometry.Vec3D(*vecs[j]) + result_scipy = composed_scipy.apply(vecs[j]) + result_geom = composed_geom.apply(vec_geom) + assert np.allclose( + result_scipy, + np.array([result_geom.x, result_geom.y, result_geom.z]), + rtol=1e-5, + atol=1e-6, + ) + +def test_pose_inv(rng: np.random.Generator) -> None: + exp_coords = rng.uniform(size=(100, 6)) + vecs = rng.uniform(size=(50, 3)) + + for i in range(100): + pose_scipy = Rigid.from_exp_coords(exp_coords[i]) + + pose = geometry.Pose.from_components( + rot=geometry.Rotation.from_quat(*pose_scipy.rotation.as_quat()), + tran=geometry.Vec3D(*pose_scipy.translation), + ) + poseinv = pose.inv() + composed = pose.compose(poseinv) + + for j in range(50): + vec = geometry.Vec3D(*vecs[j]) + vec_ = composed.apply(vec) + assert np.allclose( + np.array([vec.x, vec.y, vec.z]), + np.array([vec_.x, vec_.y, vec_.z]), + rtol=1e-5, + atol=1e-6, + ) From 8fb9bcd1b8cd30e6017f4869424fc1d095f92a55 Mon Sep 17 00:00:00 2001 From: mugamma Date: Thu, 20 Nov 2025 18:12:55 +0000 Subject: [PATCH 07/19] format and lint --- .clang-tidy | 1 + genmetaballs/src/cuda/bindings.cu | 24 +++++----- genmetaballs/src/cuda/core/geometry.cu | 28 +++++------- genmetaballs/src/cuda/core/geometry.cuh | 58 +++++++++---------------- tests/python_tests/test_geometry.py | 53 ++++++++++++---------- 5 files changed, 75 insertions(+), 89 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index db4c130..1ad12ab 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -19,6 +19,7 @@ Checks: > -readability-avoid-const-params-in-decls, -readability-braces-around-statements, -readability-isolate-declaration, + -readability-math-missing-parentheses, -cppcoreguidelines-avoid-magic-numbers, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, -cppcoreguidelines-pro-bounds-pointer-arithmetic, diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index c999364..df3e2e1 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -23,7 +23,7 @@ NB_MODULE(_genmetaballs_bindings, m) { /* * Geometry module bindings */ - + nb::module_ geometry = m.def_submodule("geometry", "Geometry helpers for GenMetaballs"); nb::class_(geometry, "Vec3D") @@ -32,15 +32,14 @@ NB_MODULE(_genmetaballs_bindings, m) { .def_ro("x", &Vec3D::x) .def_ro("y", &Vec3D::y) .def_ro("z", &Vec3D::z) - .def("__add__", static_cast(&operator+)) - .def("__sub__", static_cast(&operator-)) - .def("__neg__", static_cast(&operator-)) - .def("__mul__", static_cast(&operator*)) - .def("__rmul__", static_cast(&operator*)) - .def("__truediv__", static_cast(&operator/)) - .def("__repr__", [](const Vec3D& v) { - return nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z); - }); + .def("__add__", static_cast(&operator+)) + .def("__sub__", static_cast(&operator-)) + .def("__neg__", static_cast(&operator-)) + .def("__mul__", static_cast(&operator*)) + .def("__rmul__", static_cast(&operator*)) + .def("__truediv__", static_cast(&operator/)) + .def("__repr__", + [](const Vec3D& v) { return nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z); }); geometry.def("dot", &dot, "Dot product of two `Vec3D`s", nb::arg("a"), nb::arg("b")); geometry.def("cross", &cross, "Cross product of two `Vec3D`s", nb::arg("a"), nb::arg("b")); @@ -56,8 +55,8 @@ NB_MODULE(_genmetaballs_bindings, m) { nb::class_(geometry, "Pose") .def(nb::init<>()) .def_static("from_components", &Pose::from_components, - "Create rotation from a rotation and a translation", - nb::arg("rot"), nb::arg("tran")) + "Create rotation from a rotation and a translation", nb::arg("rot"), + nb::arg("tran")) .def_prop_ro("rot", &Pose::get_rot, "get the rotation component") .def_prop_ro("tran", &Pose::get_tran, "get the translation component") .def("apply", &Pose::apply, "Apply pose to vector", nb::arg("vec")) @@ -90,4 +89,3 @@ NB_MODULE(_genmetaballs_bindings, m) { utils.def("sigmoid", sigmoid, nb::arg("x"), "Compute the sigmoid function: 1 / (1 + exp(-x))"); } // NB_MODULE(_genmetaballs_bindings) - diff --git a/genmetaballs/src/cuda/core/geometry.cu b/genmetaballs/src/cuda/core/geometry.cu index 35d4e50..c645bdf 100644 --- a/genmetaballs/src/cuda/core/geometry.cu +++ b/genmetaballs/src/cuda/core/geometry.cu @@ -2,15 +2,13 @@ #include "geometry.cuh" -CUDA_CALLABLE Rotation Rotation::from_quat(float x, float y, float z, float w) -{ - auto modulus = std::sqrt(x*x + y*y + z*z + w*w); - return Rotation{{x/modulus, y/modulus, z/modulus, w/modulus}}; +// NOLINTNEXTLINE(readability-convert-member-functions-to-static) +CUDA_CALLABLE Rotation Rotation::from_quat(float x, float y, float z, float w) { + auto modulus = std::sqrt(x * x + y * y + z * z + w * w); + return Rotation{{x / modulus, y / modulus, z / modulus, w / modulus}}; } - -CUDA_CALLABLE Vec3D Rotation::apply(const Vec3D vec) const -{ +CUDA_CALLABLE Vec3D Rotation::apply(const Vec3D vec) const { // v' = q * v * q^(-1) for unit quaternions // where q^(-1) = (-x, -y, -z, w) Vec3D q = {unit_quat_.x, unit_quat_.y, unit_quat_.z}; @@ -23,21 +21,19 @@ CUDA_CALLABLE Vec3D Rotation::apply(const Vec3D vec) const return 2.0f * d * q + (w * w - dot(q, q)) * vec + 2.0f * w * c; } -CUDA_CALLABLE Rotation Rotation::compose(const Rotation& rot) const -{ +// NOLINTNEXTLINE(readability-convert-member-functions-to-static) +CUDA_CALLABLE Rotation Rotation::compose(const Rotation& rot) const { // Quaternion multiplication: q1 * q2 float4 q1 = unit_quat_; float4 q2 = rot.unit_quat_; - return Rotation{ - {q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y, - q1.w * q2.y - q1.x * q2.z + q1.y * q2.w + q1.z * q2.x, - q1.w * q2.z + q1.x * q2.y - q1.y * q2.x + q1.z * q2.w, - q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z}}; + return Rotation{{q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y, + q1.w * q2.y - q1.x * q2.z + q1.y * q2.w + q1.z * q2.x, + q1.w * q2.z + q1.x * q2.y - q1.y * q2.x + q1.z * q2.w, + q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z}}; } -CUDA_CALLABLE Rotation Rotation::inv() const -{ +CUDA_CALLABLE Rotation Rotation::inv() const { // For unit quaternions, inverse = conjugate: (-x, -y, -z, w) return Rotation{{-unit_quat_.x, -unit_quat_.y, -unit_quat_.z, unit_quat_.w}}; } diff --git a/genmetaballs/src/cuda/core/geometry.cuh b/genmetaballs/src/cuda/core/geometry.cuh index 464d602..f64dbd2 100644 --- a/genmetaballs/src/cuda/core/geometry.cuh +++ b/genmetaballs/src/cuda/core/geometry.cuh @@ -6,56 +6,46 @@ using Vec3D = float3; -CUDA_CALLABLE inline Vec3D operator-(const Vec3D a) -{ +CUDA_CALLABLE inline Vec3D operator-(const Vec3D a) { return {-a.x, -a.y, -a.z}; } -CUDA_CALLABLE inline Vec3D operator+(const Vec3D a, const Vec3D b) -{ +CUDA_CALLABLE inline Vec3D operator+(const Vec3D a, const Vec3D b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; } -CUDA_CALLABLE inline Vec3D operator-(const Vec3D a, const Vec3D b) -{ +CUDA_CALLABLE inline Vec3D operator-(const Vec3D a, const Vec3D b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; } -CUDA_CALLABLE inline Vec3D operator*(const float scalar, const Vec3D a) -{ +CUDA_CALLABLE inline Vec3D operator*(const float scalar, const Vec3D a) { return {a.x * scalar, a.y * scalar, a.z * scalar}; } -CUDA_CALLABLE inline Vec3D operator*(const Vec3D a, const float scalar) -{ +CUDA_CALLABLE inline Vec3D operator*(const Vec3D a, const float scalar) { return {a.x * scalar, a.y * scalar, a.z * scalar}; } -CUDA_CALLABLE inline Vec3D operator/(const Vec3D a, const float scalar) -{ +CUDA_CALLABLE inline Vec3D operator/(const Vec3D a, const float scalar) { return {a.x / scalar, a.y / scalar, a.z / scalar}; } -CUDA_CALLABLE inline float dot(const Vec3D a, const Vec3D b) -{ +CUDA_CALLABLE inline float dot(const Vec3D a, const Vec3D b) { return a.x * b.x + a.y * b.y + a.z * b.z; } -CUDA_CALLABLE inline Vec3D cross(const Vec3D a, const Vec3D b) -{ - return {a.y * b.z - a.z * b.y, - a.z * b.x - a.x * b.z, - a.x * b.y - a.y * b.x}; +CUDA_CALLABLE inline Vec3D cross(const Vec3D a, const Vec3D b) { + return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; } class Rotation { private: float4 unit_quat_; - CUDA_CALLABLE Rotation(float4 unit_quat): unit_quat_{unit_quat} {}; + CUDA_CALLABLE Rotation(float4 unit_quat) : unit_quat_{unit_quat} {}; public: - CUDA_CALLABLE Rotation(): unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; + CUDA_CALLABLE Rotation() : unit_quat_{0.0f, 0.0f, 0.0f, 1.0f} {}; static CUDA_CALLABLE Rotation from_quat(float x, float y, float z, float w); @@ -71,35 +61,30 @@ private: Rotation rot_; Vec3D tran_; - CUDA_CALLABLE Pose(const Rotation rot, const Vec3D tran): rot_{rot}, tran_{tran} {} + CUDA_CALLABLE Pose(const Rotation rot, const Vec3D tran) : rot_{rot}, tran_{tran} {} public: - //these member functions are defined in class body to allow for possible inlining - - CUDA_CALLABLE Pose(): rot_{Rotation()}, tran_{0.0f, 0.0f, 0.0f} {} + // these member functions are defined in class body to allow for possible inlining - static CUDA_CALLABLE Pose from_components(const Rotation rot, const Vec3D tran) - { + CUDA_CALLABLE Pose() : rot_{Rotation()}, tran_{0.0f, 0.0f, 0.0f} {} + + static CUDA_CALLABLE Pose from_components(const Rotation rot, const Vec3D tran) { return {rot, tran}; } - CUDA_CALLABLE Rotation get_rot() const - { + CUDA_CALLABLE Rotation get_rot() const { return rot_; } - CUDA_CALLABLE Vec3D get_tran() const - { + CUDA_CALLABLE Vec3D get_tran() const { return tran_; } - CUDA_CALLABLE Vec3D apply(const Vec3D vec) const - { + CUDA_CALLABLE Vec3D apply(const Vec3D vec) const { return tran_ + rot_.apply(vec); } - CUDA_CALLABLE Pose compose(const Pose &pose) const - { + CUDA_CALLABLE Pose compose(const Pose& pose) const { /* * If $A_i$ is the matrix corresponding to pose object `p_i`, then * $A_1A_2$ is the matrix corresponding to the pose object @@ -108,8 +93,7 @@ public: return {rot_.compose(pose.rot_), rot_.apply(pose.tran_) + tran_}; } - CUDA_CALLABLE Pose inv() const - { + CUDA_CALLABLE Pose inv() const { auto rotinv = rot_.inv(); return {rotinv, -rotinv.apply(tran_)}; } diff --git a/tests/python_tests/test_geometry.py b/tests/python_tests/test_geometry.py index a1c9161..f2e32d0 100644 --- a/tests/python_tests/test_geometry.py +++ b/tests/python_tests/test_geometry.py @@ -2,8 +2,8 @@ import numpy as np import pytest -from scipy.spatial.transform import Rotation as Rot from scipy.spatial.transform import RigidTransform as Rigid +from scipy.spatial.transform import Rotation as Rot from genmetaballs._genmetaballs_bindings import geometry as geometry @@ -30,8 +30,8 @@ def test_vec3d_repr_returns_valid_string() -> None: (np.add, op.add), (np.subtract, op.sub), (np.cross, geometry.cross), - ] - ) + ], +) def test_vec3d_ops(rng: np.random.Generator, np_op, vec3d_op) -> None: _a, _b = rng.uniform(size=(100, 3)), rng.uniform(size=(100, 3)) for i in range(100): @@ -40,12 +40,14 @@ def test_vec3d_ops(rng: np.random.Generator, np_op, vec3d_op) -> None: _c = np_op(_a[i], _b[i]) assert np.allclose(_c, np.array([c.x, c.y, c.z])) + def test_vec3d_dot(rng: np.random.Generator) -> None: _a, _b = rng.uniform(size=(100, 3)), rng.uniform(size=(100, 3)) for i in range(100): a, b = geometry.Vec3D(*_a[i]), geometry.Vec3D(*_b[i]) assert np.allclose(np.dot(_a[i], _b[i]), geometry.dot(a, b)) + def test_rotation_apply(rng: np.random.Generator) -> None: quats = rng.uniform(-1, 1, size=(100, 4)) quats /= np.linalg.norm(quats, axis=1, keepdims=True) @@ -61,6 +63,7 @@ def test_rotation_apply(rng: np.random.Generator) -> None: assert np.allclose(result_scipy, np.array([result_geom.x, result_geom.y, result_geom.z])) + def test_rotation_compose(rng: np.random.Generator) -> None: quats1 = rng.uniform(-1, 1, size=(100, 4)) quats1 /= np.linalg.norm(quats1, axis=1, keepdims=True) @@ -82,11 +85,12 @@ def test_rotation_compose(rng: np.random.Generator) -> None: result_scipy = composed_scipy.apply(vecs[j]) result_geom = composed_geom.apply(vec_geom) assert np.allclose( - result_scipy, - np.array([result_geom.x, result_geom.y, result_geom.z]), - rtol=1e-5, - atol=1e-6, - ) + result_scipy, + np.array([result_geom.x, result_geom.y, result_geom.z]), + rtol=1e-5, + atol=1e-6, + ) + def test_rotation_inv(rng: np.random.Generator) -> None: quats = rng.uniform(-1, 1, size=(100, 4)) @@ -102,11 +106,12 @@ def test_rotation_inv(rng: np.random.Generator) -> None: vec = geometry.Vec3D(*vecs[j]) vec_ = composed.apply(vec) assert np.allclose( - np.array([vec.x, vec.y, vec.z]), - np.array([vec_.x, vec_.y, vec_.z]), - rtol=1e-5, - atol=1e-6, - ) + np.array([vec.x, vec.y, vec.z]), + np.array([vec_.x, vec_.y, vec_.z]), + rtol=1e-5, + atol=1e-6, + ) + def test_pose_apply(rng: np.random.Generator) -> None: exp_coords = rng.uniform(size=(100, 6)) @@ -126,6 +131,7 @@ def test_pose_apply(rng: np.random.Generator) -> None: assert np.allclose(result_scipy, np.array([result_geom.x, result_geom.y, result_geom.z])) + def test_pose_compose(rng: np.random.Generator) -> None: exp_coords1 = rng.uniform(size=(100, 6)) exp_coords2 = rng.uniform(size=(100, 6)) @@ -151,11 +157,12 @@ def test_pose_compose(rng: np.random.Generator) -> None: result_scipy = composed_scipy.apply(vecs[j]) result_geom = composed_geom.apply(vec_geom) assert np.allclose( - result_scipy, - np.array([result_geom.x, result_geom.y, result_geom.z]), - rtol=1e-5, - atol=1e-6, - ) + result_scipy, + np.array([result_geom.x, result_geom.y, result_geom.z]), + rtol=1e-5, + atol=1e-6, + ) + def test_pose_inv(rng: np.random.Generator) -> None: exp_coords = rng.uniform(size=(100, 6)) @@ -175,8 +182,8 @@ def test_pose_inv(rng: np.random.Generator) -> None: vec = geometry.Vec3D(*vecs[j]) vec_ = composed.apply(vec) assert np.allclose( - np.array([vec.x, vec.y, vec.z]), - np.array([vec_.x, vec_.y, vec_.z]), - rtol=1e-5, - atol=1e-6, - ) + np.array([vec.x, vec.y, vec.z]), + np.array([vec_.x, vec_.y, vec_.z]), + rtol=1e-5, + atol=1e-6, + ) From 39fbab7dc24c892565943be4e39bdd50b5e8a4bd Mon Sep 17 00:00:00 2001 From: mugamma Date: Fri, 21 Nov 2025 01:21:57 +0000 Subject: [PATCH 08/19] add constructor --- genmetaballs/src/cuda/core/fmb.cuh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index fe9a765..a205fd0 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -3,8 +3,12 @@ #include "geometry.cuh" struct FMB { - Pose pose; // mean + orientation + Pose pose; float3 extent; + + FMB(const Pose _pose, const float3 _extent) : pose{_pose}, extent{_extent} {} + + float quadratic_form(const Vec3D) const; }; template From 19f9d128514a416307372f8465798bc0030a8625 Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 23 Nov 2025 01:49:38 +0000 Subject: [PATCH 09/19] expose geometry module in `genmetaballs.core` --- genmetaballs/src/genmetaballs/core/__init__.py | 2 ++ tests/python_tests/test_geometry.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/genmetaballs/src/genmetaballs/core/__init__.py b/genmetaballs/src/genmetaballs/core/__init__.py index 341a681..23c7077 100644 --- a/genmetaballs/src/genmetaballs/core/__init__.py +++ b/genmetaballs/src/genmetaballs/core/__init__.py @@ -1,3 +1,4 @@ +from genmetaballs._genmetaballs_bindings import geometry from genmetaballs._genmetaballs_bindings.confidence import ( TwoParameterConfidence, ZeroParameterConfidence, @@ -7,5 +8,6 @@ __all__ = [ "ZeroParameterConfidence", "TwoParameterConfidence", + "geometry", "sigmoid", ] diff --git a/tests/python_tests/test_geometry.py b/tests/python_tests/test_geometry.py index f2e32d0..a150bc5 100644 --- a/tests/python_tests/test_geometry.py +++ b/tests/python_tests/test_geometry.py @@ -5,7 +5,7 @@ from scipy.spatial.transform import RigidTransform as Rigid from scipy.spatial.transform import Rotation as Rot -from genmetaballs._genmetaballs_bindings import geometry as geometry +from genmetaballs.core import geometry @pytest.fixture From 0427c873f0659300214bb7749e0f982aeb7b57be Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 23 Nov 2025 01:59:13 +0000 Subject: [PATCH 10/19] use nanobind paradigm to bind operators --- genmetaballs/src/cuda/bindings.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index df3e2e1..bf0975f 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -32,12 +32,12 @@ NB_MODULE(_genmetaballs_bindings, m) { .def_ro("x", &Vec3D::x) .def_ro("y", &Vec3D::y) .def_ro("z", &Vec3D::z) - .def("__add__", static_cast(&operator+)) - .def("__sub__", static_cast(&operator-)) - .def("__neg__", static_cast(&operator-)) - .def("__mul__", static_cast(&operator*)) - .def("__rmul__", static_cast(&operator*)) - .def("__truediv__", static_cast(&operator/)) + .def(nb::self + nb::self) + .def(nb::self - nb::self) + .def(-nb::self) + .def(nb::self * float()) + .def(float() * nb::self) + .def(nb::self / float()) .def("__repr__", [](const Vec3D& v) { return nb::str("Vec3D({}, {}, {})").format(v.x, v.y, v.z); }); From cacd2bc36ae92fedd13e9141f8c38d910e99f4d7 Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 23 Nov 2025 04:28:52 +0000 Subject: [PATCH 11/19] first version (builds) --- .clang-tidy | 1 + genmetaballs/src/cuda/bindings.cu | 19 ++++++++++++++ genmetaballs/src/cuda/core/fmb.cu | 11 ++++++++ genmetaballs/src/cuda/core/fmb.cuh | 36 +++++++++++++++++++++------ genmetaballs/src/cuda/core/forward.cu | 2 +- tests/python_tests/test_fmb.py | 25 +++++++++++++++++++ 6 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 genmetaballs/src/cuda/core/fmb.cu create mode 100644 tests/python_tests/test_fmb.py diff --git a/.clang-tidy b/.clang-tidy index 1ad12ab..d243816 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -18,6 +18,7 @@ Checks: > -readability-implicit-bool-conversion, -readability-avoid-const-params-in-decls, -readability-braces-around-statements, + -readability-convert-member-functions-to-static, -readability-isolate-declaration, -readability-math-missing-parentheses, -cppcoreguidelines-avoid-magic-numbers, diff --git a/genmetaballs/src/cuda/bindings.cu b/genmetaballs/src/cuda/bindings.cu index b07e8ad..22c6b20 100644 --- a/genmetaballs/src/cuda/bindings.cu +++ b/genmetaballs/src/cuda/bindings.cu @@ -1,9 +1,11 @@ #include #include #include +#include #include #include "core/confidence.cuh" +#include "core/fmb.cuh" #include "core/geometry.cuh" #include "core/utils.cuh" @@ -11,6 +13,23 @@ namespace nb = nanobind; NB_MODULE(_genmetaballs_bindings, m) { + /* + * FMB module bindings + */ + + nb::module_ fmb = m.def_submodule("fmb", "Fuzzy meta ball data types"); + + nb::class_(fmb, "FMB") + .def(nb::init()) + .def_prop_ro("pose", &FMB::get_pose) + .def_prop_ro("extent", + [](const FMB& self) { + auto extent = self.get_extent(); + return std::tuple{extent.x, extent.y, extent.z}; + }) + .def("quadratic_form", &FMB::quadratic_form, + "Evaluate the associated quadratic form at the given vector", nb::arg("vec")); + /* * Geometry module bindings */ diff --git a/genmetaballs/src/cuda/core/fmb.cu b/genmetaballs/src/cuda/core/fmb.cu new file mode 100644 index 0000000..328ef7f --- /dev/null +++ b/genmetaballs/src/cuda/core/fmb.cu @@ -0,0 +1,11 @@ +#include "fmb.cuh" +#include "geometry.cuh" +#include "utils.cuh" + +CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const { + const auto shftd_vec = vec - pose.get_tran(); + const auto rot_shftd_vec = pose.get_rot().apply(shftd_vec); + const auto scaled_rot_shftd_vec = + Vec3D(rot_shftd_vec.x / extent.x, rot_shftd_vec.y / extent.y, rot_shftd_vec.z / extent.z); + return dot(rot_shftd_vec, scaled_rot_shftd_vec); +} diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index a205fd0..f852e2a 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -1,18 +1,38 @@ #pragma once +#include + #include "geometry.cuh" +#include "utils.cuh" + +class FMB { +private: + // In Gaussian terms: + // - mean: pose.tran + // - cov: pose.rot.mat().inv() * diag(extent) * pose.rot.mat() + Pose pose_; + float3 extent_; -struct FMB { - Pose pose; - float3 extent; +public: + FMB(const Pose& pose, float x_extent, float y_extent, float z_extent) noexcept(false) + : pose_{pose} { + if (x_extent <= 0 || y_extent <= 0 || z_extent <= 0) + throw std::domain_error("a metaball cannot have negative extent"); + extent_ = {x_extent, y_extent, z_extent}; + } - FMB(const Pose _pose, const float3 _extent) : pose{_pose}, extent{_extent} {} + Pose get_pose() const { + return pose_; + } + float3 get_extent() const { + return extent_; + } - float quadratic_form(const Vec3D) const; + CUDA_CALLABLE float quadratic_form(const Vec3D) const; }; -template -class FMBs { +/*template +class FMBScene { private: containter_template fmbs_; containter_template log_weights_; @@ -21,4 +41,4 @@ public: FMBs(uint32_t size) : fmbs_(size), log_weights_(size) { // TODO: set all log_weights_ to 0 } -}; +};*/ diff --git a/genmetaballs/src/cuda/core/forward.cu b/genmetaballs/src/cuda/core/forward.cu index a71caf6..befec4d 100644 --- a/genmetaballs/src/cuda/core/forward.cu +++ b/genmetaballs/src/cuda/core/forward.cu @@ -35,7 +35,7 @@ __global__ render_kernel(const Getter fmb_getter, const Blender blender, for (const auto& fmb : fmb_getter->get_metaballs(ray)) { const auto& [t, d] = Intersector::intersect(fmb, ray); w = blender->blend(t, d, fmb, ray); - sumexpd += exp(d); + sumexpd += exp(d); // numerically unstable. use logsumexp tf += t; w0 += w; } diff --git a/tests/python_tests/test_fmb.py b/tests/python_tests/test_fmb.py new file mode 100644 index 0000000..411b32e --- /dev/null +++ b/tests/python_tests/test_fmb.py @@ -0,0 +1,25 @@ +import numpy as np +import pytest +from scipy.spatial.distance import mahalanobis +from scipy.spatial.transform import Rotation as Rot + +from genmetaballs.core.fmb import FMB +from genmetaballs.core.geometry import Pose, Rotation, Vec3D + + +@pytest.fixture +def rng() -> np.random.Generator: + return np.random.default_rng(0) + + +def test_fmb_quadratic_form(rng): + for _ in range(100): + quat = rng.uniform(size=4) + tran, extent, vec = rng.uniform(size=(3, 3)) + pose = Pose.from_components(Rotation.from_quat(quat), Vec3D(tran)) + scipy_rot_mat = Rot.from_quat(quat).as_matrix() + cov = scipy_rot_mat.T @ np.diag(extent) @ scipy_rot_mat + assert np.isclose( + FMB(pose, Vec3D(extent)).quadratic_form(vec), + mahalanobis(vec, tran, np.linalg.inv(cov)) ** 2, + ) From bada4b923d2ac7bfd235a00ad61f9f1f24876d41 Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 23 Nov 2025 04:32:27 +0000 Subject: [PATCH 12/19] remove gpu_add residual --- genmetaballs/src/genmetaballs/__init__.py | 3 --- genmetaballs/src/genmetaballs/gpu_add.py | 5 ----- 2 files changed, 8 deletions(-) delete mode 100644 genmetaballs/src/genmetaballs/gpu_add.py diff --git a/genmetaballs/src/genmetaballs/__init__.py b/genmetaballs/src/genmetaballs/__init__.py index 9c40d8c..e69de29 100644 --- a/genmetaballs/src/genmetaballs/__init__.py +++ b/genmetaballs/src/genmetaballs/__init__.py @@ -1,3 +0,0 @@ -from .gpu_add import gpu_add - -__all__ = ["gpu_add"] diff --git a/genmetaballs/src/genmetaballs/gpu_add.py b/genmetaballs/src/genmetaballs/gpu_add.py deleted file mode 100644 index 214b75f..0000000 --- a/genmetaballs/src/genmetaballs/gpu_add.py +++ /dev/null @@ -1,5 +0,0 @@ -from . import _genmetaballs_bindings as _gmbb - - -def gpu_add(a: list[float], b: list[float]) -> list[float]: - return _gmbb.gpu_add(a, b) From 946a9a23074f61499e1a9139faeb9b8c900b7119 Mon Sep 17 00:00:00 2001 From: mugamma Date: Sun, 23 Nov 2025 05:01:26 +0000 Subject: [PATCH 13/19] add FMB --- CMakeLists.txt | 2 ++ genmetaballs/src/cuda/core/fmb.cu | 8 ++++---- genmetaballs/src/genmetaballs/core/__init__.py | 3 ++- tests/python_tests/test_fmb.py | 10 ++++++---- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 664c437..a1fe0cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,8 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) add_library(genmetaballs_core genmetaballs/src/cuda/core/utils.cu genmetaballs/src/cuda/core/utils.cuh + genmetaballs/src/cuda/core/fmb.cuh + genmetaballs/src/cuda/core/fmb.cu genmetaballs/src/cuda/core/geometry.cuh genmetaballs/src/cuda/core/geometry.cu genmetaballs/src/cuda/core/confidence.cuh diff --git a/genmetaballs/src/cuda/core/fmb.cu b/genmetaballs/src/cuda/core/fmb.cu index 328ef7f..3261a15 100644 --- a/genmetaballs/src/cuda/core/fmb.cu +++ b/genmetaballs/src/cuda/core/fmb.cu @@ -3,9 +3,9 @@ #include "utils.cuh" CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const { - const auto shftd_vec = vec - pose.get_tran(); - const auto rot_shftd_vec = pose.get_rot().apply(shftd_vec); - const auto scaled_rot_shftd_vec = - Vec3D(rot_shftd_vec.x / extent.x, rot_shftd_vec.y / extent.y, rot_shftd_vec.z / extent.z); + const auto shftd_vec = vec - pose_.get_tran(); + const auto rot_shftd_vec = pose_.get_rot().apply(shftd_vec); + const auto scaled_rot_shftd_vec = Vec3D( + rot_shftd_vec.x / extent_.x, rot_shftd_vec.y / extent_.y, rot_shftd_vec.z / extent_.z); return dot(rot_shftd_vec, scaled_rot_shftd_vec); } diff --git a/genmetaballs/src/genmetaballs/core/__init__.py b/genmetaballs/src/genmetaballs/core/__init__.py index 23c7077..44d5b08 100644 --- a/genmetaballs/src/genmetaballs/core/__init__.py +++ b/genmetaballs/src/genmetaballs/core/__init__.py @@ -1,4 +1,4 @@ -from genmetaballs._genmetaballs_bindings import geometry +from genmetaballs._genmetaballs_bindings import fmb, geometry from genmetaballs._genmetaballs_bindings.confidence import ( TwoParameterConfidence, ZeroParameterConfidence, @@ -8,6 +8,7 @@ __all__ = [ "ZeroParameterConfidence", "TwoParameterConfidence", + "fmb", "geometry", "sigmoid", ] diff --git a/tests/python_tests/test_fmb.py b/tests/python_tests/test_fmb.py index 411b32e..211f99a 100644 --- a/tests/python_tests/test_fmb.py +++ b/tests/python_tests/test_fmb.py @@ -3,8 +3,10 @@ from scipy.spatial.distance import mahalanobis from scipy.spatial.transform import Rotation as Rot -from genmetaballs.core.fmb import FMB -from genmetaballs.core.geometry import Pose, Rotation, Vec3D +from genmetaballs.core import fmb, geometry + +FMB = fmb.FMB +Pose, Vec3D, Rotation = geometry.Pose, geometry.Vec3D, geometry.Rotation @pytest.fixture @@ -16,10 +18,10 @@ def test_fmb_quadratic_form(rng): for _ in range(100): quat = rng.uniform(size=4) tran, extent, vec = rng.uniform(size=(3, 3)) - pose = Pose.from_components(Rotation.from_quat(quat), Vec3D(tran)) + pose = Pose.from_components(Rotation.from_quat(*quat), Vec3D(*tran)) scipy_rot_mat = Rot.from_quat(quat).as_matrix() cov = scipy_rot_mat.T @ np.diag(extent) @ scipy_rot_mat assert np.isclose( - FMB(pose, Vec3D(extent)).quadratic_form(vec), + FMB(pose, *extent).quadratic_form(Vec3D(*vec)), mahalanobis(vec, tran, np.linalg.inv(cov)) ** 2, ) From 287f387399fd4068bb8cc1c2125d947d6bfc01ca Mon Sep 17 00:00:00 2001 From: mugamma Date: Tue, 25 Nov 2025 19:34:25 +0000 Subject: [PATCH 14/19] wip --- genmetaballs/src/cuda/core/fmb.cuh | 73 ++++++++++++++++++++++++++---- tests/cpp_tests/test_fmb.cu | 33 ++++++++++++++ 2 files changed, 98 insertions(+), 8 deletions(-) create mode 100644 tests/cpp_tests/test_fmb.cu diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index f852e2a..c099e80 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -2,6 +2,12 @@ #include +#include +#include +#include +#include + + #include "geometry.cuh" #include "utils.cuh" @@ -14,6 +20,8 @@ private: float3 extent_; public: + FMB(): pose_{}, extent_{1.0f, 1.0f, 1.0f} {}; + FMB(const Pose& pose, float x_extent, float y_extent, float z_extent) noexcept(false) : pose_{pose} { if (x_extent <= 0 || y_extent <= 0 || z_extent <= 0) @@ -21,24 +29,73 @@ public: extent_ = {x_extent, y_extent, z_extent}; } - Pose get_pose() const { + CUDA_CALLABLE Pose get_pose() const { return pose_; } - float3 get_extent() const { + CUDA_CALLABLE float3 get_extent() const { return extent_; } CUDA_CALLABLE float quadratic_form(const Vec3D) const; }; -/*template + class FMBScene { private: - containter_template fmbs_; - containter_template log_weights_; + thrust::device_vector fmbs_; + thrust::device_vector log_weights_; + + friend class FMBSceneHostView; public: - FMBs(uint32_t size) : fmbs_(size), log_weights_(size) { - // TODO: set all log_weights_ to 0 + + // constructors + __host__ FMBScene(size_t size): fmbs_(size), log_weights_(size) {} + + __device__ thrust::device_vector &get_fmbs() { return fmbs_; } + __device__ thrust::device_vector get_fmbs() const { return fmbs_; } + + __device__ thrust::device_vector &get_log_weights() { return log_weights_; } + __device__ thrust::device_vector get_log_weights() const { return log_weights_; } + + __device__ thrust::tuple operator[] (const uint32_t i) { + return thrust::make_tuple(fmbs_[i], log_weights_[i]); } -};*/ + + __device__ auto begin() { + return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin()); + } + + __device__ auto end() { + return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end()); + } + +}; + +class FMBSceneHostView { +private: + thrust::host_vector fmbs_; + thrust::host_vector log_weights_; + +public: + FMBSceneHostView(FMBScene &scene) : + fmbs_{scene.fmbs_}, log_weights_{scene.log_weights_} {} + + __device__ thrust::device_vector get_fmbs() { return fmbs_; } + + __device__ thrust::device_vector get_log_weights() const { return log_weights_; } + + __device__ thrust::tuple operator[] (const uint32_t i) { + return thrust::make_tuple(fmbs_[i], log_weights_[i]); + } + + __device__ auto begin() { + return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin()); + } + + __device__ auto end() { + return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end()); + } + +}; + diff --git a/tests/cpp_tests/test_fmb.cu b/tests/cpp_tests/test_fmb.cu new file mode 100644 index 0000000..67d785b --- /dev/null +++ b/tests/cpp_tests/test_fmb.cu @@ -0,0 +1,33 @@ +#include +#include +#include + +#include +#include + +#include "core/fmb.cuh" + +__global__ void dummy_kernel(FMBScene &scene, float *tot_extent_x) { + + float par_extent_x = 0; + + for(auto &[fmb, w] : scene) { + par_extent_x += fmb.get_extent().x; + } + + *tot_extent_x = par_extent_x; + +} + +TEST(FMBTests, KernelRangeBasedForLoopSmokeTest) { + + FMBScene dummy_scene(10); + thrust::device_vector device_res(1); + + dummy_kernel<<<1, 1>>>(dummy_scene, thrust::raw_pointer_cast(device_res.data())); + + thrust::host_vector host_res = device_res; + + EXPECT_EQ(host_res[0], 10.0f); + +} From 27a4532519fa74784701ec027c523acc836cfefe Mon Sep 17 00:00:00 2001 From: mugamma Date: Wed, 26 Nov 2025 03:23:28 +0000 Subject: [PATCH 15/19] correct fmb scene declaration --- genmetaballs/src/cuda/core/fmb.cuh | 105 ++++++++++++++++------------- tests/cpp_tests/test_fmb.cu | 2 +- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index c099e80..3325be3 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -1,11 +1,10 @@ #pragma once #include +#include -#include -#include -#include -#include +#include +#include #include "geometry.cuh" @@ -42,60 +41,70 @@ public: class FMBScene { private: - thrust::device_vector fmbs_; - thrust::device_vector log_weights_; - - friend class FMBSceneHostView; + FMB *fmbs_; + float *log_weights_; + size_t size_; public: // constructors - __host__ FMBScene(size_t size): fmbs_(size), log_weights_(size) {} - - __device__ thrust::device_vector &get_fmbs() { return fmbs_; } - __device__ thrust::device_vector get_fmbs() const { return fmbs_; } - - __device__ thrust::device_vector &get_log_weights() { return log_weights_; } - __device__ thrust::device_vector get_log_weights() const { return log_weights_; } + __host__ FMBScene(size_t size); - __device__ thrust::tuple operator[] (const uint32_t i) { - return thrust::make_tuple(fmbs_[i], log_weights_[i]); + CUDA_CALLABLE cuda::std::tuple operator[] (const uint32_t i) { + return cuda::std::tie(fmbs_[i], log_weights_[i]); } - __device__ auto begin() { - return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin()); + CUDA_CALLABLE cuda::std::tuple + operator[] (const uint32_t i) const { + return cuda::std::tie(fmbs_[i], log_weights_[i]); } - __device__ auto end() { - return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end()); + class Iterator { + private: + FMB *fmb_ptr_; + float *log_weight_ptr_; + + public: + + CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr): + fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} + CUDA_CALLABLE cuda::std::tuple operator*() { + return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); + } + CUDA_CALLABLE bool operator!=(const Iterator &other) const { + CUDA_CALLABLE return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; + } + CUDA_CALLABLE Iterator &operator++() { fmb_ptr_++, log_weight_ptr_++; return *this; } + + }; + + class ConstIterator { + private: + const FMB *fmb_ptr_; + const float *log_weight_ptr_; + + public: + + CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr): + fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} + CUDA_CALLABLE cuda::std::tuple operator*() const { + return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); + } + CUDA_CALLABLE bool operator!=(const ConstIterator &other) const { + return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; + } + CUDA_CALLABLE ConstIterator &operator++() { fmb_ptr_++, log_weight_ptr_++; return *this; } + + }; + + + CUDA_CALLABLE Iterator begin() { return Iterator(fmbs_, log_weights_); } + CUDA_CALLABLE Iterator end() { return Iterator(fmbs_ + size_, log_weights_ + size_); } + CUDA_CALLABLE ConstIterator begin() const { + return ConstIterator(fmbs_, log_weights_); } - -}; - -class FMBSceneHostView { -private: - thrust::host_vector fmbs_; - thrust::host_vector log_weights_; - -public: - FMBSceneHostView(FMBScene &scene) : - fmbs_{scene.fmbs_}, log_weights_{scene.log_weights_} {} - - __device__ thrust::device_vector get_fmbs() { return fmbs_; } - - __device__ thrust::device_vector get_log_weights() const { return log_weights_; } - - __device__ thrust::tuple operator[] (const uint32_t i) { - return thrust::make_tuple(fmbs_[i], log_weights_[i]); - } - - __device__ auto begin() { - return thrust::make_zip_iterator(fmbs_.begin(), log_weights_.begin()); - } - - __device__ auto end() { - return thrust::make_zip_iterator(fmbs_.end(), log_weights_.end()); + CUDA_CALLABLE ConstIterator end() const { + return ConstIterator(fmbs_ + size_, log_weights_ + size_); } }; - diff --git a/tests/cpp_tests/test_fmb.cu b/tests/cpp_tests/test_fmb.cu index 67d785b..1c764ba 100644 --- a/tests/cpp_tests/test_fmb.cu +++ b/tests/cpp_tests/test_fmb.cu @@ -11,7 +11,7 @@ __global__ void dummy_kernel(FMBScene &scene, float *tot_extent_x) { float par_extent_x = 0; - for(auto &[fmb, w] : scene) { + for(auto [fmb, w] : scene) { par_extent_x += fmb.get_extent().x; } From d039c255a45211709ca734bfb414d4c5ec32aa72 Mon Sep 17 00:00:00 2001 From: mugamma Date: Wed, 26 Nov 2025 04:57:47 +0000 Subject: [PATCH 16/19] add fmb scene --- genmetaballs/src/cuda/core/fmb.cu | 22 ++++++++++ genmetaballs/src/cuda/core/fmb.cuh | 69 +++++++++++++++--------------- tests/cpp_tests/test_fmb.cu | 24 +++++------ 3 files changed, 68 insertions(+), 47 deletions(-) diff --git a/genmetaballs/src/cuda/core/fmb.cu b/genmetaballs/src/cuda/core/fmb.cu index 3261a15..83d9cdc 100644 --- a/genmetaballs/src/cuda/core/fmb.cu +++ b/genmetaballs/src/cuda/core/fmb.cu @@ -9,3 +9,25 @@ CUDA_CALLABLE float FMB::quadratic_form(const Vec3D vec) const { rot_shftd_vec.x / extent_.x, rot_shftd_vec.y / extent_.y, rot_shftd_vec.z / extent_.z); return dot(rot_shftd_vec, scaled_rot_shftd_vec); } + +template <> +__host__ FMBScene::FMBScene(size_t size) + : fmbs_{new FMB[size]}, log_weights_{new float[size]}, size_{size} {} + +template <> +__host__ FMBScene::FMBScene(size_t size) : size_{size} { + CUDA_CHECK(cudaMalloc(&fmbs_, size * sizeof(FMB))); + CUDA_CHECK(cudaMalloc(&log_weights_, size * sizeof(float))); +} + +template <> +__host__ FMBScene::~FMBScene() { + delete[] fmbs_; + delete[] log_weights_; +} + +template <> +__host__ FMBScene::~FMBScene() { + CUDA_CHECK(cudaFree(fmbs_)); + CUDA_CHECK(cudaFree(log_weights_)); +} diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index 3325be3..c329316 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -1,11 +1,9 @@ #pragma once -#include -#include - #include #include - +#include +#include #include "geometry.cuh" #include "utils.cuh" @@ -19,7 +17,7 @@ private: float3 extent_; public: - FMB(): pose_{}, extent_{1.0f, 1.0f, 1.0f} {}; + FMB() : pose_{}, extent_{1.0f, 1.0f, 1.0f} {}; FMB(const Pose& pose, float x_extent, float y_extent, float z_extent) noexcept(false) : pose_{pose} { @@ -38,73 +36,76 @@ public: CUDA_CALLABLE float quadratic_form(const Vec3D) const; }; - +template class FMBScene { private: - FMB *fmbs_; - float *log_weights_; + FMB* fmbs_; + float* log_weights_; size_t size_; public: - - // constructors __host__ FMBScene(size_t size); - CUDA_CALLABLE cuda::std::tuple operator[] (const uint32_t i) { + __host__ ~FMBScene(); + + CUDA_CALLABLE cuda::std::tuple operator[](const uint32_t i) { return cuda::std::tie(fmbs_[i], log_weights_[i]); } - CUDA_CALLABLE cuda::std::tuple - operator[] (const uint32_t i) const { + CUDA_CALLABLE cuda::std::tuple operator[](const uint32_t i) const { return cuda::std::tie(fmbs_[i], log_weights_[i]); } class Iterator { private: - FMB *fmb_ptr_; - float *log_weight_ptr_; + FMB* fmb_ptr_; + float* log_weight_ptr_; public: - - CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr): - fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} + CUDA_CALLABLE Iterator(FMB* const fmb_ptr, float* const log_weight_ptr) + : fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} CUDA_CALLABLE cuda::std::tuple operator*() { return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); } - CUDA_CALLABLE bool operator!=(const Iterator &other) const { - CUDA_CALLABLE return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; + CUDA_CALLABLE bool operator!=(const Iterator& other) const { + return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; + } + CUDA_CALLABLE Iterator& operator++() { + fmb_ptr_++, log_weight_ptr_++; + return *this; } - CUDA_CALLABLE Iterator &operator++() { fmb_ptr_++, log_weight_ptr_++; return *this; } - }; class ConstIterator { private: - const FMB *fmb_ptr_; - const float *log_weight_ptr_; + const FMB* fmb_ptr_; + const float* log_weight_ptr_; public: - - CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr): - fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} + CUDA_CALLABLE ConstIterator(const FMB* const fmb_ptr, const float* const log_weight_ptr) + : fmb_ptr_{fmb_ptr}, log_weight_ptr_{log_weight_ptr} {} CUDA_CALLABLE cuda::std::tuple operator*() const { return cuda::std::tie(*fmb_ptr_, *log_weight_ptr_); } - CUDA_CALLABLE bool operator!=(const ConstIterator &other) const { + CUDA_CALLABLE bool operator!=(const ConstIterator& other) const { return fmb_ptr_ != other.fmb_ptr_ || log_weight_ptr_ != other.log_weight_ptr_; } - CUDA_CALLABLE ConstIterator &operator++() { fmb_ptr_++, log_weight_ptr_++; return *this; } - + CUDA_CALLABLE ConstIterator& operator++() { + fmb_ptr_++, log_weight_ptr_++; + return *this; + } }; - - CUDA_CALLABLE Iterator begin() { return Iterator(fmbs_, log_weights_); } - CUDA_CALLABLE Iterator end() { return Iterator(fmbs_ + size_, log_weights_ + size_); } + CUDA_CALLABLE Iterator begin() { + return Iterator(fmbs_, log_weights_); + } + CUDA_CALLABLE Iterator end() { + return Iterator(fmbs_ + size_, log_weights_ + size_); + } CUDA_CALLABLE ConstIterator begin() const { return ConstIterator(fmbs_, log_weights_); } CUDA_CALLABLE ConstIterator end() const { return ConstIterator(fmbs_ + size_, log_weights_ + size_); } - }; diff --git a/tests/cpp_tests/test_fmb.cu b/tests/cpp_tests/test_fmb.cu index 1c764ba..73d6602 100644 --- a/tests/cpp_tests/test_fmb.cu +++ b/tests/cpp_tests/test_fmb.cu @@ -1,33 +1,31 @@ #include #include #include - #include #include #include "core/fmb.cuh" +#include "core/utils.cuh" -__global__ void dummy_kernel(FMBScene &scene, float *tot_extent_x) { +__global__ void dummy_kernel(FMBScene& scene, int* num_fmbs) { - float par_extent_x = 0; - - for(auto [fmb, w] : scene) { - par_extent_x += fmb.get_extent().x; - } + int _num_fmbs = 0; - *tot_extent_x = par_extent_x; + for (auto [fmb, w] : scene) { + _num_fmbs += 1; + } + *num_fmbs = _num_fmbs; } TEST(FMBTests, KernelRangeBasedForLoopSmokeTest) { - FMBScene dummy_scene(10); - thrust::device_vector device_res(1); + FMBScene dummy_scene(10); + thrust::device_vector device_res(1); dummy_kernel<<<1, 1>>>(dummy_scene, thrust::raw_pointer_cast(device_res.data())); - thrust::host_vector host_res = device_res; - - EXPECT_EQ(host_res[0], 10.0f); + thrust::host_vector host_res = device_res; + EXPECT_EQ(host_res[0], 10); } From 8582c5f73021dabd9f4a270781f0350e6a536b8d Mon Sep 17 00:00:00 2001 From: mugamma Date: Mon, 1 Dec 2025 20:25:15 +0000 Subject: [PATCH 17/19] remove unused header --- genmetaballs/src/cuda/core/fmb.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/genmetaballs/src/cuda/core/fmb.cuh b/genmetaballs/src/cuda/core/fmb.cuh index c329316..ed82bed 100644 --- a/genmetaballs/src/cuda/core/fmb.cuh +++ b/genmetaballs/src/cuda/core/fmb.cuh @@ -3,7 +3,6 @@ #include #include #include -#include #include "geometry.cuh" #include "utils.cuh" From abcec38602745d521168c725bf92b01bd84ea852 Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Tue, 2 Dec 2025 15:29:25 -0500 Subject: [PATCH 18/19] Fix compile error in `test_getter.cu` (`Pose extr();` defines a function, whereas `Pose extr;` create a new variable) --- tests/cpp_tests/test_getter.cu | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/cpp_tests/test_getter.cu b/tests/cpp_tests/test_getter.cu index 0232f03..3b01128 100644 --- a/tests/cpp_tests/test_getter.cu +++ b/tests/cpp_tests/test_getter.cu @@ -11,13 +11,12 @@ #include "core/getter.cuh" #include "core/utils.cuh" - TEST(AllGetterTest, AllGetterHostTest) { // Extract types from TypeParam (std::vector or thrust::device_vector) constexpr uint32_t num_fmbs = 40; FMBScene scene(num_fmbs); - Pose extr(); + Pose extr; AllGetter getter(scene, extr); // Create test rays @@ -49,8 +48,6 @@ TEST(AllGetterTest, AllGetterHostTest) { << "Returned FMBs container size must match all_fmbs size"; } } - - // TODO smoke test getter on device //__global__ void test_get_metaballs_kernel_device(const FMBScene* fmbs, const Pose* extr, @@ -61,7 +58,7 @@ TEST(AllGetterTest, AllGetterHostTest) { // out_sizes[idx] = static_cast(fmbs_returned.size()); //} // -//TEST(AllGetterTest, AllGetterHostTest) { +// TEST(AllGetterTest, AllGetterHostTest) { // // Extract types from TypeParam (std::vector or thrust::device_vector) // constexpr uint32_t num_fmbs = 40; // FMBScene host_scene(num_fmbs); From 0037377e3e1a22d41c1be139f8435e6a51bda5b3 Mon Sep 17 00:00:00 2001 From: Xiaoyan Wang Date: Tue, 2 Dec 2025 16:21:18 -0500 Subject: [PATCH 19/19] Re-enable device tests for getter --- tests/cpp_tests/test_getter.cu | 98 ++++++++++++++++------------------ 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/tests/cpp_tests/test_getter.cu b/tests/cpp_tests/test_getter.cu index 3b01128..f245cdd 100644 --- a/tests/cpp_tests/test_getter.cu +++ b/tests/cpp_tests/test_getter.cu @@ -12,7 +12,6 @@ #include "core/utils.cuh" TEST(AllGetterTest, AllGetterHostTest) { - // Extract types from TypeParam (std::vector or thrust::device_vector) constexpr uint32_t num_fmbs = 40; FMBScene scene(num_fmbs); @@ -49,53 +48,50 @@ TEST(AllGetterTest, AllGetterHostTest) { } } -// TODO smoke test getter on device -//__global__ void test_get_metaballs_kernel_device(const FMBScene* fmbs, const Pose* extr, -// const Ray* rays, int num_rays, int* out_sizes) { -// int idx = threadIdx.x + blockIdx.x * blockDim.x; -// AllGetter getter(*fmbs, *extr); -// const auto& fmbs_returned = getter.get_metaballs(rays[idx]); -// out_sizes[idx] = static_cast(fmbs_returned.size()); -//} -// -// TEST(AllGetterTest, AllGetterHostTest) { -// // Extract types from TypeParam (std::vector or thrust::device_vector) -// constexpr uint32_t num_fmbs = 40; -// FMBScene host_scene(num_fmbs); -// FMBScene device_scene(num_fmbs); -// -// AllGetter getter(scene, Pose()); -// -// // Create test rays -// std::vector rays = { -// Ray{Vec3D{0.0f, 0.0f, 0.0f}, Vec3D{1.0f, 0.0f, 0.0f}}, -// Ray{Vec3D{1.0f, 1.0f, 1.0f}, Vec3D{0.0f, 1.0f, 0.0f}}, -// Ray{Vec3D{-1.0f, -1.0f, -1.0f}, Vec3D{0.0f, 0.0f, 1.0f}}, -// Ray{Vec3D{2.5f, -3.1f, 0.2f}, Vec3D{-0.5f, 0.6f, 0.0f}}, -// Ray{Vec3D{4.4f, 0.0f, -0.9f}, Vec3D{0.3f, -0.2f, 1.0f}}, -// Ray{Vec3D{5.0f, 2.2f, 1.1f}, Vec3D{-1.0f, 2.0f, 0.2f}}, -// Ray{Vec3D{0.0f, 7.0f, 6.0f}, Vec3D{0.0f, -1.0f, -1.0f}}, -// Ray{Vec3D{-2.0f, 0.0f, 0.0f}, Vec3D{0.2f, 1.1f, 0.7f}}, -// Ray{Vec3D{9.1f, -0.3f, 2.7f}, Vec3D{-0.3f, 0.1f, 0.0f}}, -// Ray{Vec3D{1.2f, 8.8f, -4.5f}, Vec3D{1.0f, 0.0f, 1.0f}}, -// }; -// -// // Test on GPU for device containers -// int num_rays = static_cast(rays.size()); -// thrust::device_vector device_sizes(num_rays); -// -// // Launch kernel that constructs getter and calls get_metaballs on the device -// test_get_metaballs_kernel_device -// <<<1, num_rays>>>(device_scene, d_extr, d_rays, num_rays, d_sizes); -// CUDA_CHECK(cudaGetLastError()); -// CUDA_CHECK(cudaDeviceSynchronize()); -// -// thrust::host_vector host_sizes = device_vector; -// -// // Verify that get_metaballs returns the correct size for all rays -// for (int i = 0; i < num_rays; ++i) { -// EXPECT_EQ(static_cast(host_sizes[i]), scene.size()) -// << "Device get_metaballs returned correct size for ray " << i; -// } -// -//} +__global__ void test_get_metaballs_kernel_device(const AllGetter fmb_getter, + const Ray* rays, int num_rays, int* out_sizes) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + const auto& fmbs_returned = fmb_getter.get_metaballs(rays[idx]); + out_sizes[idx] = static_cast(fmbs_returned.size()); +} + +TEST(AllGetterTest, AllGetterDeviceTest) { + constexpr uint32_t num_fmbs = 40; + FMBScene device_scene(num_fmbs); + Pose extr; + + AllGetter getter(device_scene, extr); + + // Create test rays + std::vector rays = { + Ray{Vec3D{0.0f, 0.0f, 0.0f}, Vec3D{1.0f, 0.0f, 0.0f}}, + Ray{Vec3D{1.0f, 1.0f, 1.0f}, Vec3D{0.0f, 1.0f, 0.0f}}, + Ray{Vec3D{-1.0f, -1.0f, -1.0f}, Vec3D{0.0f, 0.0f, 1.0f}}, + Ray{Vec3D{2.5f, -3.1f, 0.2f}, Vec3D{-0.5f, 0.6f, 0.0f}}, + Ray{Vec3D{4.4f, 0.0f, -0.9f}, Vec3D{0.3f, -0.2f, 1.0f}}, + Ray{Vec3D{5.0f, 2.2f, 1.1f}, Vec3D{-1.0f, 2.0f, 0.2f}}, + Ray{Vec3D{0.0f, 7.0f, 6.0f}, Vec3D{0.0f, -1.0f, -1.0f}}, + Ray{Vec3D{-2.0f, 0.0f, 0.0f}, Vec3D{0.2f, 1.1f, 0.7f}}, + Ray{Vec3D{9.1f, -0.3f, 2.7f}, Vec3D{-0.3f, 0.1f, 0.0f}}, + Ray{Vec3D{1.2f, 8.8f, -4.5f}, Vec3D{1.0f, 0.0f, 1.0f}}, + }; + + // Test on GPU for device containers + int num_rays = static_cast(rays.size()); + thrust::device_vector device_sizes(num_rays); + + // Launch kernel that constructs getter and calls get_metaballs on the device + test_get_metaballs_kernel_device<<<1, num_rays>>>( + getter, thrust::raw_pointer_cast(rays.data()), num_rays, + thrust::raw_pointer_cast(device_sizes.data())); + CUDA_CHECK(cudaGetLastError()); + CUDA_CHECK(cudaDeviceSynchronize()); + + thrust::host_vector host_sizes = device_sizes; + + // Verify that get_metaballs returns the correct size for all rays + for (int i = 0; i < num_rays; ++i) { + EXPECT_EQ(static_cast(host_sizes[i]), device_scene.size()) + << "Device get_metaballs returned correct size for ray " << i; + } +}