From 497fceb12026c384098fe92db490595177d09392 Mon Sep 17 00:00:00 2001 From: xinghanDM Date: Mon, 30 Mar 2026 10:50:06 +0800 Subject: [PATCH 1/5] fix(perception): stabilize ISAM graph; cut GC churn on UF track building --- tinynav/core/math_utils.py | 46 ++++++++++++++++++++++++++++----- tinynav/core/perception_node.py | 23 ++++++++++++++--- 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py index e5bf697..8657005 100644 --- a/tinynav/core/math_utils.py +++ b/tinynav/core/math_utils.py @@ -274,12 +274,46 @@ def uf_union(a, b, parent, rank): rank[ra] += 1 return ra -def uf_all_sets_list(parent): - root_to_members = {} - for i in range(len(parent)): - r = parent[i] - root_to_members.setdefault(r, []).append(i) - return list(root_to_members.values()) + +@njit(cache=True) +def uf_fill_roots(parent, out_roots): + """Write canonical root for each element; path-compresses parent in place.""" + n = len(parent) + for i in range(n): + out_roots[i] = uf_find(i, parent) + + +def uf_all_sets_list(parent, min_component_size=1, out_roots=None): + """ + Connected components as lists of member indices. Mutates parent (path compression). + + Uses argsort + run boundaries instead of defaultdict+lists to cut per-frame Python + object churn (many singletons + GC spikes in tight loops). + + min_component_size: drop components with fewer members (e.g. 2 for landmark tracks). + out_roots: optional (n,) int64 buffer; reuse from caller to avoid allocating roots each call. + """ + n = len(parent) + if n == 0: + return [] + if out_roots is not None and out_roots.shape[0] >= n: + roots = out_roots[:n] + else: + roots = np.empty(n, dtype=np.int64) + uf_fill_roots(parent, roots) + + order = np.argsort(roots, kind="mergesort") + sorted_r = roots[order] + diff = np.r_[True, sorted_r[1:] != sorted_r[:-1]] + starts = np.flatnonzero(diff) + out = [] + ms = int(min_component_size) + for k in range(len(starts)): + a = int(starts[k]) + b = int(starts[k + 1]) if k + 1 < len(starts) else n + if b - a >= ms: + out.append(order[a:b].tolist()) + return out diff --git a/tinynav/core/perception_node.py b/tinynav/core/perception_node.py index 1ead4e2..46283a7 100644 --- a/tinynav/core/perception_node.py +++ b/tinynav/core/perception_node.py @@ -147,6 +147,8 @@ def __init__(self, verbose_timer: bool = True): self.imu_measurements = deque(maxlen=1000) self.keyframe_queue = [] + # Reused for uf_all_sets_list to avoid per-frame np.empty on the hot path. + self._uf_track_roots_buf = np.empty(_N * _M, dtype=np.int64) self.logger.info("PerceptionNode initialized.") self.process_cnt = 0 @@ -243,7 +245,9 @@ async def process(self, left_msg, right_msg): if timestamp <= self.keyframe_queue[-1].latest_imu_timestamp: self.imu_measurements.popleft() - self.logger.warning("should only happen at beginning") + self.logger.debug( + "Dropping IMU sample <= latest_imu_timestamp (reorder/duplicate or stale queue)" + ) continue self.keyframe_queue[-1].preintegrated_imu.integrateMeasurement(accel, gyro, dt) #todo @@ -291,6 +295,7 @@ async def process(self, left_msg, right_msg): ) if len(self.keyframe_queue) > _N: self.keyframe_queue.pop(0) + _t_isam_processing0 = time.perf_counter() with Timer(name="[ISAM Processing]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.info): with Timer(name="[adding imu]", text="[{name}] Elapsed time: {milliseconds:.03f} ms", logger=self.logger.debug): # we have new graph each time @@ -413,7 +418,15 @@ async def process(self, left_msg, right_msg): self.logger.debug(f"{i} match {j} after Pnp filter count: {count}") with Timer(name="[found track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): - tracks = [track for track in uf_all_sets_list(parent) if len(track) >= 2] + _t_found_track0 = time.perf_counter() + tracks = uf_all_sets_list( + parent, + min_component_size=2, + out_roots=self._uf_track_roots_buf, + ) + self.found_track_time_pub.publish( + Float64(data=time.perf_counter() - _t_found_track0) + ) self.logger.debug(f"Found {len(tracks)} tracks after data association.") with Timer(name="[add track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): @@ -450,7 +463,11 @@ async def process(self, left_msg, right_msg): ) smart_factor.add(stereo_meas, X(pose_idx), calib) graph.add(smart_factor) - + + self.isam_processing_time_pub.publish( + Float64(data=time.perf_counter() - _t_isam_processing0) + ) + with Timer(name="[Solver]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): params = gtsam.LevenbergMarquardtParams() # set iteration limit From 9d80ae459b2377fb43b77faf1874a3d5fb4b91b2 Mon Sep 17 00:00:00 2001 From: xinghanDM Date: Tue, 31 Mar 2026 10:54:19 +0800 Subject: [PATCH 2/5] clean the code --- tinynav/core/math_utils.py | 19 +++++-------------- tinynav/core/perception_node.py | 13 +------------ 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py index 8657005..16ca9f8 100644 --- a/tinynav/core/math_utils.py +++ b/tinynav/core/math_utils.py @@ -277,29 +277,20 @@ def uf_union(a, b, parent, rank): @njit(cache=True) def uf_fill_roots(parent, out_roots): - """Write canonical root for each element; path-compresses parent in place.""" n = len(parent) for i in range(n): out_roots[i] = uf_find(i, parent) def uf_all_sets_list(parent, min_component_size=1, out_roots=None): - """ - Connected components as lists of member indices. Mutates parent (path compression). - - Uses argsort + run boundaries instead of defaultdict+lists to cut per-frame Python - object churn (many singletons + GC spikes in tight loops). - - min_component_size: drop components with fewer members (e.g. 2 for landmark tracks). - out_roots: optional (n,) int64 buffer; reuse from caller to avoid allocating roots each call. - """ n = len(parent) if n == 0: return [] - if out_roots is not None and out_roots.shape[0] >= n: - roots = out_roots[:n] - else: - roots = np.empty(n, dtype=np.int64) + roots = ( + out_roots[:n] + if out_roots is not None and out_roots.shape[0] >= n + else np.empty(n, dtype=np.int64) + ) uf_fill_roots(parent, roots) order = np.argsort(roots, kind="mergesort") diff --git a/tinynav/core/perception_node.py b/tinynav/core/perception_node.py index 46283a7..c7d75e1 100644 --- a/tinynav/core/perception_node.py +++ b/tinynav/core/perception_node.py @@ -245,9 +245,7 @@ async def process(self, left_msg, right_msg): if timestamp <= self.keyframe_queue[-1].latest_imu_timestamp: self.imu_measurements.popleft() - self.logger.debug( - "Dropping IMU sample <= latest_imu_timestamp (reorder/duplicate or stale queue)" - ) + self.logger.warning("should only happen at beginning") continue self.keyframe_queue[-1].preintegrated_imu.integrateMeasurement(accel, gyro, dt) #todo @@ -295,7 +293,6 @@ async def process(self, left_msg, right_msg): ) if len(self.keyframe_queue) > _N: self.keyframe_queue.pop(0) - _t_isam_processing0 = time.perf_counter() with Timer(name="[ISAM Processing]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.info): with Timer(name="[adding imu]", text="[{name}] Elapsed time: {milliseconds:.03f} ms", logger=self.logger.debug): # we have new graph each time @@ -418,15 +415,11 @@ async def process(self, left_msg, right_msg): self.logger.debug(f"{i} match {j} after Pnp filter count: {count}") with Timer(name="[found track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): - _t_found_track0 = time.perf_counter() tracks = uf_all_sets_list( parent, min_component_size=2, out_roots=self._uf_track_roots_buf, ) - self.found_track_time_pub.publish( - Float64(data=time.perf_counter() - _t_found_track0) - ) self.logger.debug(f"Found {len(tracks)} tracks after data association.") with Timer(name="[add track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): @@ -464,10 +457,6 @@ async def process(self, left_msg, right_msg): smart_factor.add(stereo_meas, X(pose_idx), calib) graph.add(smart_factor) - self.isam_processing_time_pub.publish( - Float64(data=time.perf_counter() - _t_isam_processing0) - ) - with Timer(name="[Solver]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): params = gtsam.LevenbergMarquardtParams() # set iteration limit From d0db1f872341b140118e3524df4eba10b1ecb211 Mon Sep 17 00:00:00 2001 From: xinghanDM Date: Tue, 31 Mar 2026 17:24:51 +0800 Subject: [PATCH 3/5] refactor --- tinynav/core/math_utils.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py index 16ca9f8..993a11e 100644 --- a/tinynav/core/math_utils.py +++ b/tinynav/core/math_utils.py @@ -294,15 +294,13 @@ def uf_all_sets_list(parent, min_component_size=1, out_roots=None): uf_fill_roots(parent, roots) order = np.argsort(roots, kind="mergesort") - sorted_r = roots[order] - diff = np.r_[True, sorted_r[1:] != sorted_r[:-1]] - starts = np.flatnonzero(diff) + sr = roots[order] + starts = np.flatnonzero(np.r_[True, sr[1:] != sr[:-1]]) out = [] - ms = int(min_component_size) for k in range(len(starts)): - a = int(starts[k]) - b = int(starts[k + 1]) if k + 1 < len(starts) else n - if b - a >= ms: + a = starts[k] + b = starts[k + 1] if k + 1 < len(starts) else n + if b - a >= min_component_size: out.append(order[a:b].tolist()) return out From 8f2e52b8b87b01024f7f273167d02d582a00676f Mon Sep 17 00:00:00 2001 From: xinghanDM Date: Tue, 31 Mar 2026 19:49:50 +0800 Subject: [PATCH 4/5] add fufpy instead --- pyproject.toml | 1 + tinynav/core/math_utils.py | 68 +++++---------------------------- tinynav/core/perception_node.py | 12 ++---- uv.lock | 15 ++++++++ 4 files changed, 28 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index efea388..d9d6d64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "jupyterlab", "codetiming", "numba", + "fufpy>=0.1.1", "pygame", "cuda_python==12.6.0; platform_machine == 'aarch64'", "cuda-python==12.2.0; platform_machine == 'x86_64'", diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py index 993a11e..39193ae 100644 --- a/tinynav/core/math_utils.py +++ b/tinynav/core/math_utils.py @@ -4,6 +4,7 @@ from geometry_msgs.msg import TransformStamped from nav_msgs.msg import Odometry import cv2 +import fufpy from tinynav.core.func import lru_cache_numpy @njit(cache=True) @@ -237,71 +238,20 @@ def estimate_pose(kpts_prev, kpts_curr, depth, K, idx_valid=None): inlier_idx_original = idx_valid[inliers] return True, T, inliers_2d, inliers_3d, inlier_idx_original -# Disjoint Set (Union-Find) implementation with path compression and union by rank -@njit(cache=True) +# Union–find via fufpy (https://github.com/LuisScoccola/fufpy) def uf_init(n): - parent = np.empty(n, np.int64) - rank = np.zeros(n, np.int64) - for i in range(n): - parent[i] = i - return parent, rank - -@njit(cache=True) -def uf_find(i, parent): - root = i - while parent[root] != root: - root = parent[root] - while parent[i] != i: - p = parent[i] - parent[i] = root - i = p - return root + return fufpy.dynamic_partition_create(int(n)) -@njit(cache=True) -def uf_union(a, b, parent, rank): - ra = uf_find(a, parent) - rb = uf_find(b, parent) - if ra == rb: - return ra - if rank[ra] < rank[rb]: - parent[ra] = rb - return rb - elif rank[ra] > rank[rb]: - parent[rb] = ra - return ra - else: - parent[rb] = ra - rank[ra] += 1 - return ra +def uf_union(a, b, uf, _rank=None): + return fufpy.dynamic_partition_union(uf, int(a), int(b)) -@njit(cache=True) -def uf_fill_roots(parent, out_roots): - n = len(parent) - for i in range(n): - out_roots[i] = uf_find(i, parent) - - -def uf_all_sets_list(parent, min_component_size=1, out_roots=None): - n = len(parent) - if n == 0: - return [] - roots = ( - out_roots[:n] - if out_roots is not None and out_roots.shape[0] >= n - else np.empty(n, dtype=np.int64) - ) - uf_fill_roots(parent, roots) - order = np.argsort(roots, kind="mergesort") - sr = roots[order] - starts = np.flatnonzero(np.r_[True, sr[1:] != sr[:-1]]) +def uf_all_sets_list(uf, min_component_size=1): out = [] - for k in range(len(starts)): - a = starts[k] - b = starts[k + 1] if k + 1 < len(starts) else n - if b - a >= min_component_size: - out.append(order[a:b].tolist()) + for part in fufpy.dynamic_partition_parts(uf): + if part.size >= int(min_component_size): + out.append(np.sort(part).tolist()) return out diff --git a/tinynav/core/perception_node.py b/tinynav/core/perception_node.py index c7d75e1..b77cdad 100644 --- a/tinynav/core/perception_node.py +++ b/tinynav/core/perception_node.py @@ -147,8 +147,6 @@ def __init__(self, verbose_timer: bool = True): self.imu_measurements = deque(maxlen=1000) self.keyframe_queue = [] - # Reused for uf_all_sets_list to avoid per-frame np.empty on the hot path. - self._uf_track_roots_buf = np.empty(_N * _M, dtype=np.int64) self.logger.info("PerceptionNode initialized.") self.process_cnt = 0 @@ -340,7 +338,7 @@ async def process(self, left_msg, right_msg): with Timer(name="[init extract info]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): extract_info = [await self.superpoint.infer(kf.image) for kf in self.keyframe_queue[-_N:]] - parent, rank = uf_init(len(self.keyframe_queue[-_N:]) * _M) + uf = uf_init(len(self.keyframe_queue[-_N:]) * _M) self.logger.debug(f"Processing {len(self.keyframe_queue)} keyframes for data association.") @@ -410,16 +408,12 @@ async def process(self, left_msg, right_msg): if match_idx != -1: idx_prev = i * _M + k idx_curr = j * _M + match_idx - uf_union(idx_prev, idx_curr, parent, rank) + uf_union(idx_prev, idx_curr, uf) count += 1 self.logger.debug(f"{i} match {j} after Pnp filter count: {count}") with Timer(name="[found track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): - tracks = uf_all_sets_list( - parent, - min_component_size=2, - out_roots=self._uf_track_roots_buf, - ) + tracks = uf_all_sets_list(uf, min_component_size=2) self.logger.debug(f"Found {len(tracks)} tracks after data association.") with Timer(name="[add track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug): diff --git a/uv.lock b/uv.lock index 5457508..943f1dd 100644 --- a/uv.lock +++ b/uv.lock @@ -1008,6 +1008,19 @@ http = [ { name = "aiohttp" }, ] +[[package]] +name = "fufpy" +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numba" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/26/d90fcf23c5cd5108403bf0f45c9b48df2b1574d7e44cc53fb2c132e3eb1e/fufpy-0.1.1.tar.gz", hash = "sha256:c05d336b3f2484170b08d83971d901d6a38d3f5e351f55ecff3061dd3bab99d3", size = 4631, upload-time = "2025-02-28T19:35:03.955Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/27/2f50a21216f50ebc84a77e286002e54bd1c515a79404d44810de4f95ac03/fufpy-0.1.1-py3-none-any.whl", hash = "sha256:e256b5426860b30b7ad592f7377abd1e63a999cf9642e4a80761bc6aebe1ecd5", size = 5116, upload-time = "2025-02-28T19:35:02.165Z" }, +] + [[package]] name = "gdown" version = "5.2.0" @@ -3996,6 +4009,7 @@ dependencies = [ { name = "cuda-python", version = "12.2.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64'" }, { name = "cuda-python", version = "12.6.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'aarch64'" }, { name = "einops" }, + { name = "fufpy" }, { name = "huggingface-hub" }, { name = "jupyterlab" }, { name = "matplotlib" }, @@ -4040,6 +4054,7 @@ requires-dist = [ { name = "cuda-python", marker = "platform_machine == 'aarch64'", specifier = "==12.6.0" }, { name = "cuda-python", marker = "platform_machine == 'x86_64'", specifier = "==12.2.0" }, { name = "einops" }, + { name = "fufpy", specifier = ">=0.1.1" }, { name = "huggingface-hub" }, { name = "jupyterlab" }, { name = "lerobot", extras = ["lekiwi"], marker = "extra == 'lekiwi'", specifier = "==0.3.3" }, From 218dfeb80b3df85755e3067435559f93e5c28964 Mon Sep 17 00:00:00 2001 From: xinghanDM Date: Tue, 31 Mar 2026 21:38:43 +0800 Subject: [PATCH 5/5] delete the version number of fufpy --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d9d6d64..46d351c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "jupyterlab", "codetiming", "numba", - "fufpy>=0.1.1", + "fufpy", "pygame", "cuda_python==12.6.0; platform_machine == 'aarch64'", "cuda-python==12.2.0; platform_machine == 'x86_64'",