From 497fceb12026c384098fe92db490595177d09392 Mon Sep 17 00:00:00 2001
From: xinghanDM <xinghan.li@deepmirror.com>
Date: Mon, 30 Mar 2026 10:50:06 +0800
Subject: [PATCH 1/5] fix(perception): stabilize ISAM graph; cut GC churn on UF
 track building

---
 tinynav/core/math_utils.py      | 46 ++++++++++++++++++++++++++++-----
 tinynav/core/perception_node.py | 23 ++++++++++++++---
 2 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py
index e5bf697..8657005 100644
--- a/tinynav/core/math_utils.py
+++ b/tinynav/core/math_utils.py
@@ -274,12 +274,46 @@ def uf_union(a, b, parent, rank):
         rank[ra] += 1
         return ra
 
-def uf_all_sets_list(parent):
-    root_to_members = {}
-    for i in range(len(parent)):
-        r = parent[i]
-        root_to_members.setdefault(r, []).append(i)
-    return list(root_to_members.values())
+
+@njit(cache=True)
+def uf_fill_roots(parent, out_roots):
+    """Write canonical root for each element; path-compresses parent in place."""
+    n = len(parent)
+    for i in range(n):
+        out_roots[i] = uf_find(i, parent)
+
+
+def uf_all_sets_list(parent, min_component_size=1, out_roots=None):
+    """
+    Connected components as lists of member indices. Mutates parent (path compression).
+
+    Uses argsort + run boundaries instead of defaultdict+lists to cut per-frame Python
+    object churn (many singletons + GC spikes in tight loops).
+
+    min_component_size: drop components with fewer members (e.g. 2 for landmark tracks).
+    out_roots: optional (n,) int64 buffer; reuse from caller to avoid allocating roots each call.
+    """
+    n = len(parent)
+    if n == 0:
+        return []
+    if out_roots is not None and out_roots.shape[0] >= n:
+        roots = out_roots[:n]
+    else:
+        roots = np.empty(n, dtype=np.int64)
+    uf_fill_roots(parent, roots)
+
+    order = np.argsort(roots, kind="mergesort")
+    sorted_r = roots[order]
+    diff = np.r_[True, sorted_r[1:] != sorted_r[:-1]]
+    starts = np.flatnonzero(diff)
+    out = []
+    ms = int(min_component_size)
+    for k in range(len(starts)):
+        a = int(starts[k])
+        b = int(starts[k + 1]) if k + 1 < len(starts) else n
+        if b - a >= ms:
+            out.append(order[a:b].tolist())
+    return out
 
 
 
diff --git a/tinynav/core/perception_node.py b/tinynav/core/perception_node.py
index 1ead4e2..46283a7 100644
--- a/tinynav/core/perception_node.py
+++ b/tinynav/core/perception_node.py
@@ -147,6 +147,8 @@ def __init__(self, verbose_timer: bool = True):
         self.imu_measurements = deque(maxlen=1000)
 
         self.keyframe_queue = []
+        # Reused for uf_all_sets_list to avoid per-frame np.empty on the hot path.
+        self._uf_track_roots_buf = np.empty(_N * _M, dtype=np.int64)
         self.logger.info("PerceptionNode initialized.")
         self.process_cnt = 0
 
@@ -243,7 +245,9 @@ async def process(self, left_msg, right_msg):
 
             if timestamp <= self.keyframe_queue[-1].latest_imu_timestamp:
                 self.imu_measurements.popleft()
-                self.logger.warning("should only happen at beginning")
+                self.logger.debug(
+                    "Dropping IMU sample <= latest_imu_timestamp (reorder/duplicate or stale queue)"
+                )
                 continue
 
             self.keyframe_queue[-1].preintegrated_imu.integrateMeasurement(accel, gyro, dt) #todo
@@ -291,6 +295,7 @@ async def process(self, left_msg, right_msg):
         )
         if len(self.keyframe_queue) > _N:
             self.keyframe_queue.pop(0)
+        _t_isam_processing0 = time.perf_counter()
         with Timer(name="[ISAM Processing]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.info):
             with Timer(name="[adding imu]", text="[{name}] Elapsed time: {milliseconds:.03f} ms", logger=self.logger.debug):
                 # we have new graph each time
@@ -413,7 +418,15 @@ async def process(self, left_msg, right_msg):
                         self.logger.debug(f"{i} match {j} after Pnp filter count: {count}")
 
             with Timer(name="[found track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
-                tracks = [track for track in uf_all_sets_list(parent) if len(track) >= 2]
+                _t_found_track0 = time.perf_counter()
+                tracks = uf_all_sets_list(
+                    parent,
+                    min_component_size=2,
+                    out_roots=self._uf_track_roots_buf,
+                )
+                self.found_track_time_pub.publish(
+                    Float64(data=time.perf_counter() - _t_found_track0)
+                )
                 self.logger.debug(f"Found {len(tracks)} tracks after data association.")
 
             with Timer(name="[add track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
@@ -450,7 +463,11 @@ async def process(self, left_msg, right_msg):
                         )
                         smart_factor.add(stereo_meas, X(pose_idx), calib)
                     graph.add(smart_factor)
-            
+
+        self.isam_processing_time_pub.publish(
+            Float64(data=time.perf_counter() - _t_isam_processing0)
+        )
+
         with Timer(name="[Solver]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
             params = gtsam.LevenbergMarquardtParams()
             # set iteration limit

From 9d80ae459b2377fb43b77faf1874a3d5fb4b91b2 Mon Sep 17 00:00:00 2001
From: xinghanDM <xinghan.li@deepmirror.com>
Date: Tue, 31 Mar 2026 10:54:19 +0800
Subject: [PATCH 2/5] remove timing publishers and docstrings; restore IMU warning

---
 tinynav/core/math_utils.py      | 19 +++++--------------
 tinynav/core/perception_node.py | 13 +------------
 2 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py
index 8657005..16ca9f8 100644
--- a/tinynav/core/math_utils.py
+++ b/tinynav/core/math_utils.py
@@ -277,29 +277,20 @@ def uf_union(a, b, parent, rank):
 
 @njit(cache=True)
 def uf_fill_roots(parent, out_roots):
-    """Write canonical root for each element; path-compresses parent in place."""
     n = len(parent)
     for i in range(n):
         out_roots[i] = uf_find(i, parent)
 
 
 def uf_all_sets_list(parent, min_component_size=1, out_roots=None):
-    """
-    Connected components as lists of member indices. Mutates parent (path compression).
-
-    Uses argsort + run boundaries instead of defaultdict+lists to cut per-frame Python
-    object churn (many singletons + GC spikes in tight loops).
-
-    min_component_size: drop components with fewer members (e.g. 2 for landmark tracks).
-    out_roots: optional (n,) int64 buffer; reuse from caller to avoid allocating roots each call.
-    """
     n = len(parent)
     if n == 0:
         return []
-    if out_roots is not None and out_roots.shape[0] >= n:
-        roots = out_roots[:n]
-    else:
-        roots = np.empty(n, dtype=np.int64)
+    roots = (
+        out_roots[:n]
+        if out_roots is not None and out_roots.shape[0] >= n
+        else np.empty(n, dtype=np.int64)
+    )
     uf_fill_roots(parent, roots)
 
     order = np.argsort(roots, kind="mergesort")
diff --git a/tinynav/core/perception_node.py b/tinynav/core/perception_node.py
index 46283a7..c7d75e1 100644
--- a/tinynav/core/perception_node.py
+++ b/tinynav/core/perception_node.py
@@ -245,9 +245,7 @@ async def process(self, left_msg, right_msg):
 
             if timestamp <= self.keyframe_queue[-1].latest_imu_timestamp:
                 self.imu_measurements.popleft()
-                self.logger.debug(
-                    "Dropping IMU sample <= latest_imu_timestamp (reorder/duplicate or stale queue)"
-                )
+                self.logger.warning("should only happen at beginning")
                 continue
 
             self.keyframe_queue[-1].preintegrated_imu.integrateMeasurement(accel, gyro, dt) #todo
@@ -295,7 +293,6 @@ async def process(self, left_msg, right_msg):
         )
         if len(self.keyframe_queue) > _N:
             self.keyframe_queue.pop(0)
-        _t_isam_processing0 = time.perf_counter()
         with Timer(name="[ISAM Processing]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.info):
             with Timer(name="[adding imu]", text="[{name}] Elapsed time: {milliseconds:.03f} ms", logger=self.logger.debug):
                 # we have new graph each time
@@ -418,15 +415,11 @@ async def process(self, left_msg, right_msg):
                         self.logger.debug(f"{i} match {j} after Pnp filter count: {count}")
 
             with Timer(name="[found track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
-                _t_found_track0 = time.perf_counter()
                 tracks = uf_all_sets_list(
                     parent,
                     min_component_size=2,
                     out_roots=self._uf_track_roots_buf,
                 )
-                self.found_track_time_pub.publish(
-                    Float64(data=time.perf_counter() - _t_found_track0)
-                )
                 self.logger.debug(f"Found {len(tracks)} tracks after data association.")
 
             with Timer(name="[add track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
@@ -464,10 +457,6 @@ async def process(self, left_msg, right_msg):
                         smart_factor.add(stereo_meas, X(pose_idx), calib)
                     graph.add(smart_factor)
 
-        self.isam_processing_time_pub.publish(
-            Float64(data=time.perf_counter() - _t_isam_processing0)
-        )
-
         with Timer(name="[Solver]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
             params = gtsam.LevenbergMarquardtParams()
             # set iteration limit

From d0db1f872341b140118e3524df4eba10b1ecb211 Mon Sep 17 00:00:00 2001
From: xinghanDM <xinghan.li@deepmirror.com>
Date: Tue, 31 Mar 2026 17:24:51 +0800
Subject: [PATCH 3/5] refactor: simplify uf_all_sets_list run-boundary loop

---
 tinynav/core/math_utils.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py
index 16ca9f8..993a11e 100644
--- a/tinynav/core/math_utils.py
+++ b/tinynav/core/math_utils.py
@@ -294,15 +294,13 @@ def uf_all_sets_list(parent, min_component_size=1, out_roots=None):
     uf_fill_roots(parent, roots)
 
     order = np.argsort(roots, kind="mergesort")
-    sorted_r = roots[order]
-    diff = np.r_[True, sorted_r[1:] != sorted_r[:-1]]
-    starts = np.flatnonzero(diff)
+    sr = roots[order]
+    starts = np.flatnonzero(np.r_[True, sr[1:] != sr[:-1]])
     out = []
-    ms = int(min_component_size)
     for k in range(len(starts)):
-        a = int(starts[k])
-        b = int(starts[k + 1]) if k + 1 < len(starts) else n
-        if b - a >= ms:
+        a = starts[k]
+        b = starts[k + 1] if k + 1 < len(starts) else n
+        if b - a >= min_component_size:
             out.append(order[a:b].tolist())
     return out
 

From 8f2e52b8b87b01024f7f273167d02d582a00676f Mon Sep 17 00:00:00 2001
From: xinghanDM <xinghan.li@deepmirror.com>
Date: Tue, 31 Mar 2026 19:49:50 +0800
Subject: [PATCH 4/5] replace hand-rolled numba union-find with fufpy

---
 pyproject.toml                  |  1 +
 tinynav/core/math_utils.py      | 68 +++++----------------------------
 tinynav/core/perception_node.py | 12 ++----
 uv.lock                         | 15 ++++++++
 4 files changed, 28 insertions(+), 68 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index efea388..d9d6d64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
     "jupyterlab",
     "codetiming",
     "numba",
+    "fufpy>=0.1.1",
     "pygame",
     "cuda_python==12.6.0; platform_machine == 'aarch64'",
     "cuda-python==12.2.0; platform_machine == 'x86_64'",
diff --git a/tinynav/core/math_utils.py b/tinynav/core/math_utils.py
index 993a11e..39193ae 100644
--- a/tinynav/core/math_utils.py
+++ b/tinynav/core/math_utils.py
@@ -4,6 +4,7 @@
 from geometry_msgs.msg import TransformStamped
 from nav_msgs.msg import Odometry
 import cv2
+import fufpy
 from tinynav.core.func import lru_cache_numpy
 
 @njit(cache=True)
@@ -237,71 +238,20 @@ def estimate_pose(kpts_prev, kpts_curr, depth, K, idx_valid=None):
     inlier_idx_original = idx_valid[inliers]
     return True, T, inliers_2d, inliers_3d, inlier_idx_original
 
-# Disjoint Set (Union-Find) implementation with path compression and union by rank
-@njit(cache=True)
+# Union–find via fufpy (https://github.com/LuisScoccola/fufpy)
 def uf_init(n):
-    parent = np.empty(n, np.int64)
-    rank = np.zeros(n, np.int64)
-    for i in range(n):
-        parent[i] = i
-    return parent, rank
-
-@njit(cache=True)
-def uf_find(i, parent):
-    root = i
-    while parent[root] != root:
-        root = parent[root]
-    while parent[i] != i:
-        p = parent[i]
-        parent[i] = root
-        i = p
-    return root
+    return fufpy.dynamic_partition_create(int(n))
 
-@njit(cache=True)
-def uf_union(a, b, parent, rank):
-    ra = uf_find(a, parent)
-    rb = uf_find(b, parent)
-    if ra == rb:
-        return ra
-    if rank[ra] < rank[rb]:
-        parent[ra] = rb
-        return rb
-    elif rank[ra] > rank[rb]:
-        parent[rb] = ra
-        return ra
-    else:
-        parent[rb] = ra
-        rank[ra] += 1
-        return ra
 
+def uf_union(a, b, uf, _rank=None):
+    return fufpy.dynamic_partition_union(uf, int(a), int(b))
 
-@njit(cache=True)
-def uf_fill_roots(parent, out_roots):
-    n = len(parent)
-    for i in range(n):
-        out_roots[i] = uf_find(i, parent)
-
-
-def uf_all_sets_list(parent, min_component_size=1, out_roots=None):
-    n = len(parent)
-    if n == 0:
-        return []
-    roots = (
-        out_roots[:n]
-        if out_roots is not None and out_roots.shape[0] >= n
-        else np.empty(n, dtype=np.int64)
-    )
-    uf_fill_roots(parent, roots)
 
-    order = np.argsort(roots, kind="mergesort")
-    sr = roots[order]
-    starts = np.flatnonzero(np.r_[True, sr[1:] != sr[:-1]])
+def uf_all_sets_list(uf, min_component_size=1):
     out = []
-    for k in range(len(starts)):
-        a = starts[k]
-        b = starts[k + 1] if k + 1 < len(starts) else n
-        if b - a >= min_component_size:
-            out.append(order[a:b].tolist())
+    for part in fufpy.dynamic_partition_parts(uf):
+        if part.size >= int(min_component_size):
+            out.append(np.sort(part).tolist())
     return out
 
 
diff --git a/tinynav/core/perception_node.py b/tinynav/core/perception_node.py
index c7d75e1..b77cdad 100644
--- a/tinynav/core/perception_node.py
+++ b/tinynav/core/perception_node.py
@@ -147,8 +147,6 @@ def __init__(self, verbose_timer: bool = True):
         self.imu_measurements = deque(maxlen=1000)
 
         self.keyframe_queue = []
-        # Reused for uf_all_sets_list to avoid per-frame np.empty on the hot path.
-        self._uf_track_roots_buf = np.empty(_N * _M, dtype=np.int64)
         self.logger.info("PerceptionNode initialized.")
         self.process_cnt = 0
 
@@ -340,7 +338,7 @@ async def process(self, left_msg, right_msg):
 
             with Timer(name="[init extract info]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
                 extract_info = [await self.superpoint.infer(kf.image) for kf in self.keyframe_queue[-_N:]]
-            parent, rank = uf_init(len(self.keyframe_queue[-_N:]) * _M)
+                uf = uf_init(len(self.keyframe_queue[-_N:]) * _M)
 
             self.logger.debug(f"Processing {len(self.keyframe_queue)} keyframes for data association.")
             
@@ -410,16 +408,12 @@ async def process(self, left_msg, right_msg):
                             if match_idx != -1:
                                 idx_prev = i * _M + k
                                 idx_curr = j * _M + match_idx
-                                uf_union(idx_prev, idx_curr, parent, rank)
+                                uf_union(idx_prev, idx_curr, uf)
                                 count += 1
                         self.logger.debug(f"{i} match {j} after Pnp filter count: {count}")
 
             with Timer(name="[found track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
-                tracks = uf_all_sets_list(
-                    parent,
-                    min_component_size=2,
-                    out_roots=self._uf_track_roots_buf,
-                )
+                tracks = uf_all_sets_list(uf, min_component_size=2)
                 self.logger.debug(f"Found {len(tracks)} tracks after data association.")
 
             with Timer(name="[add track]", text="[{name}] Elapsed time: {milliseconds:.0f} ms", logger=self.logger.debug):
diff --git a/uv.lock b/uv.lock
index 5457508..943f1dd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1008,6 +1008,19 @@ http = [
     { name = "aiohttp" },
 ]
 
+[[package]]
+name = "fufpy"
+version = "0.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numba" },
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5f/26/d90fcf23c5cd5108403bf0f45c9b48df2b1574d7e44cc53fb2c132e3eb1e/fufpy-0.1.1.tar.gz", hash = "sha256:c05d336b3f2484170b08d83971d901d6a38d3f5e351f55ecff3061dd3bab99d3", size = 4631, upload-time = "2025-02-28T19:35:03.955Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/27/2f50a21216f50ebc84a77e286002e54bd1c515a79404d44810de4f95ac03/fufpy-0.1.1-py3-none-any.whl", hash = "sha256:e256b5426860b30b7ad592f7377abd1e63a999cf9642e4a80761bc6aebe1ecd5", size = 5116, upload-time = "2025-02-28T19:35:02.165Z" },
+]
+
 [[package]]
 name = "gdown"
 version = "5.2.0"
@@ -3996,6 +4009,7 @@ dependencies = [
     { name = "cuda-python", version = "12.2.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'x86_64'" },
     { name = "cuda-python", version = "12.6.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'aarch64'" },
     { name = "einops" },
+    { name = "fufpy" },
     { name = "huggingface-hub" },
     { name = "jupyterlab" },
     { name = "matplotlib" },
@@ -4040,6 +4054,7 @@ requires-dist = [
     { name = "cuda-python", marker = "platform_machine == 'aarch64'", specifier = "==12.6.0" },
     { name = "cuda-python", marker = "platform_machine == 'x86_64'", specifier = "==12.2.0" },
     { name = "einops" },
+    { name = "fufpy", specifier = ">=0.1.1" },
     { name = "huggingface-hub" },
     { name = "jupyterlab" },
     { name = "lerobot", extras = ["lekiwi"], marker = "extra == 'lekiwi'", specifier = "==0.3.3" },

From 218dfeb80b3df85755e3067435559f93e5c28964 Mon Sep 17 00:00:00 2001
From: xinghanDM <xinghan.li@deepmirror.com>
Date: Tue, 31 Mar 2026 21:38:43 +0800
Subject: [PATCH 5/5] remove the version constraint on fufpy

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d9d6d64..46d351c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
     "jupyterlab",
     "codetiming",
     "numba",
-    "fufpy>=0.1.1",
+    "fufpy",
     "pygame",
     "cuda_python==12.6.0; platform_machine == 'aarch64'",
     "cuda-python==12.2.0; platform_machine == 'x86_64'",