From c7415fd922a21e9a1e00910f000af9767ff8a1ac Mon Sep 17 00:00:00 2001 From: "Qingyuan (Andy) Li" Date: Thu, 20 Nov 2025 01:48:49 -0500 Subject: [PATCH 1/6] first-pass at dino aggregated frame descriptors, tested on kmd demo --- roman/align/results.py | 24 ++++++++--- roman/align/submap_align.py | 3 ++ roman/map/fastsam_wrapper.py | 66 +++++++++++++++++++++++------ roman/map/map.py | 60 +++++++++++++------------- roman/map/mapper.py | 6 ++- roman/map/observation.py | 2 +- roman/map/run.py | 18 ++++---- roman/object/segment.py | 4 +- roman/params/fastsam_params.py | 6 +++ roman/params/submap_align_params.py | 12 +++--- roman/utils.py | 1 + 11 files changed, 136 insertions(+), 66 deletions(-) diff --git a/roman/align/results.py b/roman/align/results.py index c007f8a..d232c66 100644 --- a/roman/align/results.py +++ b/roman/align/results.py @@ -21,6 +21,7 @@ class SubmapAlignResults: clipper_angle_mat: np.array clipper_dist_mat: np.array clipper_num_associations: np.array + similarity_mat: np.array submap_yaw_diff_mat: np.array associated_objs_mat: np.array T_ij_mat: np.array @@ -52,17 +53,24 @@ def time_to_secs_nsecs(t, as_dict=False): return {'seconds': seconds, 'nanoseconds': nanoseconds} def plot_align_results(results: SubmapAlignResults, dpi=500): - + + show_sim = results.similarity_mat is not None + # if no ground truth, can only show number of associations if None in results.submap_io.input_gt_pose_yaml: - fig, ax = plt.subplots(1, 1, figsize=(4,4), dpi=dpi) - mp = ax.imshow(results.clipper_num_associations, cmap='viridis', vmin=0) + fig, ax = plt.subplots(2 if show_sim else 1, 1, figsize=(8 if show_sim else 4,4), dpi=dpi) + mp = ax[0, 0].imshow(results.clipper_num_associations, cmap='viridis', vmin=0) fig.colorbar(mp, fraction=0.04, pad=0.04) ax.set_title("Number of Associations") + + if show_sim: + mp = ax[1, 0].imshow(results.similarity_mat, cmap='viridis', vmin=0) + fig.colorbar(mp, fraction=0.04, pad=0.04) + ax[1, 0].set_title("Similarity Score") + fig.suptitle(f"{results.submap_io.run_name}: {results.submap_io.robot_names[0]}, {results.submap_io.robot_names[1]}") return - fig, ax = plt.subplots(3, 2, figsize=(8, 12), dpi=dpi) fig.subplots_adjust(wspace=.3) fig.suptitle(f"{results.submap_io.run_name}: {results.submap_io.robot_names[0]}, {results.submap_io.robot_names[1]}") @@ -95,6 +103,11 @@ def plot_align_results(results: SubmapAlignResults, dpi=500): mp = ax[2, 0].imshow(results.clipper_num_associations, cmap='viridis', vmin=0) fig.colorbar(mp, fraction=0.04, pad=0.04) ax[2, 0].set_title("Number of Associations") + + if show_sim: + mp = ax[2, 1].imshow(results.similarity_mat, cmap='viridis', vmin=0) + fig.colorbar(mp, fraction=0.04, pad=0.04) + ax[2, 1].set_title("Similarity Score") for i in range(len(ax)): for j in range(len(ax[i])): @@ -102,7 +115,8 @@ def plot_align_results(results: SubmapAlignResults, dpi=500): ax[i,j].set_ylabel("submap index (robot 1)") ax[i,j].grid(False) - fig.delaxes(ax[2, 1]) + if not show_sim: + fig.delaxes(ax[2, 1]) def save_submap_align_results(results: SubmapAlignResults, submaps, roman_maps: List[ROMANMap]): plot_align_results(results) diff --git a/roman/align/submap_align.py b/roman/align/submap_align.py index 178f744..c53280d 100644 --- a/roman/align/submap_align.py +++ b/roman/align/submap_align.py @@ -74,6 +74,7 @@ def submap_align(sm_params: SubmapAlignParams, sm_io: SubmapAlignInputOutput): clipper_angle_mat = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan clipper_dist_mat = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan clipper_num_associations = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan + similarity_mat = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan robots_nearby_mat = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan clipper_percent_associations = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan submap_yaw_diff_mat = np.zeros((len(submaps[0]), len(submaps[1])))*np.nan @@ -184,6 +185,7 @@ def submap_align(sm_params: SubmapAlignParams, sm_io: SubmapAlignInputOutput): clipper_dist_mat[i, j] = np.nan clipper_num_associations[i, j] = len(associations) + similarity_mat[i, j] = submap_sim clipper_percent_associations[i, j] = len(associations) / np.mean([len(submap_i), len(submap_j)]) if np.mean([len(submap_i), len(submap_j)]) > 0 else 0.0 T_ij_mat[i, j] = T_ij @@ -198,6 +200,7 @@ def submap_align(sm_params: SubmapAlignParams, sm_io: SubmapAlignInputOutput): clipper_angle_mat=clipper_angle_mat, clipper_dist_mat=clipper_dist_mat, clipper_num_associations=clipper_num_associations, + similarity_mat=similarity_mat if sm_params.submap_descriptor is not None else None, submap_yaw_diff_mat=submap_yaw_diff_mat, T_ij_mat=T_ij_mat, T_ij_hat_mat=T_ij_hat_mat, diff --git a/roman/map/fastsam_wrapper.py b/roman/map/fastsam_wrapper.py index 56399e2..4247048 100644 --- a/roman/map/fastsam_wrapper.py +++ b/roman/map/fastsam_wrapper.py @@ -143,6 +143,7 @@ def from_params(cls, params: FastSAMParams, depth_cam_params: CameraParams): allow_tblr_edges=[True, True, True, True], area_bounds=[img_area / (params.min_mask_len_div**2), img_area / (params.max_mask_len_div**2)], semantics=params.semantics, + frame_descriptor=params.frame_descriptor, triangle_ignore_masks=params.triangle_ignore_masks ) @@ -159,6 +160,7 @@ def setup_filtering(self, allow_tblr_edges = [True, True, True, True], keep_mask_minimal_intersection=0.3, semantics: str = None, + frame_descriptor: str = None, triangle_ignore_masks=None ): """ @@ -203,6 +205,9 @@ def setup_filtering(self, else: raise ValueError(f"Invalid semantics option: {semantics}. Choose from 'clip', 'dino', or 'none'.") self.semantic_patches_shape = None + self.frame_descriptor_type = frame_descriptor + if frame_descriptor is not None: + assert self.semantics.lower() == 'dino', "Frame descriptor only supported with DINO semantics." if triangle_ignore_masks is not None: self.constant_ignore_mask = np.zeros((self.depth_cam_params.height, self.depth_cam_params.width), dtype=np.uint8) @@ -262,16 +267,16 @@ def setup_rgbd_params( self.erosion_element = None self.plane_filter_params = plane_filter_params - def run(self, t, pose, img, depth_data=None, plot=False): + def run(self, t, pose, img, depth_data=None): """ Takes and image and returns filtered FastSAM masks as Observations. Args: img (cv image): camera image - plot (bool, optional): Returns plots if true. Defaults to False. Returns: self.observations (list): list of Observations + frame_descriptor (np.ndarray): semantic descriptor of the frame if frame_descriptor is not None, else None """ self.observations = [] @@ -294,19 +299,26 @@ def run(self, t, pose, img, depth_data=None, plot=False): # run fastsam masks = self._process_img(img, ignore_mask=ignore_mask, keep_mask=keep_mask) - + if self.semantics == 'dino': # Process the image for DINO dino_shape = 768 img_bgr = cv.cvtColor(img, cv.COLOR_BGR2RGB) preprocessed = self.semantics_preprocess(images=img_bgr, return_tensors="pt").to(self.device) dino_output = self.semantics_model(**preprocessed) - dino_features = self.get_per_pixel_features( + dino_output_patches = self.get_output_patches( model_output=dino_output.last_hidden_state, img_shape=img.shape, feature_dim=dino_shape ) - + dino_features = self.get_per_pixel_features( + model_output_patches=dino_output_patches, + img_shape=img.shape + ) + + frame_descriptor = None + if self.frame_descriptor_type is not None: + frame_descriptor = self.get_frame_descriptor(dino_output_patches) for mask in masks: @@ -406,7 +418,7 @@ def run(self, t, pose, img, depth_data=None, plot=False): processed_img = self.semantics_preprocess(Image.fromarray(img_bbox, mode='RGB')).to(self.device) clip_embedding = self.semantics_model.encode_image(processed_img.unsqueeze(dim=0)) clip_embedding = clip_embedding.squeeze().cpu().detach().numpy() - self.observations.append(Observation(t, pose, mask, mask_downsampled, ptcld, clip_embedding=clip_embedding)) + self.observations.append(Observation(t, pose, mask, mask_downsampled, ptcld, semantic_descriptor=clip_embedding)) elif self.semantics == 'dino': assert mask.shape[0] == dino_features.shape[0] and mask.shape[1] == dino_features.shape[1], \ "Mask and DINO features must have the same shape." @@ -414,12 +426,12 @@ def run(self, t, pose, img, depth_data=None, plot=False): dino_mask = dino_mask.cpu().detach().numpy() mean_dino = np.mean(dino_mask, axis=0) # dino_shape mean_dino = mean_dino / np.linalg.norm(mean_dino) # normalize - self.observations.append(Observation(t, pose, mask, mask_downsampled, ptcld, clip_embedding=mean_dino)) + self.observations.append(Observation(t, pose, mask, mask_downsampled, ptcld, semantic_descriptor=mean_dino)) else: self.observations.append(Observation(t, pose, mask, mask_downsampled, ptcld)) - return self.observations + return self.observations, frame_descriptor def apply_rotation(self, img, unrotate=False): if self.rotate_img is None: @@ -575,11 +587,10 @@ def _process_img(self, image_bgr, ignore_mask=None, keep_mask=None): return [] return segmask - - def get_per_pixel_features(self, model_output: ArrayLike, img_shape: ArrayLike, - feature_dim: int) -> ArrayLike: + + def get_output_patches(self, model_output: ArrayLike, img_shape: ArrayLike, feature_dim: int) -> ArrayLike: """ - Extract (Dino) per-pixel features + Extract (Dino) output patches Args: model_output (ArrayLike): Last hidden state of (Dino) model @@ -600,6 +611,19 @@ def get_per_pixel_features(self, model_output: ArrayLike, img_shape: ArrayLike, model_output_patches = model_output_flat_patches.reshape(self.semantic_patches_shape) + return model_output_patches # 1 x h x w x feature_dim + + def get_per_pixel_features(self, model_output_patches: ArrayLike, img_shape: ArrayLike) -> ArrayLike: + """ + Extract (Dino) per-pixel features + + Args: + model_output_patches (ArrayLike): Reshaped (Dino) output patches + img_shape (ArrayLike): Original image shape + + Returns: + ArrayLike: Reshaped (Dino) output + """ # interpolate the feature map to match the size of the original image per_pixel_features = torch.nn.functional.interpolate( model_output_patches.permute(0, 3, 1, 2), # permute to be batch, channels, height, width @@ -612,3 +636,21 @@ def get_per_pixel_features(self, model_output: ArrayLike, img_shape: ArrayLike, return per_pixel_features # h x w x feature_dim + def get_frame_descriptor(self, dino_features: torch.Tensor) -> np.ndarray: + with torch.no_grad(): # prevent memory leak + dino_features_flat = dino_features.view(-1, dino_features.shape[-1]) + if self.frame_descriptor_type == 'dino-gap': + frame_descriptor = torch.sum(dino_features_flat, dim=0) + elif self.frame_descriptor_type == 'dino-gmp': + frame_descriptor = torch.max(dino_features_flat, dim=0).values + elif self.frame_descriptor_type == 'dino-gem': + cubed_descriptor = torch.mean(dino_features_flat ** 3, dim=0) + frame_descriptor = torch.sign(cubed_descriptor) * \ + (torch.abs(cubed_descriptor).clamp(min=1e-12) ** (1.0 / 3)) # avoid NaN from negative or zero root + else: + raise ValueError(f"frame descriptor must be one of 'dino-gap', 'dino-gmp', or 'dino-gem'.") + + frame_descriptor /= torch.norm(frame_descriptor) + + return frame_descriptor.cpu().detach().numpy() + \ No newline at end of file diff --git a/roman/map/map.py b/roman/map/map.py index d903495..f28cdb7 100644 --- a/roman/map/map.py +++ b/roman/map/map.py @@ -22,6 +22,7 @@ class ROMANMap: segments: List[Segment] trajectory: List[np.ndarray] times: np.ndarray + descriptors: List[np.ndarray] = None poses_are_flu: bool = True def __post_init__(self): @@ -36,6 +37,7 @@ def minimal_data(self): segments=[seg.minimal_data() for seg in self.segments], trajectory=self.trajectory, times=self.times, + descriptors=self.descriptors, poses_are_flu=self.poses_are_flu ) @@ -53,19 +55,9 @@ def make_picklable(self): def from_pickle(cls, pickle_file: str): # extract pickled data with open(os.path.expanduser(pickle_file), 'rb') as f: - pickle_data = pickle.load(f) - - if type(pickle_data) == cls: - roman_map = pickle_data - else: - assert len(pickle_data) == 3 - mapper, poses, times = pickle_data - roman_map = cls( - segments=mapper.get_segment_map(), - trajectory=poses, - times=times - ) - return roman_map + roman_map = pickle.load(f) + assert(type(roman_map) == cls) + return roman_map @classmethod def concatenate(cls, roman_maps: list): @@ -82,6 +74,8 @@ def concatenate(cls, roman_maps: list): segments=reference.segments + other.segments, trajectory=reference.trajectory + other.trajectory, times=reference.times + other.times, + descriptors = reference.descriptors + other.descriptors if \ + reference.descriptors is not None and other.descriptors is not None else None, poses_are_flu=reference.poses_are_flu ) @@ -122,12 +116,20 @@ def position_gt(self): def has_gt(self): return self.pose_flu_gt is not None + @property + def first_seen(self): + return min([seg.first_seen for seg in self.segments]) + + @property + def last_seen(self): + return max([seg.last_seen for seg in self.segments]) + @property def segments_as_global_points(self): # self.pose_gravity_aligned returns T_odom_center # which is transformation from center frame to odom frame # so this transforms segments back to the global (odom) frame - T_odom_center = self.pose_gravity_aligned_gt if self.pose_flu_gt is not None else self.pose_gravity_aligned + T_odom_center = self.pose_gravity_aligned_gt if self.has_gt else self.pose_gravity_aligned return transform(T_odom_center, np.vstack([seg.center.T for seg in self.segments])) # (1, 3) -> (N, 3) def __len__(self): @@ -174,19 +176,9 @@ def load_roman_map(map_file: str) -> ROMANMap: """ # extract pickled data with open(os.path.expanduser(map_file), 'rb') as f: - pickle_data = pickle.load(f) - - if type(pickle_data) == ROMANMap: - roman_map = pickle_data - else: - assert len(pickle_data) == 3 - mapper, poses, times = pickle_data - roman_map = ROMANMap( - segments=mapper.get_segment_map(), - trajectory=poses, - times=times - ) - return roman_map + roman_map = pickle.load(f) + assert type(roman_map) == ROMANMap + return roman_map def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, gt_flu_pose_data: PoseData=None) -> List[Submap]: @@ -233,10 +225,8 @@ def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, pose_flu_gt=gt_flu_pose_data.pose(submap_time_roman_map) if gt_flu_pose_data is not None else None ) - # sm.pose is the pose of center w.r.t. odom, which is T_odom_center (center is child, odom is parent frame) - # T_odom_center is also transformation from center frame to odom frame - # so its inverse, T_center_odom, is transformation from odom frame to center frame - # which transforms the segments into the center frame (centered w.r.t submap centroid) since they are in the odom frame + # sm.pose is the pose of center w.r.t. odom, which is T_odom_center, inverse is T_center_odom + # transforms the segments into the center frame (centered w.r.t submap centroid) since they are in the odom frame T_center_odom = np.linalg.inv(sm.pose_gravity_aligned) for seg in sm.segments: seg.transform(T_center_odom) @@ -291,6 +281,14 @@ def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, # compute mean semantic for each submap for submap in submaps: submap.descriptor = np.mean([seg.semantic_descriptor for seg in submap.segments], axis=0).flatten() + elif submap_params.submap_descriptor == 'mean_frame_descriptor': + assert roman_map.descriptors is not None, "ROMAN map must have frame descriptors to compute submap descriptors from them." + map_times_np = np.array(roman_map.times) + descriptors_np = np.vstack(roman_map.descriptors) + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + submap.descriptor = descriptors_np[frame_mask].mean(axis=0) + return submaps diff --git a/roman/map/mapper.py b/roman/map/mapper.py index 13c3e06..5b1f567 100644 --- a/roman/map/mapper.py +++ b/roman/map/mapper.py @@ -43,14 +43,17 @@ def __init__(self, params: MapperParams, camera_params: CameraParams): self.last_pose = None self.poses_flu_history = [] self.times_history = [] + self.frame_descriptors_history = [] self._T_camera_flu = np.eye(4) - def update(self, t: float, pose: np.array, observations: List[Observation]): + def update(self, t: float, pose: np.array, observations: List[Observation], frame_descriptor: np.ndarray): # have T_WC, want T_WB # T_WB = T_WC @ T_CB self.poses_flu_history.append(pose @ self._T_camera_flu) self.times_history.append(t) + if frame_descriptor is not None: + self.frame_descriptors_history.append(frame_descriptor) # store last pose self.last_pose = pose.copy() @@ -320,6 +323,7 @@ def get_roman_map(self) -> ROMANMap: segments=segment_map, trajectory=self.poses_flu_history, times=self.times_history, + descriptors=self.frame_descriptors_history if self.frame_descriptors_history else None, poses_are_flu=True ) diff --git a/roman/map/observation.py b/roman/map/observation.py index 4b10c6f..e025153 100644 --- a/roman/map/observation.py +++ b/roman/map/observation.py @@ -20,7 +20,7 @@ class Observation(): mask: np.ndarray = None mask_downsampled: np.ndarray = None point_cloud: np.ndarray = None # n-by-3 matrix. Each row is a 3D point. - clip_embedding: np.ndarray = None + semantic_descriptor: np.ndarray = None voxel_grid: Dict[float, VoxelGrid] = field(default_factory=dict) _transformed_points: np.ndarray = None _pcd: o3d.geometry.PointCloud = None diff --git a/roman/map/run.py b/roman/map/run.py index 4030e59..0d018b4 100644 --- a/roman/map/run.py +++ b/roman/map/run.py @@ -112,10 +112,10 @@ def update(self, t: float): img_output = None update_t0 = time.time() - img_time, observations, pose_odom_camera, img = self.update_fastsam(t) + img_time, observations, pose_odom_camera, img, frame_descriptor = self.update_fastsam(t) update_t1 = time.time() if observations is not None and pose_odom_camera is not None and img is not None: - img_output = self.update_segment_track(img_time, observations, pose_odom_camera, img) + img_output = self.update_segment_track(img_time, observations, pose_odom_camera, img, frame_descriptor) update_t2 = time.time() self.processing_times.map_times.append(update_t2 - update_t1) @@ -137,12 +137,12 @@ def update_fastsam(self, t): else: depth_data = self.depth_data.img(img_t) except NoDataNearTimeException: - return None, None, None, None + return None, None, None, None, None - observations = self.fastsam.run(img_t, T_odom_camera, img, depth_data=depth_data) - return img_t, observations, T_odom_camera, img + observations, frame_descriptor = self.fastsam.run(img_t, T_odom_camera, img, depth_data=depth_data) + return img_t, observations, T_odom_camera, img, frame_descriptor - def update_segment_track(self, t, observations, pose_odom_camera, img): + def update_segment_track(self, t, observations, pose_odom_camera, img, frame_descriptor): # collect reprojected masks reprojected_bboxs = [] @@ -155,11 +155,13 @@ def update_segment_track(self, t, observations, pose_odom_camera, img): reprojected_bboxs.append((segment.id, bbox)) if len(observations) > 0: - self.mapper.update(t, pose_odom_camera, observations) + self.mapper.update(t, pose_odom_camera, observations, frame_descriptor) else: self.mapper.poses_flu_history.append(pose_odom_camera @ self.mapper._T_camera_flu) self.mapper.times_history.append(t) - + if frame_descriptor is not None: + self.mapper.frame_descriptors_history.append(frame_descriptor) + if self.viz_map or self.viz_observations or self.viz_3d: img_ret = self.draw(t, img, pose_odom_camera, observations, reprojected_bboxs) diff --git a/roman/object/segment.py b/roman/object/segment.py index 1019dc0..a529195 100644 --- a/roman/object/segment.py +++ b/roman/object/segment.py @@ -107,8 +107,8 @@ def update(self, observation: Observation, integrate_points=True): # Integrate point measurements if integrate_points: self._integrate_points_from_observation(observation) - if observation.clip_embedding is not None: - self._add_semantic_descriptor(observation.clip_embedding) + if observation.semantic_descriptor is not None: + self._add_semantic_descriptor(observation.semantic_descriptor) self.num_sightings += 1 self.observations.append(observation.copy(include_mask=False)) diff --git a/roman/params/fastsam_params.py b/roman/params/fastsam_params.py index 8b7336f..09e6ee1 100644 --- a/roman/params/fastsam_params.py +++ b/roman/params/fastsam_params.py @@ -40,6 +40,7 @@ class FastSAMParams: plane_filter_params (tuple): parameters for plane filtering rotate_img (str): how to rotate the image ('CW', 'CCW', '180') semantics (str): which semantics to use for observations ('clip', 'dino', or 'none') + frame_descriptor (str): type of frame descriptor to use ('dino-gem', 'dino-gap','dino-gmp', or 'none') yolo_imgsz (Tuple[int, int]): size of the YOLO image depth_scale (float): depth scale factor for processing depth images max_depth (float): maximum depth before rejecting observation points @@ -71,11 +72,16 @@ class FastSAMParams: plane_filter_params: tuple = tuple([3.0, 1.0, 0.2]) rotate_img: str = None semantics: str = 'dino' + frame_descriptor: str = 'dino-gem' yolo_imgsz: Tuple[int, int] = (256, 256) depth_scale: float = 1e3 max_depth: float = 7.5 triangle_ignore_masks: List[Tuple[Tuple[int,int], Tuple[int,int], Tuple[int,int]]] = None + def __post_init__(self): + if self.frame_descriptor.lower() == 'none': + self.frame_descriptor = None + @classmethod def from_yaml(cls, yaml_path: str, run: str = None): with open(yaml_path) as f: diff --git a/roman/params/submap_align_params.py b/roman/params/submap_align_params.py index 14a75ac..67cebf0 100644 --- a/roman/params/submap_align_params.py +++ b/roman/params/submap_align_params.py @@ -35,23 +35,23 @@ class SubmapAlignParams: use_avg_time_as_segment_ref_time: bool = True # If true, use (first_seen + last_seen) / 2 for each segment reference time # (only applicable if force_fill_submaps == True or submap_pruning_method == 'time') - force_fill_submaps: bool = False # If true, force all submaps to be filled with segments + force_fill_submaps: bool = False # If true, force all submaps to be filled with segments submap_max_size: int = 40 # Maximum number of segments in a submap (to save computation) # the following is applicable only if force_fill_submaps is true submap_overlap: int = int(0.5 * submap_max_size) # Number of overlapping segments between submaps # the following are applicable only if force_fill_submaps is false ----------- - submap_radius: float = 15.0 # Radius of submap in meters. If set to None, segments + submap_radius: float = 15.0 # Radius of submap in meters. If set to None, segments # are never excluded from submaps based on distance # (though they may still be pruned) - submap_center_dist: float = 10.0 # Distance between submap centers in meters - submap_center_time: float = 50.0 # time threshold between segments and submap center times + submap_center_dist: float = 10.0 # Distance between submap centers in meters + submap_center_time: float = 50.0 # time threshold between segments and submap center times submap_pruning_method: str = 'distance' # Metric for pruning segments in a submap: # ('time', 'distance') -> max gets pruned # ---------------------------------------------------------------------------- - submap_descriptor: str = None # Type of submap descriptor. Either 'none' or 'mean_semantic'. - submap_descriptor_thresh: float = 0.8 # ROMAN object matching will only be run if submap + submap_descriptor: str = None # Type of submap descriptor. Either 'none', 'mean_semantic', or 'mean_frame_descriptor'. + submap_descriptor_thresh: float = 0.8 # ROMAN object matching will only be run if submap # descriptor cosine similarity is above this threshold. diff --git a/roman/utils.py b/roman/utils.py index 6bef1ff..38c8774 100644 --- a/roman/utils.py +++ b/roman/utils.py @@ -12,6 +12,7 @@ import matplotlib.pyplot as plt import numpy as np +import torch from typing import List from scipy.spatial.transform import Rotation as Rot from os.path import expandvars, expanduser From 6513434b6c0126211546c71fd16495beff29ab83 Mon Sep 17 00:00:00 2001 From: "Qingyuan (Andy) Li" Date: Fri, 21 Nov 2025 17:41:39 -0500 Subject: [PATCH 2/6] add stacked descriptors and maximum piecewise similarity --- roman/align/submap_align.py | 3 +- roman/map/map.py | 78 +++++++++++++++++++++++------ roman/params/submap_align_params.py | 67 ++++++++++++------------- 3 files changed, 98 insertions(+), 50 deletions(-) diff --git a/roman/align/submap_align.py b/roman/align/submap_align.py index c53280d..e3d1df4 100644 --- a/roman/align/submap_align.py +++ b/roman/align/submap_align.py @@ -129,8 +129,7 @@ def submap_align(sm_params: SubmapAlignParams, sm_io: SubmapAlignInputOutput): submap_yaw_diff_mat[i, j] = np.abs(np.rad2deg(relative_yaw_angle)) if sm_params.submap_descriptor is not None: - submap_sim = np.sum(submap_i.descriptor * submap_j.descriptor) / \ - (np.linalg.norm(submap_i.descriptor) * np.linalg.norm(submap_j.descriptor)) + submap_sim = Submap.similarity(submap_i, submap_j) else: submap_sim = np.inf # always try to register object maps if no descriptor is used diff --git a/roman/map/map.py b/roman/map/map.py index f28cdb7..a80aaeb 100644 --- a/roman/map/map.py +++ b/roman/map/map.py @@ -1,12 +1,11 @@ import numpy as np -from numpy.linalg import norm from scipy.spatial.transform import Rotation as Rot import os import pickle from copy import deepcopy from dataclasses import dataclass -from typing import List +from typing import List, Tuple import json from robotdatapy.data.pose_data import PoseData @@ -134,6 +133,28 @@ def segments_as_global_points(self): def __len__(self): return len(self.segments) + + @classmethod + def similarity(cls, submap1, submap2): + desc1 = submap1.descriptor + desc2 = submap2.descriptor + + if len(desc1.shape) == len(desc2.shape) == 1: + # 1-d cosine similarity + norm_prod = np.linalg.norm(desc1) * np.linalg.norm(desc2) + if np.isclose(norm_prod, 0.0, atol=1e-9, rtol=0.0): return 0.0 + return np.dot(desc1, desc2) / norm_prod + + elif len(desc1.shape) == len(desc2.shape) == 2: + # maximum piecewise cosine similarity + sims = [] + desc1 = desc1.reshape(desc1.shape[0], 1, desc1.shape[1]) # (N1, 1, D) + desc2 = desc2.reshape(1, desc2.shape[0], desc2.shape[1]) # (1, N2, D) + norm_prods = np.linalg.norm(desc1, axis=2) * np.linalg.norm(desc2, axis=2) # (N1, N2) + sims = np.sum(desc1 * desc2, axis=2) / norm_prods # (N1, N2, D) -> (N1, N2) + sims[np.isclose(norm_prods, 0.0, atol=1e-9, rtol=0.0)] = 0.0 + return np.max(sims) + @dataclass class SubmapParams: @@ -145,10 +166,10 @@ class SubmapParams: distance: float = 10.0 time_threshold: float = np.inf pruning_method: str = 'time' - use_avg_time_as_segment_ref_time: bool = True object_center_ref: str = 'mean' use_minimal_data: bool = True submap_descriptor: str = None + frame_descriptor_dist: float = None @classmethod def from_submap_align_params(cls, submap_align_params: SubmapAlignParams): @@ -160,8 +181,8 @@ def from_submap_align_params(cls, submap_align_params: SubmapAlignParams): distance=submap_align_params.submap_center_dist, time_threshold=submap_align_params.submap_center_time, pruning_method=submap_align_params.submap_pruning_method, - use_avg_time_as_segment_ref_time=submap_align_params.use_avg_time_as_segment_ref_time, - submap_descriptor=submap_align_params.submap_descriptor + submap_descriptor=submap_align_params.submap_descriptor, + frame_descriptor_dist=submap_align_params.frame_descriptor_dist, ) def load_roman_map(map_file: str) -> ROMANMap: @@ -203,14 +224,14 @@ def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, if submap_params.force_fill_submaps: submaps = [] - segments_sorted_by_time = sorted(roman_map.segments, key=lambda seg: seg.reference_time(use_avg_time=submap_params.use_avg_time_as_segment_ref_time)) + segments_sorted_by_time = sorted(roman_map.segments, key=lambda seg: seg.reference_time()) for i in range(0, len(segments_sorted_by_time), submap_params.max_size - submap_params.overlap): sm_segments = segments_sorted_by_time[i:i + submap_params.max_size] if len(sm_segments) == 0: continue - segment_times = [seg.reference_time(use_avg_time=submap_params.use_avg_time_as_segment_ref_time) for seg in sm_segments] + segment_times = [seg.reference_time() for seg in sm_segments] submap_time = np.average(segment_times) submap_roman_map_index = np.argmin(np.abs(roman_map.times - submap_time)) @@ -259,7 +280,8 @@ def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, ) for seg in roman_map.segments: - if (submap_params.radius is None or (norm(seg.center.flatten() - sm.pose_flu[:3,3]) < submap_params.radius)) \ + if (submap_params.radius is None or \ + (np.linalg.norm(seg.center.flatten() - sm.pose_flu[:3,3]) < submap_params.radius)) \ and meets_time_constraints(seg): sm.segments.append(deepcopy(seg)) @@ -269,26 +291,54 @@ def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, if submap_params.max_size is not None: if submap_params.pruning_method == 'time': # time-based pruning - pruning_key = lambda seg: abs(seg.reference_time(use_avg_time=submap_params.use_avg_time_as_segment_ref_time) - submaps[i].time) + pruning_key = lambda seg: abs(seg.reference_time() - submaps[i].time) else: # distance-based pruning - pruning_key = lambda seg: norm(seg.center.flatten()) + pruning_key = lambda seg: np.linalg.norm(seg.center.flatten()) segments_sorted_by_key = sorted(sm.segments, key=pruning_key) sm.segments = segments_sorted_by_key[:submap_params.max_size] submaps = [submap for submap in submaps if len(submap.segments) > 0] + if submap_params.submap_descriptor == 'mean_semantic': # compute mean semantic for each submap for submap in submaps: submap.descriptor = np.mean([seg.semantic_descriptor for seg in submap.segments], axis=0).flatten() - elif submap_params.submap_descriptor == 'mean_frame_descriptor': + + elif submap_params.submap_descriptor is not None: + # using frame descriptors assert roman_map.descriptors is not None, "ROMAN map must have frame descriptors to compute submap descriptors from them." map_times_np = np.array(roman_map.times) descriptors_np = np.vstack(roman_map.descriptors) - for submap in submaps: - frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) - submap.descriptor = descriptors_np[frame_mask].mean(axis=0) + if submap_params.submap_descriptor == 'mean_frame_descriptor': + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + submap.descriptor = descriptors_np[frame_mask].mean(axis=0) + + elif submap_params.submap_descriptor == 'stacked_frame_descriptors': + if submap_params.frame_descriptor_dist is None: + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + submap.descriptor = descriptors_np[frame_mask] + else: + dist_thresh = submap_params.frame_descriptor_dist + map_pos_np = np.array([pose[:3,3] for pose in roman_map.trajectory]) + + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + frame_descriptors = descriptors_np[frame_mask] + frame_pos = map_pos_np[frame_mask] + + stacked_descriptors = [] + last_pose = None + for fd, fp in zip(frame_descriptors, frame_pos): + if (last_pose is None or np.linalg.norm(fp - last_pose) >= dist_thresh): + stacked_descriptors.append(fd) + last_pose = fp + + submap.descriptor = np.vstack(stacked_descriptors) + return submaps diff --git a/roman/params/submap_align_params.py b/roman/params/submap_align_params.py index 67cebf0..8d44498 100644 --- a/roman/params/submap_align_params.py +++ b/roman/params/submap_align_params.py @@ -13,7 +13,7 @@ import numpy as np from dataclasses import dataclass, field -from typing import List +from typing import List, Tuple, Union import os import yaml @@ -25,43 +25,42 @@ @dataclass class SubmapAlignParams: - dim: int = 3 # 2 or 3. 2D or 3D object map registration - method: str = 'roman' # by default, use semantic + pca + volume + gravity - # same as in ROMAN paper. - # See get_object_registration for other methods - fusion_method: str = 'geometric_mean' # How to fuse similarity scores. (geometric_mean, - # arithmetic_mean, product) - - use_avg_time_as_segment_ref_time: bool = True # If true, use (first_seen + last_seen) / 2 for each segment reference time - # (only applicable if force_fill_submaps == True or submap_pruning_method == 'time') - - force_fill_submaps: bool = False # If true, force all submaps to be filled with segments - submap_max_size: int = 40 # Maximum number of segments in a submap (to save computation) + dim: int = 3 # 2 or 3. 2D or 3D object map registration + method: str = 'roman' # by default, use semantic + pca + volume + gravity + # same as in ROMAN paper. + # See get_object_registration for other methods + fusion_method: str = 'geometric_mean' # How to fuse similarity scores. (geometric_mean, + # arithmetic_mean, product) - # the following is applicable only if force_fill_submaps is true - submap_overlap: int = int(0.5 * submap_max_size) # Number of overlapping segments between submaps + force_fill_submaps: bool = False # If true, force all submaps to be filled with segments + submap_max_size: int = 40 # Maximum number of segments in a submap (to save computation) - # the following are applicable only if force_fill_submaps is false ----------- - submap_radius: float = 15.0 # Radius of submap in meters. If set to None, segments - # are never excluded from submaps based on distance - # (though they may still be pruned) - submap_center_dist: float = 10.0 # Distance between submap centers in meters - submap_center_time: float = 50.0 # time threshold between segments and submap center times - submap_pruning_method: str = 'distance' # Metric for pruning segments in a submap: - # ('time', 'distance') -> max gets pruned - # ---------------------------------------------------------------------------- - submap_descriptor: str = None # Type of submap descriptor. Either 'none', 'mean_semantic', or 'mean_frame_descriptor'. - submap_descriptor_thresh: float = 0.8 # ROMAN object matching will only be run if submap - # descriptor cosine similarity is above this threshold. + # the following is applicable only if force_fill_submaps is true ----------------------------------- + submap_overlap: int = int(0.5 * submap_max_size) # Number of overlapping segments between submaps + # the following are applicable only if force_fill_submaps is false --------------------------------- + submap_radius: float = 15.0 # Radius of submap in meters. If set to None, segments + # are never excluded from submaps based on distance + # (though they may still be pruned) + submap_center_dist: float = 10.0 # Distance between submap centers in meters + submap_center_time: float = 50.0 # time threshold between segments and submap center times + submap_pruning_method: str = 'distance' # Metric for pruning segments in a submap: + # ('time', 'distance') -> max gets pruned + # -------------------------------------------------------------------------------------------------- + submap_descriptor: Union[str, None] = None # Type of submap descriptor. Either 'none', 'mean_semantic', 'mean_frame_descriptor', + # or 'stacked_frame_descriptors'. + frame_descriptor_dist: float = None # If submap_descriptor=='stacked_frame_descriptors', dist threshold to sequentially + # add a new frame descriptors to each submap descriptor + submap_descriptor_thresh: float = 0.8 # ROMAN object matching will only be run if submap + # descriptor cosine similarity is above this threshold. - single_robot_lc: bool = False # If true, do not try and perform loop closures with submaps - # nearby in time - single_robot_lc_time_thresh: float = 50.0 # Time threshold for single robot loop closure - force_rm_lc_roll_pitch: bool = True # If true, remove parts of rotation about x or y axes - force_rm_upside_down: bool = True # If true, assumes upside down submap rotations are incorrect - use_object_bottom_middle: bool = False # If true, uses the bottom middle of the object as a reference - # point for registration rather than the center of the object + single_robot_lc: bool = False # If true, do not try and perform loop closures with submaps + # nearby in time + single_robot_lc_time_thresh: float = 50.0 # Time threshold for single robot loop closure + force_rm_lc_roll_pitch: bool = True # If true, remove parts of rotation about x or y axes + force_rm_upside_down: bool = True # If true, assumes upside down submap rotations are incorrect + use_object_bottom_middle: bool = False # If true, uses the bottom middle of the object as a reference + # point for registration rather than the center of the object # registration params sigma: float = 0.4 From 1dd897898a5fbe4d3b38b2ded00d99a8c1499aaf Mon Sep 17 00:00:00 2001 From: "Qingyuan (Andy) Li" Date: Fri, 21 Nov 2025 22:36:12 -0500 Subject: [PATCH 3/6] tested demo params --- params/demo/fastsam.yaml | 3 +-- params/demo/submap_align.yaml | 4 +++- params/demo_no_gpu/fastsam.yaml | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/params/demo/fastsam.yaml b/params/demo/fastsam.yaml index 4a75fc1..c4074f7 100644 --- a/params/demo/fastsam.yaml +++ b/params/demo/fastsam.yaml @@ -16,5 +16,4 @@ semantics: 'dino' yolo_imgsz: [256, 256] depth_scale: 1000.0 max_depth: 7.5 - -frame_descriptor: 'dino-gmp' \ No newline at end of file +frame_descriptor: 'dino-gem' \ No newline at end of file diff --git a/params/demo/submap_align.yaml b/params/demo/submap_align.yaml index af2482b..f154d85 100644 --- a/params/demo/submap_align.yaml +++ b/params/demo/submap_align.yaml @@ -14,4 +14,6 @@ cosine_min: 0.5 cosine_max: 0.7 semantics_dim: 768 -submap_descriptor: 'mean_frame_descriptor' \ No newline at end of file +submap_descriptor: 'stacked_frame_descriptors' +frame_descriptor_dist: 10.0 +submap_descriptor_thresh: 0.8 \ No newline at end of file diff --git a/params/demo_no_gpu/fastsam.yaml b/params/demo_no_gpu/fastsam.yaml index 18510b5..fae3e41 100644 --- a/params/demo_no_gpu/fastsam.yaml +++ b/params/demo_no_gpu/fastsam.yaml @@ -15,4 +15,5 @@ plane_filter_params: [3.0, 1.0, 0.2] semantics: 'none' yolo_imgsz: [256, 256] depth_scale: 1000.0 -max_depth: 7.5 \ No newline at end of file +max_depth: 7.5 +frame_descriptor: 'none' \ No newline at end of file From fc45e469fe37430da4f150efe605a1553464ca62 Mon Sep 17 00:00:00 2001 From: "Qingyuan (Andy) Li" Date: Sat, 22 Nov 2025 21:14:40 -0500 Subject: [PATCH 4/6] pr changes --- roman/align/results.py | 7 ++++--- roman/map/fastsam_wrapper.py | 3 ++- roman/map/map.py | 1 - roman/map/mapper.py | 3 +++ roman/map/run.py | 9 ++------- roman/utils.py | 1 - 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/roman/align/results.py b/roman/align/results.py index d232c66..245f67c 100644 --- a/roman/align/results.py +++ b/roman/align/results.py @@ -58,13 +58,14 @@ def plot_align_results(results: SubmapAlignResults, dpi=500): # if no ground truth, can only show number of associations if None in results.submap_io.input_gt_pose_yaml: - fig, ax = plt.subplots(2 if show_sim else 1, 1, figsize=(8 if show_sim else 4,4), dpi=dpi) + fig, ax = plt.subplots(2 if show_sim else 1, 1, figsize=(8 if show_sim else 4, 4), dpi=dpi) + ax = np.array(ax).reshape(-1, 1) mp = ax[0, 0].imshow(results.clipper_num_associations, cmap='viridis', vmin=0) fig.colorbar(mp, fraction=0.04, pad=0.04) ax.set_title("Number of Associations") if show_sim: - mp = ax[1, 0].imshow(results.similarity_mat, cmap='viridis', vmin=0) + mp = ax[1, 0].imshow(results.similarity_mat, cmap='viridis', vmin=0.0, vmax=1.0) fig.colorbar(mp, fraction=0.04, pad=0.04) ax[1, 0].set_title("Similarity Score") @@ -105,7 +106,7 @@ def plot_align_results(results: SubmapAlignResults, dpi=500): ax[2, 0].set_title("Number of Associations") if show_sim: - mp = ax[2, 1].imshow(results.similarity_mat, cmap='viridis', vmin=0) + mp = ax[2, 1].imshow(results.similarity_mat, cmap='viridis', vmin=0.0, vmax=1.0) fig.colorbar(mp, fraction=0.04, pad=0.04) ax[2, 1].set_title("Similarity Score") diff --git a/roman/map/fastsam_wrapper.py b/roman/map/fastsam_wrapper.py index a19c72c..6f2785a 100644 --- a/roman/map/fastsam_wrapper.py +++ b/roman/map/fastsam_wrapper.py @@ -272,7 +272,7 @@ def run(self, t, pose, img, depth_data=None): # run fastsam masks = self._process_img(img, ignore_mask=ignore_mask, keep_mask=keep_mask) - + if self.semantics == 'dino': # Process the image for DINO dino_shape = 768 @@ -288,6 +288,7 @@ def run(self, t, pose, img, depth_data=None): model_output_patches=dino_output_patches, img_shape=img.shape ) + dino_features = self.unapply_rotation(dino_features) frame_descriptor = None if self.frame_descriptor_type is not None: diff --git a/roman/map/map.py b/roman/map/map.py index a80aaeb..7a2b5ce 100644 --- a/roman/map/map.py +++ b/roman/map/map.py @@ -147,7 +147,6 @@ def similarity(cls, submap1, submap2): elif len(desc1.shape) == len(desc2.shape) == 2: # maximum piecewise cosine similarity - sims = [] desc1 = desc1.reshape(desc1.shape[0], 1, desc1.shape[1]) # (N1, 1, D) desc2 = desc2.reshape(1, desc2.shape[0], desc2.shape[1]) # (1, N2, D) norm_prods = np.linalg.norm(desc1, axis=2) * np.linalg.norm(desc2, axis=2) # (N1, N2) diff --git a/roman/map/mapper.py b/roman/map/mapper.py index bdb7d6e..3ee08af 100644 --- a/roman/map/mapper.py +++ b/roman/map/mapper.py @@ -53,6 +53,9 @@ def update(self, t: float, pose: np.array, observations: List[Observation], fram if frame_descriptor is not None: self.frame_descriptors_history.append(frame_descriptor) + if len(observations) == 0: # nothing to update + return + # store last pose self.last_pose = pose.copy() diff --git a/roman/map/run.py b/roman/map/run.py index 0d018b4..c37e3b5 100644 --- a/roman/map/run.py +++ b/roman/map/run.py @@ -154,13 +154,8 @@ def update_segment_track(self, t, observations, pose_odom_camera, img, frame_des if bbox is not None: reprojected_bboxs.append((segment.id, bbox)) - if len(observations) > 0: - self.mapper.update(t, pose_odom_camera, observations, frame_descriptor) - else: - self.mapper.poses_flu_history.append(pose_odom_camera @ self.mapper._T_camera_flu) - self.mapper.times_history.append(t) - if frame_descriptor is not None: - self.mapper.frame_descriptors_history.append(frame_descriptor) + # update mapper with new frame + self.mapper.update(t, pose_odom_camera, observations, frame_descriptor) if self.viz_map or self.viz_observations or self.viz_3d: img_ret = self.draw(t, img, pose_odom_camera, observations, reprojected_bboxs) diff --git a/roman/utils.py b/roman/utils.py index 38c8774..6bef1ff 100644 --- a/roman/utils.py +++ b/roman/utils.py @@ -12,7 +12,6 @@ import matplotlib.pyplot as plt import numpy as np -import torch from typing import List from scipy.spatial.transform import Rotation as Rot from os.path import expandvars, expanduser From a3c005a494b6d9ffc1afa1af637e73c13cfaa52d Mon Sep 17 00:00:00 2001 From: "Qingyuan (Andy) Li" Date: Thu, 27 Nov 2025 03:45:42 -0500 Subject: [PATCH 5/6] refactor submap descriptor extraction for roman_ros --- roman/map/map.py | 73 +++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/roman/map/map.py b/roman/map/map.py index 7a2b5ce..20ac7d9 100644 --- a/roman/map/map.py +++ b/roman/map/map.py @@ -199,6 +199,40 @@ def load_roman_map(map_file: str) -> ROMANMap: roman_map = pickle.load(f) assert type(roman_map) == ROMANMap return roman_map + +def extract_submap_descriptors(submaps: List[Submap], descriptor_times: List[float], descriptors: List[np.ndarray], \ + poses: List[np.ndarray], submap_params: SubmapParams): + assert descriptors is not None, "ROMAN map must have frame descriptors to compute submap descriptors from them." + map_times_np = np.array(descriptor_times) + descriptors_np = np.vstack(descriptors) + + if submap_params.submap_descriptor == 'mean_frame_descriptor': + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + submap.descriptor = descriptors_np[frame_mask].mean(axis=0) + + elif submap_params.submap_descriptor == 'stacked_frame_descriptors': + if submap_params.frame_descriptor_dist is None: + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + submap.descriptor = descriptors_np[frame_mask] + else: + dist_thresh = submap_params.frame_descriptor_dist + map_pos_np = np.array([pose[:3,3] for pose in poses]) + + for submap in submaps: + frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) + frame_descriptors = descriptors_np[frame_mask] + frame_pos = map_pos_np[frame_mask] + + stacked_descriptors = [] + last_pose = None + for fd, fp in zip(frame_descriptors, frame_pos): + if (last_pose is None or np.linalg.norm(fp - last_pose) >= dist_thresh): + stacked_descriptors.append(fd) + last_pose = fp + + submap.descriptor = np.vstack(stacked_descriptors) def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, gt_flu_pose_data: PoseData=None) -> List[Submap]: @@ -305,38 +339,13 @@ def submaps_from_roman_map(roman_map: ROMANMap, submap_params: SubmapParams, submap.descriptor = np.mean([seg.semantic_descriptor for seg in submap.segments], axis=0).flatten() elif submap_params.submap_descriptor is not None: - # using frame descriptors - assert roman_map.descriptors is not None, "ROMAN map must have frame descriptors to compute submap descriptors from them." - map_times_np = np.array(roman_map.times) - descriptors_np = np.vstack(roman_map.descriptors) - - if submap_params.submap_descriptor == 'mean_frame_descriptor': - for submap in submaps: - frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) - submap.descriptor = descriptors_np[frame_mask].mean(axis=0) - - elif submap_params.submap_descriptor == 'stacked_frame_descriptors': - if submap_params.frame_descriptor_dist is None: - for submap in submaps: - frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) - submap.descriptor = descriptors_np[frame_mask] - else: - dist_thresh = submap_params.frame_descriptor_dist - map_pos_np = np.array([pose[:3,3] for pose in roman_map.trajectory]) - - for submap in submaps: - frame_mask = (map_times_np >= submap.first_seen) & (map_times_np <= submap.last_seen) - frame_descriptors = descriptors_np[frame_mask] - frame_pos = map_pos_np[frame_mask] - - stacked_descriptors = [] - last_pose = None - for fd, fp in zip(frame_descriptors, frame_pos): - if (last_pose is None or np.linalg.norm(fp - last_pose) >= dist_thresh): - stacked_descriptors.append(fd) - last_pose = fp - - submap.descriptor = np.vstack(stacked_descriptors) + extract_submap_descriptors( + submaps=submaps, + descriptor_times=roman_map.times, + descriptors=roman_map.descriptors, + poses=roman_map.trajectory, + submap_params=submap_params + ) return submaps From 8c35f84fe368babb04cd9a1036eab65e52524aa3 Mon Sep 17 00:00:00 2001 From: Mason Peterson Date: Mon, 1 Dec 2025 16:19:02 -0500 Subject: [PATCH 6/6] update setup.py version/info --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index ccb3170..9a4ff3a 100644 --- a/setup.py +++ b/setup.py @@ -9,11 +9,11 @@ setup( name='roman', - version='0.1.3', + version='0.1.4', description=project_description, - url='url', - author='Mason Peterson, Lucas Jia, and Yulun Tian', - author_email='masonbp@mit.edu, yixuany@mit.edu, yut034@ucsd.edu', + url='https://acl.mit.edu/roman', + author='Mason Peterson, Lucas Jia, Yulun Tian, and Andy Li', + author_email='masonbp@mit.edu, yixuany@mit.edu, yut034@ucsd.edu, andyli27@mit.edu', license='MIT', packages=find_packages(), install_requires=[