diff --git a/second/core/box_np_ops.py b/second/core/box_np_ops.py index 145ebac7..b024696c 100644 --- a/second/core/box_np_ops.py +++ b/second/core/box_np_ops.py @@ -4,12 +4,14 @@ import numpy as np from spconv.utils import rbbox_iou, rbbox_intersection -from second.core.geometry import points_in_convex_polygon_3d_jit, points_count_convex_polygon_3d_jit - - -def riou_cc(rbboxes, qrbboxes, standup_thresh=0.0): - # less than 50ms when used in second one thread. 10x slower than gpu - boxes_corners = center_to_corner_box2d(rbboxes[:, :2], rbboxes[:, 2:4], +from second.core.geometry import points_in_convex_polygon_3d_jit, points_count_convex_polygon_3d_jit + + +eps = 1e-8 + +def riou_cc(rbboxes, qrbboxes, standup_thresh=0.0): + # less than 50ms when used in second one thread. 10x slower than gpu + boxes_corners = center_to_corner_box2d(rbboxes[:, :2], rbboxes[:, 2:4], rbboxes[:, 4]) boxes_standup = corner_to_standup_nd(boxes_corners) qboxes_corners = center_to_corner_box2d(qrbboxes[:, :2], qrbboxes[:, 2:4], @@ -50,29 +52,29 @@ def second_box_encode(boxes, xa, ya, za, wa, la, ha, ra, *cas = np.split(anchors, box_ndim, axis=1) xg, yg, zg, wg, lg, hg, rg, *cgs = np.split(boxes, box_ndim, axis=1) else: - xa, ya, za, wa, la, ha, ra = np.split(anchors, box_ndim, axis=1) - xg, yg, zg, wg, lg, hg, rg = np.split(boxes, box_ndim, axis=1) - - diagonal = np.sqrt(la**2 + wa**2) # 4.3 - xt = (xg - xa) / diagonal - yt = (yg - ya) / diagonal - zt = (zg - za) / ha # 1.6 - lt = np.log(lg / la) - wt = np.log(wg / wa) - ht = np.log(hg / ha) - rt = rg - ra - cts = [g - a for g, a in zip(cgs, cas)] - if smooth_dim: + xa, ya, za, wa, la, ha, ra = np.split(anchors, box_ndim, axis=1) + xg, yg, zg, wg, lg, hg, rg = np.split(boxes, box_ndim, axis=1) + + diagonal = np.sqrt(la**2 + wa**2 + eps) # 4.3 + xt = (xg - xa) / diagonal + yt = (yg - ya) / diagonal + zt = (zg - za) / ha # 1.6 + lt = np.log(lg / la + eps) + wt = np.log(wg / wa + eps) + ht = np.log(hg / ha + eps) + rt = rg - ra + cts = [g - a for g, a in zip(cgs, cas)] + if smooth_dim: lt = lg / la - 1 - wt = wg / wa - 1 - ht = hg / ha - 1 - else: - lt = np.log(lg / la) - wt = np.log(wg / wa) - ht = np.log(hg / ha) - if encode_angle_to_vector: - rgx = np.cos(rg) - rgy = np.sin(rg) + wt = wg / wa - 1 + ht = hg / ha - 1 + else: + lt = np.log(lg / la + eps) + wt = np.log(wg / wa + eps) + ht = np.log(hg / ha + eps) + if encode_angle_to_vector: + rgx = np.cos(rg) + rgy = np.sin(rg) rax = np.cos(ra) ray = np.sin(ra) rtx = rgx - rax @@ -106,13 +108,13 @@ def second_box_decode(box_encodings, xa, ya, za, wa, la, ha, ra = np.split(anchors, box_ndim, axis=-1) if encode_angle_to_vector: xt, yt, zt, wt, lt, ht, rtx, rty = np.split(box_encodings, box_ndim + 1, axis=-1) - else: - xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, box_ndim, axis=-1) - - diagonal = np.sqrt(la**2 + wa**2) - xg = xt * diagonal + xa - yg = yt * diagonal + ya - zg = zt * ha + za + else: + xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, box_ndim, axis=-1) + + diagonal = np.sqrt(la**2 + wa**2 + eps) + xg = xt * diagonal + xa + yg = yt * diagonal + ya + zg = zt * ha + za if smooth_dim: lg = (lt + 1) * la wg = (wt + 1) * wa @@ -144,21 +146,21 @@ def bev_box_encode(boxes, encode_angle_to_vector: bool. increase aos performance, decrease other performance. """ - # need to convert boxes to z-center format - xa, ya, wa, la, ra = np.split(anchors, 5, axis=-1) - xg, yg, wg, lg, rg = np.split(boxes, 5, axis=-1) - diagonal = np.sqrt(la**2 + wa**2) # 4.3 - xt = (xg - xa) / diagonal - yt = (yg - ya) / diagonal - if smooth_dim: - lt = lg / la - 1 - wt = wg / wa - 1 - else: - lt = np.log(lg / la) - wt = np.log(wg / wa) - if encode_angle_to_vector: - rgx = np.cos(rg) - rgy = np.sin(rg) + # need to convert boxes to z-center format + xa, ya, wa, la, ra = np.split(anchors, 5, axis=-1) + xg, yg, wg, lg, rg = np.split(boxes, 5, axis=-1) + diagonal = np.sqrt(la**2 + wa**2 + eps) # 4.3 + xt = (xg - xa) / diagonal + yt = (yg - ya) / diagonal + if smooth_dim: + lt = lg / la - 1 + wt = wg / wa - 1 + else: + lt = np.log(lg / la + eps) + wt = np.log(wg / wa + eps) + if encode_angle_to_vector: + rgx = np.cos(rg) + rgy = np.sin(rg) rax = np.cos(ra) ray = np.sin(ra) rtx = rgx - rax @@ -181,13 +183,13 @@ def bev_box_decode(box_encodings, # need to convert box_encodings to z-bottom format xa, ya, wa, la, ra = np.split(anchors, 5, axis=-1) if encode_angle_to_vector: - xt, yt, wt, lt, rtx, rty = np.split(box_encodings, 6, axis=-1) - else: - xt, yt, wt, lt, rt = np.split(box_encodings, 5, axis=-1) - diagonal = np.sqrt(la**2 + wa**2) - xg = xt * diagonal + xa - yg = yt * diagonal + ya - if smooth_dim: + xt, yt, wt, lt, rtx, rty = np.split(box_encodings, 6, axis=-1) + else: + xt, yt, wt, lt, rt = np.split(box_encodings, 5, axis=-1) + diagonal = np.sqrt(la**2 + wa**2 + eps) + xg = xt * diagonal + xa + yg = yt * diagonal + ya + if smooth_dim: lg = (lt + 1) * la wg = (wt + 1) * wa else: diff --git a/second/data/nuscenes_dataset.py b/second/data/nuscenes_dataset.py index 24775704..c078b5a4 100644 --- a/second/data/nuscenes_dataset.py +++ b/second/data/nuscenes_dataset.py @@ -72,7 +72,7 @@ def __init__(self, "pedestrian": "pedestrian", } # we only eval these classes in kitti self.version = self._metadata["version"] - self.eval_version = "cvpr_2019" + self.eval_version = "detection_cvpr_2019" self._with_velocity = False def __len__(self): @@ -82,9 +82,11 @@ def __len__(self): def ground_truth_annotations(self): if "gt_boxes" not in self._nusc_infos[0]: return None - from nuscenes.eval.detection.config import eval_detection_configs - cls_range_map = eval_detection_configs[self. - eval_version]["class_range"] + from nuscenes.eval.detection.config import config_factory + from nuscenes.eval.detection.config import DetectionConfig + cfg = config_factory(self.eval_version) + cls_range_map = cfg.class_range + gt_annos = [] for info in self._nusc_infos: gt_names = info["gt_names"] @@ -329,7 +331,7 @@ def evaluation_nusc(self, detections, output_dir): box.velocity = np.array([*velocity, 0.0]) boxes = _lidar_nusc_box_to_global( token2info[det["metadata"]["token"]], boxes, - mapped_class_names, "cvpr_2019") + mapped_class_names, "detection_cvpr_2019") for i, box in enumerate(boxes): name = mapped_class_names[box.label] velocity = box.velocity[:2].tolist() @@ -545,16 +547,17 @@ def _second_det_to_nusc_box(detection): return box_list -def _lidar_nusc_box_to_global(info, boxes, classes, eval_version="cvpr_2019"): +def _lidar_nusc_box_to_global(info, boxes, classes, eval_version="detection_cvpr_2019"): import pyquaternion box_list = [] for box in boxes: # Move box to ego vehicle coord system box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation'])) box.translate(np.array(info['lidar2ego_translation'])) - from nuscenes.eval.detection.config import eval_detection_configs - # filter det in ego. - cls_range_map = eval_detection_configs[eval_version]["class_range"] + from nuscenes.eval.detection.config import config_factory + from nuscenes.eval.detection.config import DetectionConfig + cfg = config_factory(eval_version) + cls_range_map = cfg.class_range radius = np.linalg.norm(box.center[:2], 2) det_range = cls_range_map[classes[box.label]] if radius > det_range: @@ -781,11 +784,13 @@ def create_nuscenes_infos(root_path, version="v1.0-trainval", max_sweeps=10): def get_box_mean(info_path, class_name="vehicle.car", - eval_version="cvpr_2019"): + eval_version="detection_cvpr_2019"): with open(info_path, 'rb') as f: nusc_infos = pickle.load(f)["infos"] - from nuscenes.eval.detection.config import eval_detection_configs - cls_range_map = eval_detection_configs[eval_version]["class_range"] + from nuscenes.eval.detection.config import config_factory + from nuscenes.eval.detection.config import DetectionConfig + cfg = config_factory(eval_version) + cls_range_map = cfg.class_range gt_boxes_list = [] gt_vels_list = [] @@ -867,8 +872,10 @@ def render_nusc_result(nusc, results, sample_token): def cluster_trailer_box(info_path, class_name="bus"): with open(info_path, 'rb') as f: nusc_infos = pickle.load(f)["infos"] - from nuscenes.eval.detection.config import eval_detection_configs - cls_range_map = eval_detection_configs["cvpr_2019"]["class_range"] + from nuscenes.eval.detection.config import config_factory + from nuscenes.eval.detection.config import DetectionConfig + cfg = config_factory("detection_cvpr_2019") + cls_range_map = cfg.class_range gt_boxes_list = [] for info in nusc_infos: gt_boxes = info["gt_boxes"] diff --git a/second/pytorch/core/box_torch_ops.py b/second/pytorch/core/box_torch_ops.py index 8e65d841..f5003568 100644 --- a/second/pytorch/core/box_torch_ops.py +++ b/second/pytorch/core/box_torch_ops.py @@ -12,6 +12,8 @@ from second.core.non_max_suppression.nms_cpu import rotate_nms_cc import spconv +eps = 1e-8 + def second_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=False): """box encode for VoxelNet Args: @@ -27,7 +29,7 @@ def second_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=F xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) xg, yg, zg, wg, lg, hg, rg = torch.split(boxes, 1, dim=-1) - diagonal = torch.sqrt(la**2 + wa**2) + diagonal = torch.sqrt(la**2 + wa**2 + eps) xt = (xg - xa) / diagonal yt = (yg - ya) / diagonal zt = (zg - za) / ha @@ -37,9 +39,9 @@ def second_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=F wt = wg / wa - 1 ht = hg / ha - 1 else: - lt = torch.log(lg / la) - wt = torch.log(wg / wa) - ht = torch.log(hg / ha) + lt = torch.log(lg / la + eps) + wt = torch.log(wg / wa + eps) + ht = torch.log(hg / ha + eps) if encode_angle_to_vector: rgx = torch.cos(rg) rgy = torch.sin(rg) @@ -78,7 +80,7 @@ def second_box_decode(box_encodings, anchors, encode_angle_to_vector=False, smoo # za = za + ha / 2 # xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1) - diagonal = torch.sqrt(la**2 + wa**2) + diagonal = torch.sqrt(la**2 + wa**2 + eps) xg = xt * diagonal + xa yg = yt * diagonal + ya zg = zt * ha + za @@ -109,15 +111,15 @@ def bev_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=Fals """ xa, ya, wa, la, ra = torch.split(anchors, 1, dim=-1) xg, yg, wg, lg, rg = torch.split(boxes, 1, dim=-1) - diagonal = torch.sqrt(la**2 + wa**2) + diagonal = torch.sqrt(la**2 + wa**2 + eps) xt = (xg - xa) / diagonal yt = (yg - ya) / diagonal if smooth_dim: lt = lg / la - 1 wt = wg / wa - 1 else: - lt = torch.log(lg / la) - wt = torch.log(wg / wa) + lt = torch.log(lg / la + eps) + wt = torch.log(wg / wa + eps) if encode_angle_to_vector: rgx = torch.cos(rg) rgy = torch.sin(rg) @@ -149,7 +151,7 @@ def bev_box_decode(box_encodings, anchors, encode_angle_to_vector=False, smooth_ xt, yt, wt, lt, rt = torch.split(box_encodings, 1, dim=-1) # xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1) - diagonal = torch.sqrt(la**2 + wa**2) + diagonal = torch.sqrt(la**2 + wa**2 + eps) xg = xt * diagonal + xa yg = yt * diagonal + ya if smooth_dim: diff --git a/second/pytorch/core/losses.py b/second/pytorch/core/losses.py index 29ce7c86..0768a84f 100644 --- a/second/pytorch/core/losses.py +++ b/second/pytorch/core/losses.py @@ -196,6 +196,8 @@ def _softmax_cross_entropy_with_logits(logits, labels): param = list(range(len(logits.shape))) transpose_param = [0] + [param[-1]] + param[1:-1] logits = logits.permute(*transpose_param) # [N, ..., C] -> [N, C, ...] + logits_max, _ = torch.max(logits, 1, keepdim=True) + logits = logits - logits_max loss_ftor = nn.CrossEntropyLoss(reduction='none') loss = loss_ftor(logits, labels.max(dim=-1)[1]) return loss diff --git a/second/pytorch/models/pointpillars.py b/second/pytorch/models/pointpillars.py index 266ab6eb..41ad7df2 100644 --- a/second/pytorch/models/pointpillars.py +++ b/second/pytorch/models/pointpillars.py @@ -119,8 +119,9 @@ def forward(self, features, num_voxels, coors): dtype = features.dtype # Find distance of x, y, and z from cluster center - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) + num_voxels_set_0_to_1 = num_voxels.clone() + num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1 + points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Find distance of x, y, and z from pillar center @@ -205,8 +206,9 @@ def forward(self, features, num_voxels, coors): dtype = features.dtype # Find distance of x, y, and z from cluster center - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) + num_voxels_set_0_to_1 = num_voxels.clone() + num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1 + points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Find distance of x, y, and z from pillar center @@ -291,9 +293,9 @@ def forward(self, features, num_voxels, coors): device = features.device dtype = features.dtype - # Find distance of x, y, and z from cluster center - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) + num_voxels_set_0_to_1 = num_voxels.clone() + num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1 + points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1) f_cluster = features[:, :, :3] - points_mean # Find distance of x, y, and z from pillar center @@ -380,8 +382,9 @@ def forward(self, features, num_voxels, coors): dtype = features.dtype # Find distance of x, y, and z from cluster center - points_mean = features[:, :, :3].sum( - dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) + num_voxels_set_0_to_1 = num_voxels.clone() + num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1 + points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1) f_cluster = features[:, :, :3] - points_mean pp_min = features[:, :, 2:3].min(dim=1, keepdim=True)[0] pp_max = features[:, :, 2:3].max(dim=1, keepdim=True)[0] diff --git a/second/pytorch/train.py b/second/pytorch/train.py index 524ddf34..da443981 100644 --- a/second/pytorch/train.py +++ b/second/pytorch/train.py @@ -302,6 +302,9 @@ def train(config_path, example_torch = example_convert_to_torch(example, float_dtype) batch_size = example["anchors"].shape[0] + if (batch_size == 1): + model_logging.log_text("BATCH SIZE 1: SKIPPING ITERATION") + continue ret_dict = net_parallel(example_torch) cls_preds = ret_dict["cls_preds"] @@ -441,6 +444,10 @@ def evaluate(config_path, please use kitti_anno_to_label_file and convert_detection_to_kitti_annos in second.data.kitti_dataset. """ + # Force batch size 2 + if batch_size == 1: + batch_size = 2 + assert len(kwargs) == 0 model_dir = str(Path(model_dir).resolve()) device = torch.device("cuda" if torch.cuda.is_available() else "cpu")