traveller59 · charlesyz · May 20, 2020 · Jun 11, 2020
diff --git a/second/core/box_np_ops.py b/second/core/box_np_ops.py
@@ -4,12 +4,14 @@
 import numpy as np
 from spconv.utils import rbbox_iou, rbbox_intersection
 
-from second.core.geometry import points_in_convex_polygon_3d_jit, points_count_convex_polygon_3d_jit
-
-
-def riou_cc(rbboxes, qrbboxes, standup_thresh=0.0):
-    # less than 50ms when used in second one thread. 10x slower than gpu
-    boxes_corners = center_to_corner_box2d(rbboxes[:, :2], rbboxes[:, 2:4],
+from second.core.geometry import points_in_convex_polygon_3d_jit, points_count_convex_polygon_3d_jit
+
+
+eps = 1e-8
+
+def riou_cc(rbboxes, qrbboxes, standup_thresh=0.0):
+    # less than 50ms when used in second one thread. 10x slower than gpu
+    boxes_corners = center_to_corner_box2d(rbboxes[:, :2], rbboxes[:, 2:4],
                                            rbboxes[:, 4])
     boxes_standup = corner_to_standup_nd(boxes_corners)
     qboxes_corners = center_to_corner_box2d(qrbboxes[:, :2], qrbboxes[:, 2:4],
@@ -50,29 +52,29 @@ def second_box_encode(boxes,
         xa, ya, za, wa, la, ha, ra, *cas = np.split(anchors, box_ndim, axis=1)
         xg, yg, zg, wg, lg, hg, rg, *cgs = np.split(boxes, box_ndim, axis=1)
     else:
-        xa, ya, za, wa, la, ha, ra = np.split(anchors, box_ndim, axis=1)
-        xg, yg, zg, wg, lg, hg, rg = np.split(boxes, box_ndim, axis=1)
-
-    diagonal = np.sqrt(la**2 + wa**2)  # 4.3
-    xt = (xg - xa) / diagonal
-    yt = (yg - ya) / diagonal
-    zt = (zg - za) / ha  # 1.6
-    lt = np.log(lg / la)
-    wt = np.log(wg / wa)
-    ht = np.log(hg / ha)
-    rt = rg - ra
-    cts = [g - a for g, a in zip(cgs, cas)]
-    if smooth_dim:
+        xa, ya, za, wa, la, ha, ra = np.split(anchors, box_ndim, axis=1)
+        xg, yg, zg, wg, lg, hg, rg = np.split(boxes, box_ndim, axis=1)
+
+    diagonal = np.sqrt(la**2 + wa**2 + eps)  # 4.3
+    xt = (xg - xa) / diagonal
+    yt = (yg - ya) / diagonal
+    zt = (zg - za) / ha  # 1.6
+    lt = np.log(lg / la + eps)
+    wt = np.log(wg / wa + eps)
+    ht = np.log(hg / ha + eps)
+    rt = rg - ra
+    cts = [g - a for g, a in zip(cgs, cas)]
+    if smooth_dim:
         lt = lg / la - 1
-        wt = wg / wa - 1
-        ht = hg / ha - 1
-    else:
-        lt = np.log(lg / la)
-        wt = np.log(wg / wa)
-        ht = np.log(hg / ha)
-    if encode_angle_to_vector:
-        rgx = np.cos(rg)
-        rgy = np.sin(rg)
+        wt = wg / wa - 1
+        ht = hg / ha - 1
+    else:
+        lt = np.log(lg / la + eps)
+        wt = np.log(wg / wa + eps)
+        ht = np.log(hg / ha + eps)
+    if encode_angle_to_vector:
+        rgx = np.cos(rg)
+        rgy = np.sin(rg)
         rax = np.cos(ra)
         ray = np.sin(ra)
         rtx = rgx - rax
@@ -106,13 +108,13 @@ def second_box_decode(box_encodings,
         xa, ya, za, wa, la, ha, ra = np.split(anchors, box_ndim, axis=-1)
         if encode_angle_to_vector:
             xt, yt, zt, wt, lt, ht, rtx, rty = np.split(box_encodings, box_ndim + 1, axis=-1)
-        else:
-            xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, box_ndim, axis=-1)
-
-    diagonal = np.sqrt(la**2 + wa**2)
-    xg = xt * diagonal + xa
-    yg = yt * diagonal + ya
-    zg = zt * ha + za
+        else:
+            xt, yt, zt, wt, lt, ht, rt = np.split(box_encodings, box_ndim, axis=-1)
+
+    diagonal = np.sqrt(la**2 + wa**2 + eps)
+    xg = xt * diagonal + xa
+    yg = yt * diagonal + ya
+    zg = zt * ha + za
     if smooth_dim:
         lg = (lt + 1) * la
         wg = (wt + 1) * wa
@@ -144,21 +146,21 @@ def bev_box_encode(boxes,
         encode_angle_to_vector: bool. increase aos performance, 
             decrease other performance.
     """
-    # need to convert boxes to z-center format
-    xa, ya, wa, la, ra = np.split(anchors, 5, axis=-1)
-    xg, yg, wg, lg, rg = np.split(boxes, 5, axis=-1)
-    diagonal = np.sqrt(la**2 + wa**2)  # 4.3
-    xt = (xg - xa) / diagonal
-    yt = (yg - ya) / diagonal
-    if smooth_dim:
-        lt = lg / la - 1
-        wt = wg / wa - 1
-    else:
-        lt = np.log(lg / la)
-        wt = np.log(wg / wa)
-    if encode_angle_to_vector:
-        rgx = np.cos(rg)
-        rgy = np.sin(rg)
+    # need to convert boxes to z-center format
+    xa, ya, wa, la, ra = np.split(anchors, 5, axis=-1)
+    xg, yg, wg, lg, rg = np.split(boxes, 5, axis=-1)
+    diagonal = np.sqrt(la**2 + wa**2 + eps)  # 4.3
+    xt = (xg - xa) / diagonal
+    yt = (yg - ya) / diagonal
+    if smooth_dim:
+        lt = lg / la - 1
+        wt = wg / wa - 1
+    else:
+        lt = np.log(lg / la + eps)
+        wt = np.log(wg / wa + eps)
+    if encode_angle_to_vector:
+        rgx = np.cos(rg)
+        rgy = np.sin(rg)
         rax = np.cos(ra)
         ray = np.sin(ra)
         rtx = rgx - rax
@@ -181,13 +183,13 @@ def bev_box_decode(box_encodings,
     # need to convert box_encodings to z-bottom format
     xa, ya, wa, la, ra = np.split(anchors, 5, axis=-1)
     if encode_angle_to_vector:
-        xt, yt, wt, lt, rtx, rty = np.split(box_encodings, 6, axis=-1)
-    else:
-        xt, yt, wt, lt, rt = np.split(box_encodings, 5, axis=-1)
-    diagonal = np.sqrt(la**2 + wa**2)
-    xg = xt * diagonal + xa
-    yg = yt * diagonal + ya
-    if smooth_dim:
+        xt, yt, wt, lt, rtx, rty = np.split(box_encodings, 6, axis=-1)
+    else:
+        xt, yt, wt, lt, rt = np.split(box_encodings, 5, axis=-1)
+    diagonal = np.sqrt(la**2 + wa**2 + eps)
+    xg = xt * diagonal + xa
+    yg = yt * diagonal + ya
+    if smooth_dim:
         lg = (lt + 1) * la
         wg = (wt + 1) * wa
     else:

diff --git a/second/data/nuscenes_dataset.py b/second/data/nuscenes_dataset.py
@@ -72,7 +72,7 @@ def __init__(self,
             "pedestrian": "pedestrian",
         }  # we only eval these classes in kitti
         self.version = self._metadata["version"]
-        self.eval_version = "cvpr_2019"
+        self.eval_version = "detection_cvpr_2019"
         self._with_velocity = False
 
     def __len__(self):
@@ -82,9 +82,11 @@ def __len__(self):
     def ground_truth_annotations(self):
         if "gt_boxes" not in self._nusc_infos[0]:
             return None
-        from nuscenes.eval.detection.config import eval_detection_configs
-        cls_range_map = eval_detection_configs[self.
-                                               eval_version]["class_range"]
+        from nuscenes.eval.detection.config import config_factory
+        from nuscenes.eval.detection.config import DetectionConfig
+        cfg = config_factory(self.eval_version)
+        cls_range_map = cfg.class_range
+
         gt_annos = []
         for info in self._nusc_infos:
             gt_names = info["gt_names"]
@@ -329,7 +331,7 @@ def evaluation_nusc(self, detections, output_dir):
                 box.velocity = np.array([*velocity, 0.0])
             boxes = _lidar_nusc_box_to_global(
                 token2info[det["metadata"]["token"]], boxes,
-                mapped_class_names, "cvpr_2019")
+                mapped_class_names, "detection_cvpr_2019")
             for i, box in enumerate(boxes):
                 name = mapped_class_names[box.label]
                 velocity = box.velocity[:2].tolist()
@@ -545,16 +547,17 @@ def _second_det_to_nusc_box(detection):
     return box_list
 
 
-def _lidar_nusc_box_to_global(info, boxes, classes, eval_version="cvpr_2019"):
+def _lidar_nusc_box_to_global(info, boxes, classes, eval_version="detection_cvpr_2019"):
     import pyquaternion
     box_list = []
     for box in boxes:
         # Move box to ego vehicle coord system
         box.rotate(pyquaternion.Quaternion(info['lidar2ego_rotation']))
         box.translate(np.array(info['lidar2ego_translation']))
-        from nuscenes.eval.detection.config import eval_detection_configs
-        # filter det in ego.
-        cls_range_map = eval_detection_configs[eval_version]["class_range"]
+        from nuscenes.eval.detection.config import config_factory
+        from nuscenes.eval.detection.config import DetectionConfig
+        cfg = config_factory(eval_version)
+        cls_range_map = cfg.class_range
         radius = np.linalg.norm(box.center[:2], 2)
         det_range = cls_range_map[classes[box.label]]
         if radius > det_range:
@@ -781,11 +784,13 @@ def create_nuscenes_infos(root_path, version="v1.0-trainval", max_sweeps=10):
 
 
 def get_box_mean(info_path, class_name="vehicle.car",
-                 eval_version="cvpr_2019"):
+                 eval_version="detection_cvpr_2019"):
     with open(info_path, 'rb') as f:
         nusc_infos = pickle.load(f)["infos"]
-    from nuscenes.eval.detection.config import eval_detection_configs
-    cls_range_map = eval_detection_configs[eval_version]["class_range"]
+    from nuscenes.eval.detection.config import config_factory
+    from nuscenes.eval.detection.config import DetectionConfig
+    cfg = config_factory(eval_version)
+    cls_range_map = cfg.class_range
 
     gt_boxes_list = []
     gt_vels_list = []
@@ -867,8 +872,10 @@ def render_nusc_result(nusc, results, sample_token):
 def cluster_trailer_box(info_path, class_name="bus"):
     with open(info_path, 'rb') as f:
         nusc_infos = pickle.load(f)["infos"]
-    from nuscenes.eval.detection.config import eval_detection_configs
-    cls_range_map = eval_detection_configs["cvpr_2019"]["class_range"]
+    from nuscenes.eval.detection.config import config_factory
+    from nuscenes.eval.detection.config import DetectionConfig
+    cfg = config_factory("detection_cvpr_2019")
+    cls_range_map = cfg.class_range
     gt_boxes_list = []
     for info in nusc_infos:
         gt_boxes = info["gt_boxes"]

diff --git a/second/pytorch/core/box_torch_ops.py b/second/pytorch/core/box_torch_ops.py
@@ -12,6 +12,8 @@
 from second.core.non_max_suppression.nms_cpu import rotate_nms_cc
 import spconv
 
+eps = 1e-8
+
 def second_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=False):
     """box encode for VoxelNet
     Args:
@@ -27,7 +29,7 @@ def second_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=F
         xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1)
         xg, yg, zg, wg, lg, hg, rg = torch.split(boxes, 1, dim=-1)
 
-    diagonal = torch.sqrt(la**2 + wa**2)
+    diagonal = torch.sqrt(la**2 + wa**2 + eps)
     xt = (xg - xa) / diagonal
     yt = (yg - ya) / diagonal
     zt = (zg - za) / ha
@@ -37,9 +39,9 @@ def second_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=F
         wt = wg / wa - 1
         ht = hg / ha - 1
     else:
-        lt = torch.log(lg / la)
-        wt = torch.log(wg / wa)
-        ht = torch.log(hg / ha)
+        lt = torch.log(lg / la + eps)
+        wt = torch.log(wg / wa + eps)
+        ht = torch.log(hg / ha + eps)
     if encode_angle_to_vector:
         rgx = torch.cos(rg)
         rgy = torch.sin(rg)
@@ -78,7 +80,7 @@ def second_box_decode(box_encodings, anchors, encode_angle_to_vector=False, smoo
 
     # za = za + ha / 2
     # xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1)
-    diagonal = torch.sqrt(la**2 + wa**2)
+    diagonal = torch.sqrt(la**2 + wa**2 + eps)
     xg = xt * diagonal + xa
     yg = yt * diagonal + ya
     zg = zt * ha + za
@@ -109,15 +111,15 @@ def bev_box_encode(boxes, anchors, encode_angle_to_vector=False, smooth_dim=Fals
     """
     xa, ya, wa, la, ra = torch.split(anchors, 1, dim=-1)
     xg, yg, wg, lg, rg = torch.split(boxes, 1, dim=-1)
-    diagonal = torch.sqrt(la**2 + wa**2)
+    diagonal = torch.sqrt(la**2 + wa**2 + eps)
     xt = (xg - xa) / diagonal
     yt = (yg - ya) / diagonal
     if smooth_dim:
         lt = lg / la - 1
         wt = wg / wa - 1
     else:
-        lt = torch.log(lg / la)
-        wt = torch.log(wg / wa)
+        lt = torch.log(lg / la + eps)
+        wt = torch.log(wg / wa + eps)
     if encode_angle_to_vector:
         rgx = torch.cos(rg)
         rgy = torch.sin(rg)
@@ -149,7 +151,7 @@ def bev_box_decode(box_encodings, anchors, encode_angle_to_vector=False, smooth_
         xt, yt, wt, lt, rt = torch.split(box_encodings, 1, dim=-1)
 
     # xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1)
-    diagonal = torch.sqrt(la**2 + wa**2)
+    diagonal = torch.sqrt(la**2 + wa**2 + eps)
     xg = xt * diagonal + xa
     yg = yt * diagonal + ya
     if smooth_dim:

diff --git a/second/pytorch/core/losses.py b/second/pytorch/core/losses.py
@@ -196,6 +196,8 @@ def _softmax_cross_entropy_with_logits(logits, labels):
   param = list(range(len(logits.shape)))
   transpose_param = [0] + [param[-1]] + param[1:-1]
   logits = logits.permute(*transpose_param) # [N, ..., C] -> [N, C, ...]
+  logits_max, _ = torch.max(logits, 1, keepdim=True)
+  logits = logits - logits_max
   loss_ftor = nn.CrossEntropyLoss(reduction='none')
   loss = loss_ftor(logits, labels.max(dim=-1)[1])
   return loss

diff --git a/second/pytorch/models/pointpillars.py b/second/pytorch/models/pointpillars.py
@@ -119,8 +119,9 @@ def forward(self, features, num_voxels, coors):
 
         dtype = features.dtype
         # Find distance of x, y, and z from cluster center
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
+        num_voxels_set_0_to_1 = num_voxels.clone()
+        num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1
+        points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1)
         f_cluster = features[:, :, :3] - points_mean
 
         # Find distance of x, y, and z from pillar center
@@ -205,8 +206,9 @@ def forward(self, features, num_voxels, coors):
 
         dtype = features.dtype
         # Find distance of x, y, and z from cluster center
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
+        num_voxels_set_0_to_1 = num_voxels.clone()
+        num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1
+        points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1)
         f_cluster = features[:, :, :3] - points_mean
 
         # Find distance of x, y, and z from pillar center
@@ -291,9 +293,9 @@ def forward(self, features, num_voxels, coors):
         device = features.device
 
         dtype = features.dtype
-        # Find distance of x, y, and z from cluster center
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
+        num_voxels_set_0_to_1 = num_voxels.clone()
+        num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1
+        points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1)
         f_cluster = features[:, :, :3] - points_mean
 
         # Find distance of x, y, and z from pillar center
@@ -380,8 +382,9 @@ def forward(self, features, num_voxels, coors):
 
         dtype = features.dtype
         # Find distance of x, y, and z from cluster center
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
+        num_voxels_set_0_to_1 = num_voxels.clone()
+        num_voxels_set_0_to_1[num_voxels_set_0_to_1==0] = 1
+        points_mean = features[:, :, :3].sum( dim=1, keepdim=True) / num_voxels_set_0_to_1.type_as(features).view(-1, 1, 1)
         f_cluster = features[:, :, :3] - points_mean
         pp_min = features[:, :, 2:3].min(dim=1, keepdim=True)[0]
         pp_max = features[:, :, 2:3].max(dim=1, keepdim=True)[0]

diff --git a/second/pytorch/train.py b/second/pytorch/train.py
@@ -302,6 +302,9 @@ def train(config_path,
                 example_torch = example_convert_to_torch(example, float_dtype)
 
                 batch_size = example["anchors"].shape[0]
+                if (batch_size == 1):
+                    model_logging.log_text("BATCH SIZE 1: SKIPPING ITERATION")
+                    continue
 
                 ret_dict = net_parallel(example_torch)
                 cls_preds = ret_dict["cls_preds"]
@@ -441,6 +444,10 @@ def evaluate(config_path,
     please use kitti_anno_to_label_file and convert_detection_to_kitti_annos
     in second.data.kitti_dataset.
     """
+    # A requested batch size of 1 is bumped to 2; any other batch size is used as given.
+    if batch_size == 1:
+        batch_size = 2
+
     assert len(kwargs) == 0
     model_dir = str(Path(model_dir).resolve())
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")