diff --git a/src/sharp/cli/predict.py b/src/sharp/cli/predict.py
index 8914bb56..ed9391a6 100644
--- a/src/sharp/cli/predict.py
+++ b/src/sharp/cli/predict.py
@@ -93,6 +93,7 @@ def predict_cli(
     else:
         for ext in extensions:
             image_paths.extend(list(input_path.glob(f"**/*{ext}")))
+    image_paths.sort()
 
     if len(image_paths) == 0:
         LOGGER.info("No valid images found. Input was %s.", input_path)
diff --git a/src/sharp/cli/render.py b/src/sharp/cli/render.py
index 22c0bf87..30605b3f 100644
--- a/src/sharp/cli/render.py
+++ b/src/sharp/cli/render.py
@@ -52,6 +52,7 @@ def render_cli(input_path: Path, output_path: Path, verbose: bool):
         scene_paths = [input_path]
     elif input_path.is_dir():
         scene_paths = list(input_path.glob("*.ply"))
+        scene_paths.sort()
     else:
         LOGGER.error("Input path must be either directory or single PLY file.")
         exit(1)
diff --git a/src/sharp/utils/gaussians.py b/src/sharp/utils/gaussians.py
index ed73de86..623a32d4 100644
--- a/src/sharp/utils/gaussians.py
+++ b/src/sharp/utils/gaussians.py
@@ -352,9 +352,10 @@ def save_ply(
     def _inverse_sigmoid(tensor: torch.Tensor) -> torch.Tensor:
         return torch.log(tensor / (1.0 - tensor))
 
-    xyz = gaussians.mean_vectors.flatten(0, 1)
-    scale_logits = torch.log(gaussians.singular_values).flatten(0, 1)
-    quaternions = gaussians.quaternions.flatten(0, 1)
+    xyz = gaussians.mean_vectors.flatten(0, 1).detach().cpu()
+    scale_logits = torch.log(gaussians.singular_values).flatten(0, 1).detach().cpu()
+    quaternions = gaussians.quaternions.flatten(0, 1).detach().cpu()
+    opacity_logits = _inverse_sigmoid(gaussians.opacities).flatten(0, 1).unsqueeze(-1).detach().cpu()
 
     # SHARP takes an image, convert it to sRGB color space as input,
     # and predicts linearRGB Gaussians as output.
@@ -368,14 +369,10 @@ def _inverse_sigmoid(tensor: torch.Tensor) -> torch.Tensor:
     # - The SHARP renderer will still handle conversions properly.
     # - Public renderers will be mostly working fine when regarding sRGB images as linearRGB images,
     #   although for the best performance, it is recommended to apply the conversions.
-    colors = convert_rgb_to_spherical_harmonics(
-        cs_utils.linearRGB2sRGB(gaussians.colors.flatten(0, 1))
-    )
+    colors_linear = gaussians.colors.flatten(0, 1).detach().cpu()
+    colors = convert_rgb_to_spherical_harmonics(cs_utils.linearRGB2sRGB(colors_linear))
     color_space_index = cs_utils.encode_color_space("sRGB")
 
-    # Store opacity logits.
-    opacity_logits = _inverse_sigmoid(gaussians.opacities).flatten(0, 1).unsqueeze(-1)
-
     attributes = torch.cat(
         (
             xyz,
@@ -397,8 +394,8 @@ def _inverse_sigmoid(tensor: torch.Tensor) -> torch.Tensor:
     ]
     num_gaussians = len(xyz)
 
-    elements = np.empty(num_gaussians, dtype=dtype_full)
-    elements[:] = list(map(tuple, attributes.detach().cpu().numpy()))
+    attributes_np = attributes.numpy()
+    elements = np.core.records.fromarrays(attributes_np.T, dtype=dtype_full)
     vertex_elements = PlyElement.describe(elements, "vertex")
 
     # Load image-wise metadata.
@@ -476,8 +473,10 @@ def _inverse_sigmoid(tensor: torch.Tensor) -> torch.Tensor:
             disparity_element,
             color_space_element,
             version_element,
-        ]
+        ],
+        byte_order='<'
     )
 
-    plydata.write(path)
+    with open(path, 'wb') as f:
+        plydata.write(f)
     return plydata
diff --git a/src/sharp/utils/io.py b/src/sharp/utils/io.py
index 07a98be5..93a4bcb3 100644
--- a/src/sharp/utils/io.py
+++ b/src/sharp/utils/io.py
@@ -58,7 +58,7 @@ def load_rgb(
     if f_35mm is None or f_35mm < 1:
         f_35mm = img_exif.get("FocalLength", None)
     if f_35mm is None:
-        LOGGER.warn(f"Did not find focallength in exif data of {path} - Setting to 30mm.")
+        LOGGER.warning(f"Did not find focallength in exif data of {path} - Setting to 30mm.")
         f_35mm = 30.0
     if f_35mm < 10.0:
         LOGGER.info("Found focal length below 10mm, assuming it's not for 35mm.")
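
The save_ply changes above write a little-endian PLY whose attributes are stored as log-scales and opacity logits. A minimal, illustrative sketch of reading such a file back with plyfile and undoing those encodings follows; the property names ('x', 'y', 'z', 'scale_0'.., 'opacity') are assumed from the common 3DGS convention and are not taken from this diff, so check dtype_full in save_ply for the names actually written.

    # Illustrative sketch only, not part of the diff. Property names are an
    # assumption; verify them against dtype_full in save_ply.
    import numpy as np
    from plyfile import PlyData


    def load_gaussian_params(path):
        plydata = PlyData.read(path)
        # save_ply now writes with byte_order='<' (little-endian).
        assert plydata.byte_order == "<"
        vertex = plydata["vertex"]
        xyz = np.stack([vertex["x"], vertex["y"], vertex["z"]], axis=-1)
        # Scales are stored as logs and opacities as logits, so invert both here.
        scales = np.exp(np.stack([vertex[f"scale_{i}"] for i in range(3)], axis=-1))
        opacities = 1.0 / (1.0 + np.exp(-np.asarray(vertex["opacity"])))
        return xyz, scales, opacities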