diff --git a/.github/workflows/core_code_checks.yml b/.github/workflows/core_code_checks.yml index 1eb0612702..b3421c22d3 100644 --- a/.github/workflows/core_code_checks.yml +++ b/.github/workflows/core_code_checks.yml @@ -15,10 +15,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.8.13 + - name: Set up Python 3.11.13 uses: actions/setup-python@v4 with: - python-version: '3.8.13' + python-version: '3.11.13' - uses: actions/cache@v3 with: path: ${{ env.pythonLocation }} diff --git a/README_FULL.md b/README_FULL.md new file mode 100644 index 0000000000..25a213d3f0 --- /dev/null +++ b/README_FULL.md @@ -0,0 +1,3 @@ +install nerfstudio via base.bat Conda or python venv (experimental) +install extra nerfstudio algorithms via extras.bat +validate the installed and available algorithms with test_cli.py diff --git a/base.bat b/base.bat index a0a3a2a6ad..a06a7e1930 100644 --- a/base.bat +++ b/base.bat @@ -3,8 +3,8 @@ setlocal enabledelayedexpansion REM ==== CONFIG ==== set ENV_NAME=nerfstudio -set YAML_FILE=nerfstudio_stable_environment_post_zipnerf.yaml -set REQUIREMENTS=requirements_post_zipnerf.txt +set YAML_FILE=requirements_conda.yaml +set REQUIREMENTS=requirements_pip.txt set PYTHON_EXE=python echo. diff --git a/docs/developer_guides/pipelines/datamanagers.md b/docs/developer_guides/pipelines/datamanagers.md index 316514c641..0d4a8b8ba1 100644 --- a/docs/developer_guides/pipelines/datamanagers.md +++ b/docs/developer_guides/pipelines/datamanagers.md @@ -115,6 +115,32 @@ To train splatfacto with a large dataset that's unable to fit in memory, please ns-train splatfacto --data {PROCESSED_DATA_DIR} --pipeline.datamanager.cache-images disk ``` +Check out these flowcharts for more customization on large datasets! 
+ +```{image} imgs/DatamanagerGuide-LargeNeRF-light.png +:align: center +:class: only-light +:width: 600 +``` + +```{image} imgs/DatamanagerGuide-LargeNeRF-dark.png +:align: center +:class: only-dark +:width: 600 +``` + +```{image} imgs/DatamanagerGuide-Large3DGS-light.png +:align: center +:class: only-light +:width: 600 +``` + +```{image} imgs/DatamanagerGuide-Large3DGS-dark.png +:align: center +:class: only-dark +:width: 600 +``` + ## Migrating Your DataManager to the new DataManager Many methods subclass a DataManager and add extra data to it. If you would like your custom datamanager to also support new parallel features, you can migrate any custom dataloading logic to the new `custom_ray_processor()` API. This function takes in a full training batch (either image or ray bundle) and allows the user to modify or add to it. Let's take a look at an example for the LERF method, which was built on Nerfstudio's VanillaDataManager. This API provides an interface to attach new information to the RayBundle (for ray based methods), Cameras object (for splatting based methods), or ground truth dictionary. It runs in a background process if disk caching is enabled, otherwise it runs in the main process. 
diff --git a/docs/developer_guides/pipelines/imgs/DatamanagerGuide-Large3DGS-dark.png b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-Large3DGS-dark.png new file mode 100644 index 0000000000..cdbb2f8f7a Binary files /dev/null and b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-Large3DGS-dark.png differ diff --git a/docs/developer_guides/pipelines/imgs/DatamanagerGuide-Large3DGS-light.png b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-Large3DGS-light.png new file mode 100644 index 0000000000..972577bba1 Binary files /dev/null and b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-Large3DGS-light.png differ diff --git a/docs/developer_guides/pipelines/imgs/DatamanagerGuide-LargeNeRF-dark.png b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-LargeNeRF-dark.png new file mode 100644 index 0000000000..23c93aee7d Binary files /dev/null and b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-LargeNeRF-dark.png differ diff --git a/docs/developer_guides/pipelines/imgs/DatamanagerGuide-LargeNeRF-light.png b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-LargeNeRF-light.png new file mode 100644 index 0000000000..ebdec0c2ac Binary files /dev/null and b/docs/developer_guides/pipelines/imgs/DatamanagerGuide-LargeNeRF-light.png differ diff --git a/docs/developer_guides/viewer/index.md b/docs/developer_guides/viewer/index.md index 9df5c60d5a..9a35a87324 100644 --- a/docs/developer_guides/viewer/index.md +++ b/docs/developer_guides/viewer/index.md @@ -2,7 +2,7 @@ > We have a real-time web viewer that requires no installation. It's available at [https://viewer.nerf.studio/](https://viewer.nerf.studio/), where you can connect to your training job. -The viewer is built on [Viser](https://github.com/brentyi/viser/tree/main/viser) using [ThreeJS](https://threejs.org/) and packaged into a [ReactJS](https://reactjs.org/) application. This client viewer application will connect via a websocket to a server running on your machine. 
+The viewer is built on [Viser](https://github.com/nerfstudio-project/viser) using [ThreeJS](https://threejs.org/) and packaged into a [ReactJS](https://reactjs.org/) application. This client viewer application will connect via a websocket to a server running on your machine. ```{toctree} :titlesonly: diff --git a/docs/index.md b/docs/index.md index f43ec385a1..c17f37535e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -154,6 +154,7 @@ This documentation is organized into 3 parts: - [SIGNeRF](nerfology/methods/signerf.md): Controlled Generative Editing of NeRF Scenes - [K-Planes](nerfology/methods/kplanes.md): Unified 3D and 4D Radiance Fields - [LERF](nerfology/methods/lerf.md): Language Embedded Radiance Fields +- [LiveScene](nerfology/methods/livescene.md): Language Embedding Interactive Radiance Fields for Physical Scene Rendering and Control - [Feature Splatting](nerfology/methods/feature_splatting.md): Gaussian Feature Splatting based on GSplats - [Nerfbusters](nerfology/methods/nerfbusters.md): Removing Ghostly Artifacts from Casually Captured NeRFs - [NeRFPlayer](nerfology/methods/nerfplayer.md): 4D Radiance Fields by Streaming Feature Channels @@ -161,7 +162,7 @@ This documentation is organized into 3 parts: - [PyNeRF](nerfology/methods/pynerf.md): Pyramidal Neural Radiance Fields - [SeaThru-NeRF](nerfology/methods/seathru_nerf.md): Neural Radiance Field for subsea scenes - [Zip-NeRF](nerfology/methods/zipnerf.md): Anti-Aliased Grid-Based Neural Radiance Fields -- [NeRFtoGSandBack](nerfology/methods/nerf2gs2nerf.md): Converting back and forth between NeRF and GS to get the best of both approaches. +- [NeRFtoGSandBack](nerfology/methods/nerf2gs2nerf.md): Converting back and forth between NeRF and GS to get the best of both approaches - [OpenNeRF](nerfology/methods/opennerf.md): OpenSet 3D Neural Scene Segmentation **Eager to contribute a method?** We'd love to see you use nerfstudio in implementing new (or even existing) methods! 
Please view our {ref}`guide` for more details about how to add to this list! diff --git a/docs/nerfology/methods/index.md b/docs/nerfology/methods/index.md index 320d6ae97f..98c1367a03 100644 --- a/docs/nerfology/methods/index.md +++ b/docs/nerfology/methods/index.md @@ -34,6 +34,7 @@ The following methods are supported in nerfstudio: SIGNeRF K-Planes LERF + LiveScene Feature-Splatting Mip-NeRF NeRF diff --git a/docs/nerfology/methods/livescene.md b/docs/nerfology/methods/livescene.md new file mode 100644 index 0000000000..3a7f03f045 --- /dev/null +++ b/docs/nerfology/methods/livescene.md @@ -0,0 +1,101 @@ +# LiveScene + +

Language Embedding Interactive Radiance Fields for Physical Scene Rendering and Control

+ +```{button-link} https://tavish9.github.io/livescene/ +:color: primary +:outline: +Paper Website +``` + +```{button-link} https://github.com/Tavish9/livescene/ +:color: primary +:outline: +Code +``` + + + +**The first scene-level language-embedded interactive radiance field, which efficiently reconstructs and controls complex physical scenes, enabling manipulation of multiple articulated objects and language-based interaction.** + +## Installation + +First install nerfstudio dependencies. Then run: + +```bash +pip install git+https://github.com/Tavish9/livescene +``` + +## Running LiveScene + +Details for running LiveScene (built with Nerfstudio!) can be found [here](https://github.com/Tavish9/livescene). +Once installed, run: + +```bash +ns-train livescene --help +``` + +There is only one default configuration provided. However, you can run it for different datasets. + +The default configuration provided is: + +| Method | Description | Memory | Quality | +| ----------- | ----------------------------------------------- | ------ | ------- | +| `livescene` | LiveScene with OpenCLIP ViT-B/16, used in paper | ~8 GB | Good | + +There are two new dataparser providers for LiveScene: + +| Method | Description | Scene type | +| ---------------- | ------------------------------- | ----------------- | +| `livescene-sim` | OmniSim dataset for LiveScene | Synthetic dataset | +| `livescene-real` | InterReal dataset for LiveScene | Real dataset | + +## Method + +LiveScene proposes an efficient factorization that decomposes the interactive scene into multiple local deformable fields to separately reconstruct individual interactive objects, achieving the first accurate and independent control on multiple interactive objects in a complex scene. 
Moreover, LiveScene introduces an interaction-aware language embedding method that generates varying language embeddings to localize individual interactive objects under different interactive states, enabling arbitrary control of interactive objects using natural language. + +### Overview + +Given a camera view and control variable $\boldsymbol{\kappa}$ of one specific interactive object, a series of 3D points is sampled in a local deformable field that models the interactive motions of this specific interactive object, and then the interactive object with a novel interactive motion state is generated via volume rendering. Moreover, an interaction-aware language embedding is utilized to localize and control individual interactive objects using natural language. + + + +### Multi-scale Interaction Space Factorization + +LiveScene maintains multiple local deformable fields $\left \{\mathcal{R}_1, \mathcal{R}_2, \cdots \mathcal{R}_\alpha \right \}$ for each interactive object in the 4D space, and projects high-dimensional interaction features into a compact multi-scale 4D space. In training, LiveScene introduces a feature repulsion loss to amplify the feature differences between distinct deformable scenes, which relieves the boundary ray sampling and feature storage conflicts. + + + +### Interaction-Aware Language Embedding + +LiveScene leverages the proposed multi-scale interaction space factorization to efficiently store language features in lightweight planes by indexing the maximum probability sampling instead of 3D fields in LERF. For any sampling point $\mathbf{p}$, it retrieves a local language feature group, and performs bilinear interpolation to obtain a language embedding that adapts to interactive variable changes from surrounding clip features. 
+ + + +## Dataset + +To our knowledge, existing view synthetic datasets for interactive scene rendering are primarily limited to a few interactive objects, making it impractical to scale up to real scenarios involving multi-object interactions. To bridge this gap, we construct two scene-level, high-quality annotated datasets to advance research progress in reconstructing and understanding interactive scenes: OminiSim and InterReal, containing 28 subsets and 70 interactive objects with 2 million samples, providing rgbd images, camera trajectories, interactive object masks, prompt captions, and corresponding object state quantities at each time step. + + + +## Interaction + +For more interaction with viewer, please see [here](https://github.com/Tavish9/livescene?tab=readme-ov-file#3-interact-with-viewer). + +## BibTeX + +If you find our work helpful for your research, please consider citing + +```none +@misc{livescene2024, + title={LiveScene: Language Embedding Interactive Radiance Fields for Physical Scene Rendering and Control}, + author={Delin Qu, Qizhi Chen, Pingrui Zhang, Xianqiang Gao, Bin Zhao, Zhigang Wang, Dong Wang†, Xuelong Li†}, + year={2024}, + eprint={2406.16038}, + archivePrefix={arXiv}, + } +``` diff --git a/nerfstudio/cameras/camera_utils.py b/nerfstudio/cameras/camera_utils.py index e9e194543e..54fc6c2d5d 100644 --- a/nerfstudio/cameras/camera_utils.py +++ b/nerfstudio/cameras/camera_utils.py @@ -172,7 +172,7 @@ def get_interpolated_poses(pose_a: NDArray, pose_b: NDArray, steps: int = 10) -> quat_b = quaternion_from_matrix(pose_b[:3, :3]) ts = np.linspace(0, 1, steps) - quats = [quaternion_slerp(quat_a, quat_b, t) for t in ts] + quats = [quaternion_slerp(quat_a, quat_b, float(t)) for t in ts] trans = [(1 - t) * pose_a[:3, 3] + t * pose_b[:3, 3] for t in ts] poses_ab = [] @@ -199,7 +199,7 @@ def get_interpolated_k( List of interpolated camera poses """ Ks: List[Float[Tensor, "3 3"]] = [] - ts = np.linspace(0, 1, steps) + ts = torch.linspace(0, 1, 
steps, dtype=k_a.dtype, device=k_a.device) for t in ts: new_k = k_a * (1.0 - t) + k_b * t Ks.append(new_k) @@ -218,7 +218,7 @@ def get_interpolated_time( steps: number of steps the interpolated pose path should contain """ times: List[Float[Tensor, "1"]] = [] - ts = np.linspace(0, 1, steps) + ts = torch.linspace(0, 1, steps, dtype=time_a.dtype, device=time_a.device) for t in ts: new_t = time_a * (1.0 - t) + time_b * t times.append(new_t) diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py index b1561e65b2..e971b41f61 100644 --- a/nerfstudio/cameras/cameras.py +++ b/nerfstudio/cameras/cameras.py @@ -1021,3 +1021,34 @@ def rescale_output_resolution( self.width = torch.ceil(self.width * scaling_factor).to(torch.int64) else: raise ValueError("Scale rounding mode must be 'floor', 'round' or 'ceil'.") + + def update_tiling_intrinsics(self, tiling_factor: int) -> None: + """ + Update camera intrinsics based on tiling_factor. + Must match tiling logic as defined in dataparser. + + Args: + tiling_factor: Tiling factor to apply to the camera intrinsics. 
+ """ + if tiling_factor == 1: + return + + num_tiles = tiling_factor**2 + + # Compute tile sizes + base_tile_w, remainder_w = self.width // tiling_factor, self.width % tiling_factor + base_tile_h, remainder_h = self.height // tiling_factor, self.height % tiling_factor + + tile_indices = torch.arange(len(self.cx), device=self.cx.device).unsqueeze(1) % num_tiles + row_indices, col_indices = tile_indices // tiling_factor, tile_indices % tiling_factor + + x_offsets = col_indices * base_tile_w + torch.minimum(col_indices, remainder_w) + y_offsets = row_indices * base_tile_h + torch.minimum(row_indices, remainder_h) + + # Adjust principal points + self.cx = self.cx - x_offsets + self.cy = self.cy - y_offsets + + # Adjust height/width + self.width = base_tile_w + (col_indices < remainder_w).to(torch.int) + self.height = base_tile_h + (row_indices < remainder_h).to(torch.int) diff --git a/nerfstudio/cameras/rays.py b/nerfstudio/cameras/rays.py index a9c38d8e61..f4f1086993 100644 --- a/nerfstudio/cameras/rays.py +++ b/nerfstudio/cameras/rays.py @@ -136,6 +136,7 @@ def get_weights(self, densities: Float[Tensor, "*batch num_samples 1"]) -> Float Weights for each sample """ + assert self.deltas is not None, "Deltas must be set to compute weights" delta_density = self.deltas * densities alphas = 1 - torch.exp(-delta_density) diff --git a/nerfstudio/configs/external_methods.py b/nerfstudio/configs/external_methods.py index 002b3299b6..fdf1be7429 100644 --- a/nerfstudio/configs/external_methods.py +++ b/nerfstudio/configs/external_methods.py @@ -93,6 +93,21 @@ class ExternalMethod: ) ) +# LiveScene +external_methods.append( + ExternalMethod( + """[bold yellow]LiveScene[/bold yellow] +For more information visit: https://docs.nerf.studio/nerfology/methods/livescene.html + +To enable LiveScene, you must install it first by running: + [grey]pip install git+https://github.com/Tavish9/livescene[/grey]""", + configurations=[ + ("livescene", "LiveScene with OpenCLIP ViT-B/16, used in 
paper"), + ], + pip_package="git+https://github.com/Tavish9/livescene", + ) +) + # Feature Splatting external_methods.append( ExternalMethod( diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index bc1b4225aa..dbb6acf14c 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -219,7 +219,7 @@ max_num_iterations=30000, mixed_precision=True, pipeline=VanillaPipelineConfig( - datamanager=VanillaDataManagerConfig( + datamanager=ParallelDataManagerConfig( _target=ParallelDataManager[DepthDataset], dataparser=NerfstudioDataParserConfig(), train_num_rays_per_batch=4096, diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 3ec06120cf..9ed8db6178 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -26,6 +26,7 @@ from copy import deepcopy from dataclasses import dataclass, field from functools import cached_property +from itertools import islice from pathlib import Path from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin @@ -45,7 +46,7 @@ from nerfstudio.data.datasets.base_dataset import InputDataset from nerfstudio.data.utils.data_utils import identity_collate from nerfstudio.data.utils.dataloaders import ImageBatchStream, _undistort_image -from nerfstudio.utils.misc import get_orig_class +from nerfstudio.utils.misc import get_dict_to_torch, get_orig_class from nerfstudio.utils.rich_utils import CONSOLE @@ -84,7 +85,7 @@ class FullImageDatamanagerConfig(DataManagerConfig): dataloader_num_workers: int = 4 """The number of workers performing the dataloading from either disk/RAM, which includes collating, pixel sampling, unprojecting, ray generation etc.""" - prefetch_factor: int = 4 + prefetch_factor: Optional[int] = 4 """The limit number of batches a worker will start 
loading once an iterator is created. More details are described here: https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader""" cache_compressed_images: bool = False @@ -356,9 +357,9 @@ def fixed_indices_eval_dataloader(self) -> List[Tuple[Cameras, Dict]]: self.eval_imagebatch_stream, batch_size=1, num_workers=0, - collate_fn=identity_collate, + collate_fn=lambda x: x[0], ) - return [batch[0] for batch in dataloader] + return list(islice(dataloader, len(self.eval_dataset))) image_indices = [i for i in range(len(self.eval_dataset))] data = [d.copy() for d in self.cached_eval] @@ -388,6 +389,8 @@ def next_train(self, step: int) -> Tuple[Cameras, Dict]: self.train_count += 1 if self.config.cache_images == "disk": camera, data = next(self.iter_train_image_dataloader)[0] + camera = camera.to(self.device) + data = get_dict_to_torch(data, self.device) return camera, data image_idx = self.train_unseen_cameras.pop(0) @@ -414,6 +417,8 @@ def next_eval(self, step: int) -> Tuple[Cameras, Dict]: self.eval_count += 1 if self.config.cache_images == "disk": camera, data = next(self.iter_eval_image_dataloader)[0] + camera = camera.to(self.device) + data = get_dict_to_torch(data, self.device) return camera, data return self.next_eval_image(step=step) diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py index fe3a62f3c4..e1e1f6ef52 100644 --- a/nerfstudio/data/datamanagers/parallel_datamanager.py +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -40,7 +40,7 @@ RayBatchStream, variable_res_collate, ) -from nerfstudio.utils.misc import get_orig_class +from nerfstudio.utils.misc import get_dict_to_torch, get_orig_class from nerfstudio.utils.rich_utils import CONSOLE @@ -56,7 +56,7 @@ class ParallelDataManagerConfig(VanillaDataManagerConfig): dataloader_num_workers: int = 4 """The number of workers performing the dataloading from either disk/RAM, which includes collating, pixel sampling, 
unprojecting, ray generation etc.""" - prefetch_factor: int = 10 + prefetch_factor: Optional[int] = 10 """The limit number of batches a worker will start loading once an iterator is created. More details are described here: https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader""" cache_compressed_images: bool = False @@ -241,12 +241,16 @@ def next_train(self, step: int) -> Tuple[RayBundle, Dict]: """Returns the next batch of data from the train dataloader.""" self.train_count += 1 ray_bundle, batch = next(self.iter_train_raybundles)[0] + ray_bundle = ray_bundle.to(self.device) + batch = get_dict_to_torch(batch, self.device) return ray_bundle, batch def next_eval(self, step: int) -> Tuple[RayBundle, Dict]: """Returns the next batch of data from the eval dataloader.""" self.eval_count += 1 ray_bundle, batch = next(self.iter_train_raybundles)[0] + ray_bundle = ray_bundle.to(self.device) + batch = get_dict_to_torch(batch, self.device) return ray_bundle, batch def next_eval_image(self, step: int) -> Tuple[Cameras, Dict]: diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py index 837794ce18..dede3a3be5 100644 --- a/nerfstudio/data/dataparsers/colmap_dataparser.py +++ b/nerfstudio/data/dataparsers/colmap_dataparser.py @@ -59,6 +59,8 @@ class ColmapDataParserConfig(DataParserConfig): """How much to downscale images. If not set, images are chosen such that the max dimension is <1600px.""" downscale_rounding_mode: Literal["floor", "round", "ceil"] = "floor" """How to round downscale image height and Image width.""" + tiling_factor: int = 1 + """Tile images into n^2 equal-resolution images, where n is this number. 
n | H, n | W for image with resolution WxH""" scene_scale: float = 1.0 """How much to scale the region of interest by.""" orientation_method: Literal["pca", "up", "vertical", "none"] = "up" @@ -115,7 +117,8 @@ class ColmapDataParser(DataParser): The dataparser loads the downscaled images from folders with `_{downscale_factor}` suffix. If these folders do not exist, the user can choose to automatically downscale the images and - create these folders. + create these folders. If tiling_factor > 1, the images are instead loaded from folders with + `_tiled_{tiling_factor}` suffix. The loader is compatible with the datasets processed using the ns-process-data script and can be used as a drop-in replacement. It further supports datasets like Mip-NeRF 360 (although @@ -327,13 +330,26 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs): image_filenames, mask_filenames, depth_filenames, downscale_factor = self._setup_downscale_factor( image_filenames, mask_filenames, depth_filenames ) + image_filenames, mask_filenames, depth_filenames = self._setup_tiling( + image_filenames, mask_filenames, depth_filenames + ) + + num_tiles = self.config.tiling_factor**2 - image_filenames = [image_filenames[i] for i in indices] - mask_filenames = [mask_filenames[i] for i in indices] if len(mask_filenames) > 0 else [] - depth_filenames = [depth_filenames[i] for i in indices] if len(depth_filenames) > 0 else [] + image_filenames = [image_filenames[i * num_tiles + j] for i in indices for j in range(num_tiles)] + mask_filenames = ( + [mask_filenames[i * num_tiles + j] for i in indices for j in range(num_tiles)] + if len(mask_filenames) > 0 + else [] + ) + depth_filenames = ( + [depth_filenames[i * num_tiles + j] for i in indices for j in range(num_tiles)] + if len(depth_filenames) > 0 + else [] + ) idx_tensor = torch.tensor(indices, dtype=torch.long) - poses = poses[idx_tensor] + poses = poses[idx_tensor].repeat_interleave(num_tiles, dim=0) # in x,y,z order # assumes that 
the scene is centered at the origin @@ -344,13 +360,13 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs): ) ) - fx = torch.tensor(fx, dtype=torch.float32)[idx_tensor] - fy = torch.tensor(fy, dtype=torch.float32)[idx_tensor] - cx = torch.tensor(cx, dtype=torch.float32)[idx_tensor] - cy = torch.tensor(cy, dtype=torch.float32)[idx_tensor] - height = torch.tensor(height, dtype=torch.int32)[idx_tensor] - width = torch.tensor(width, dtype=torch.int32)[idx_tensor] - distortion_params = torch.stack(distort, dim=0)[idx_tensor] + fx = torch.tensor(fx, dtype=torch.float32)[idx_tensor].repeat_interleave(num_tiles) + fy = torch.tensor(fy, dtype=torch.float32)[idx_tensor].repeat_interleave(num_tiles) + cx = torch.tensor(cx, dtype=torch.float32)[idx_tensor].repeat_interleave(num_tiles) + cy = torch.tensor(cy, dtype=torch.float32)[idx_tensor].repeat_interleave(num_tiles) + height = torch.tensor(height, dtype=torch.int32)[idx_tensor].repeat_interleave(num_tiles) + width = torch.tensor(width, dtype=torch.int32)[idx_tensor].repeat_interleave(num_tiles) + distortion_params = torch.stack(distort, dim=0)[idx_tensor].repeat_interleave(num_tiles, dim=0) cameras = Cameras( fx=fx, @@ -364,6 +380,7 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs): camera_type=camera_type, ) + cameras.update_tiling_intrinsics(tiling_factor=self.config.tiling_factor) cameras.rescale_output_resolution( scaling_factor=1.0 / downscale_factor, scale_rounding_mode=self.config.downscale_rounding_mode ) @@ -455,6 +472,7 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca points3D_image_ids.append( torch.cat((nids, torch.full((max_num_points - len(nids),), -1, dtype=torch.int64))) ) + assert downscale_factor is not None points3D_image_xy.append( torch.cat((nxy, torch.full((max_num_points - len(nxy), nxy.shape[-1]), 0, dtype=torch.float32))) / downscale_factor @@ -463,6 +481,109 @@ def _load_3D_points(self, colmap_path: Path, 
transform_matrix: torch.Tensor, sca out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0) return out + def _tile_images(self, paths, get_fname, tiling_factor): + """ + Tile images into self.tiling_factor^2 tiles. + Logic must match intrinsics update in Cameras object. + """ + with status(msg="[bold yellow]Tiling images...", spinner="growVertical"): + assert isinstance(tiling_factor, int) + assert tiling_factor > 1 + + for path in paths: + img = Image.open(path) + w, h = img.size + + base_tile_w, remainder_w = divmod(w, tiling_factor) + base_tile_h, remainder_h = divmod(h, tiling_factor) + + path_out_base = get_fname(path) + path_out_base.parent.mkdir(parents=True, exist_ok=True) + + for row in range(tiling_factor): + for col in range(tiling_factor): + idx = row * tiling_factor + col + + # Distribute the remainder among the first remainder_w columns and remainder_h rows + tile_w = base_tile_w + int(col < remainder_w) + tile_h = base_tile_h + int(row < remainder_h) + + x_offset = col * base_tile_w + min(col, remainder_w) + y_offset = row * base_tile_h + min(row, remainder_h) + + tile = img.crop( + ( + x_offset, + y_offset, + x_offset + tile_w, + y_offset + tile_h, + ) + ) + + output_path = path_out_base.with_stem(path_out_base.stem + f"_{idx}") + tile.save(output_path) + + CONSOLE.log("[bold green]:tada: Done tiling images.") + + def _setup_tiling(self, image_filenames: List[Path], mask_filenames: List[Path], depth_filenames: List[Path]): + """ + Wrapper around self._tile_images() to handle tiling of image, mask, and depth files. 
+ """ + if self.config.tiling_factor == 1: + return image_filenames, mask_filenames, depth_filenames + + assert self._downscale_factor == 1, "Tiling not supported with downscaling, please set --downscale_factor=1" + + def get_fname(parent: Path, filepath: Path) -> Path: + """Returns transformed file name when tiling factor is applied""" + rel_part = filepath.relative_to(parent) + base_part = parent.parent / (str(parent.name) + f"_tiled_{self.config.tiling_factor}") + return base_part / rel_part + + if not all(get_fname(self.config.data / self.config.images_path, fp).parent.exists() for fp in image_filenames): + self._tile_images( + image_filenames, + partial(get_fname, self.config.data / self.config.images_path), + self.config.tiling_factor, + ) + if len(mask_filenames) > 0: + assert self.config.masks_path is not None + self._tile_images( + mask_filenames, + partial(get_fname, self.config.data / self.config.masks_path), + self.config.tiling_factor, + ) + if len(depth_filenames) > 0: + assert self.config.depths_path is not None + self._tile_images( + depth_filenames, + partial(get_fname, self.config.data / self.config.depths_path), + self.config.tiling_factor, + ) + + num_tiles = self.config.tiling_factor**2 + image_filenames = [ + get_fname(self.config.data / self.config.images_path, fp.with_stem(fp.stem + f"_{i}")) + for fp in image_filenames + for i in range(num_tiles) + ] + if len(mask_filenames) > 0: + assert self.config.masks_path is not None + mask_filenames = [ + get_fname(self.config.data / self.config.masks_path, fp.with_stem(fp.stem + f"_{i}")) + for fp in mask_filenames + for i in range(num_tiles) + ] + if len(depth_filenames) > 0: + assert self.config.depths_path is not None + depth_filenames = [ + get_fname(self.config.data / self.config.depths_path, fp.with_stem(fp.stem + f"_{i}")) + for fp in depth_filenames + for i in range(num_tiles) + ] + + return image_filenames, mask_filenames, depth_filenames + def _downscale_images( self, paths, diff --git 
a/nerfstudio/data/dataparsers/dycheck_dataparser.py b/nerfstudio/data/dataparsers/dycheck_dataparser.py index 42ce6afce7..19d5d8a59f 100644 --- a/nerfstudio/data/dataparsers/dycheck_dataparser.py +++ b/nerfstudio/data/dataparsers/dycheck_dataparser.py @@ -289,8 +289,8 @@ def process_frames(self, frame_names: List[str], time_ids: np.ndarray) -> Tuple[ cam_json = load_from_json(self.data / f"camera/{frame}.json") c2w = torch.as_tensor(cam_json["orientation"]).T position = torch.as_tensor(cam_json["position"]) - position -= self._center # some scenes look weird (wheel) - position *= self._scale * self.config.scale_factor + position -= torch.as_tensor(self._center) # some scenes look weird (wheel) + position *= torch.as_tensor(self._scale) * self.config.scale_factor pose = torch.zeros([3, 4]) pose[:3, :3] = c2w pose[:3, 3] = position diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 24dc456d15..3512d31ca2 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -484,6 +484,7 @@ def _get_fname(self, filepath: Path, data_dir: Path, downsample_folder_prefix="i CONSOLE.log(f"Auto image downscale factor of {self.downscale_factor}") else: self.downscale_factor = self.config.downscale_factor + assert self.downscale_factor is not None if self.downscale_factor > 1: return data_dir / f"{downsample_folder_prefix}{self.downscale_factor}" / filepath.name diff --git a/nerfstudio/data/pixel_samplers.py b/nerfstudio/data/pixel_samplers.py index f2bc6d96ef..8b97120b57 100644 --- a/nerfstudio/data/pixel_samplers.py +++ b/nerfstudio/data/pixel_samplers.py @@ -18,6 +18,7 @@ import random import warnings +from collections import defaultdict from dataclasses import dataclass, field from typing import Dict, Optional, Type, Union @@ -226,6 +227,7 @@ def sample_method_fisheye( rand_samples = torch.rand((samples_needed, 2), device=device) # Convert 
random samples to radius and theta. + assert self.config.fisheye_crop_radius is not None radii = self.config.fisheye_crop_radius * torch.sqrt(rand_samples[:, 0]) theta = 2.0 * torch.pi * rand_samples[:, 1] @@ -335,8 +337,7 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int, # only sample within the mask, if the mask is in the batch all_indices = [] - all_images = [] - all_depth_images = [] + all_images = defaultdict(list) assert num_rays_per_batch % 2 == 0, "num_rays_per_batch must be divisible by 2" num_rays_per_image = divide_rays_per_image(num_rays_per_batch, num_images) @@ -350,10 +351,11 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int, ) indices[:, 0] = i all_indices.append(indices) - all_images.append(batch["image"][i][indices[:, 1], indices[:, 2]]) - if "depth_image" in batch: - all_depth_images.append(batch["depth_image"][i][indices[:, 1], indices[:, 2]]) + for key, value in batch.items(): + if key in ["image_idx", "mask"]: + continue + all_images[key].append(value[i][indices[:, 1], indices[:, 2]]) else: for i, num_rays in enumerate(num_rays_per_image): image_height, image_width, _ = batch["image"][i].shape @@ -363,26 +365,19 @@ def collate_image_dataset_batch_list(self, batch: Dict, num_rays_per_batch: int, indices = self.sample_method(num_rays, 1, image_height, image_width, device=device) indices[:, 0] = i all_indices.append(indices) - all_images.append(batch["image"][i][indices[:, 1], indices[:, 2]]) - if "depth_image" in batch: - all_depth_images.append(batch["depth_image"][i][indices[:, 1], indices[:, 2]]) + for key, value in batch.items(): + if key in ["image_idx", "mask"]: + continue + all_images[key].append(value[i][indices[:, 1], indices[:, 2]]) indices = torch.cat(all_indices, dim=0) - c, y, x = (i.flatten() for i in torch.split(indices, 1, dim=-1)) - collated_batch = { - key: value[c, y, x] - for key, value in batch.items() - if key not in ("image_idx", "image", "mask", 
"depth_image") and value is not None - } - - collated_batch["image"] = torch.cat(all_images, dim=0) - if "depth_image" in batch: - collated_batch["depth_image"] = torch.cat(all_depth_images, dim=0) + collated_batch = {key: torch.cat(all_images[key], dim=0) for key in all_images} assert collated_batch["image"].shape[0] == num_rays_per_batch # Needed to correct the random indices to their actual camera idx locations. + c = indices[..., 0].flatten() indices[:, 0] = batch["image_idx"][c] collated_batch["indices"] = indices # with the abs camera indices diff --git a/nerfstudio/data/utils/data_utils.py b/nerfstudio/data/utils/data_utils.py index e79f169034..d1a6721844 100644 --- a/nerfstudio/data/utils/data_utils.py +++ b/nerfstudio/data/utils/data_utils.py @@ -116,7 +116,7 @@ def get_depth_image_from_path( else: image = cv2.imread(str(filepath.absolute()), cv2.IMREAD_ANYDEPTH) image = image.astype(np.float32) * scale_factor - image = cv2.resize(image, (width, height), interpolation=interpolation) + image = cv2.resize(image, (width, height), interpolation=interpolation) # type: ignore return torch.from_numpy(image[:, :, np.newaxis]) diff --git a/nerfstudio/data/utils/dataloaders.py b/nerfstudio/data/utils/dataloaders.py index 9fa2faaf7d..f9a6ccc5f1 100644 --- a/nerfstudio/data/utils/dataloaders.py +++ b/nerfstudio/data/utils/dataloaders.py @@ -574,18 +574,18 @@ def __iter__(self): """ Here, the variable 'batch' refers to the output of our pixel sampler. - batch is a dict_keys(['image', 'indices']) - - batch['image'] returns a pytorch tensor with shape `torch.Size([4096, 3])` , where 4096 = num_rays_per_batch. + - batch['image'] returns a `torch.Size([4096, 3])` tensor on CPU, where 4096 = num_rays_per_batch. - Note: each row in this tensor represents the RGB values as floats in [0, 1] of the pixel the ray goes through. 
- The info of what specific image index that pixel belongs to is stored within batch[’indices’] - - batch['indices'] returns a pytorch tensor `torch.Size([4096, 3])` tensor where each row represents (image_idx, pixelRow, pixelCol) + - batch['indices'] returns a `torch.Size([4096, 3])` tensor on CPU where each row represents (image_idx, pixelRow, pixelCol) pixel_sampler (for variable_res_collate) will loop though each image, samples pixel within the mask, and returns them as the variable `indices` which has shape torch.Size([4096, 3]), where each row represents a pixel (image_idx, pixelRow, pixelCol) """ batch = worker_pixel_sampler.sample(collated_batch) # type: ignore # Note: collated_batch["image"].get_device() will return CPU if self.exclude_batch_keys_from_device contains 'image' ray_indices = batch["indices"] - # the ray_bundle is on the GPU; batch["image"] is on the CPU, here we move it to the GPU - ray_bundle = self.ray_generator(ray_indices).to(self.device) + # Both ray_bundle and batch["image"] are on the CPU and will be moved to the GPU in the main process (parallel_datamanager.py) + ray_bundle = self.ray_generator(ray_indices) if self.custom_ray_processor: ray_bundle, batch = self.custom_ray_processor(ray_bundle, batch) @@ -645,10 +645,6 @@ def __iter__(self): camera, data = self.custom_image_processor(camera, data) i += 1 - camera = camera.to(self.device) - for k in data.keys(): - if isinstance(data[k], torch.Tensor): - data[k] = data[k].to(self.device) yield camera, data diff --git a/nerfstudio/exporter/exporter_utils.py b/nerfstudio/exporter/exporter_utils.py index 49b7994d5f..3cd1fc2872 100644 --- a/nerfstudio/exporter/exporter_utils.py +++ b/nerfstudio/exporter/exporter_utils.py @@ -345,6 +345,7 @@ def collect_camera_poses(pipeline: VanillaPipeline) -> Tuple[List[Dict[str, Any] camera_optimizer = None if hasattr(pipeline.model, "camera_optimizer"): camera_optimizer = pipeline.model.camera_optimizer + assert isinstance(camera_optimizer, 
CameraOptimizer) train_frames = collect_camera_poses_for_dataset(train_dataset, camera_optimizer) # Note: returning original poses, even if --eval-mode=all diff --git a/nerfstudio/field_components/encodings.py b/nerfstudio/field_components/encodings.py index 48845748bd..7bc18100b5 100644 --- a/nerfstudio/field_components/encodings.py +++ b/nerfstudio/field_components/encodings.py @@ -45,11 +45,6 @@ def __init__(self, in_dim: int) -> None: raise ValueError("Input dimension should be greater than zero") super().__init__(in_dim=in_dim) - @classmethod - def get_tcnn_encoding_config(cls) -> dict: - """Get the encoding configuration for tcnn if implemented""" - raise NotImplementedError("Encoding does not have a TCNN implementation") - @abstractmethod def forward(self, in_tensor: Shaped[Tensor, "*bs input_dim"]) -> Shaped[Tensor, "*bs output_dim"]: """Call forward and returns and processed tensor @@ -217,6 +212,7 @@ def __init__( self.min_freq = min_freq_exp self.max_freq = max_freq_exp self.register_buffer(name="b_matrix", tensor=basis) + self.b_matrix: Tensor self.include_input = include_input def get_out_dim(self) -> int: diff --git a/nerfstudio/fields/sdf_field.py b/nerfstudio/fields/sdf_field.py index ed936d4b2b..8f53f55a0f 100644 --- a/nerfstudio/fields/sdf_field.py +++ b/nerfstudio/fields/sdf_field.py @@ -328,6 +328,7 @@ def get_alpha( ) # always non-positive # Estimate signed distances at section points + assert ray_samples.deltas is not None, "Ray samples must have deltas for alpha computation." 
estimated_next_sdf = sdf + iter_cos * ray_samples.deltas * 0.5 estimated_prev_sdf = sdf - iter_cos * ray_samples.deltas * 0.5 diff --git a/nerfstudio/models/generfacto.py b/nerfstudio/models/generfacto.py index 5c1316ab79..9a8d4e81b1 100644 --- a/nerfstudio/models/generfacto.py +++ b/nerfstudio/models/generfacto.py @@ -19,7 +19,7 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Dict, List, Optional, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type, cast import numpy as np import torch @@ -444,7 +444,7 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te loss_dict = misc.scale_dict(loss_dict, self.config.loss_coefficients) if self.train_normals: # orientation loss for computed normals - loss_dict["orientation_loss"] = self.orientation_loss_mult * torch.mean( + loss_dict["orientation_loss"] = cast(float, self.orientation_loss_mult) * torch.mean( outputs["rendered_orientation_loss"] ) else: diff --git a/nerfstudio/models/instant_ngp.py b/nerfstudio/models/instant_ngp.py index 1dd00af8da..6686b556cd 100644 --- a/nerfstudio/models/instant_ngp.py +++ b/nerfstudio/models/instant_ngp.py @@ -19,7 +19,7 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import Dict, List, Literal, Optional, Tuple, Type, Union +from typing import Dict, List, Literal, Optional, Tuple, Type, Union, cast import nerfacc import torch @@ -152,7 +152,7 @@ def get_training_callbacks( def update_occupancy_grid(step: int): self.occupancy_grid.update_every_n_steps( step=step, - occ_eval_fn=lambda x: self.field.density_fn(x) * self.config.render_step_size, + occ_eval_fn=lambda x: self.field.density_fn(x) * cast(float, self.config.render_step_size), ) return [ @@ -170,7 +170,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: param_groups["fields"] = list(self.field.parameters()) return param_groups - def get_outputs(self, ray_bundle: RayBundle): + def 
get_outputs(self, ray_bundle: RayBundle): # type: ignore assert self.field is not None num_rays = len(ray_bundle) diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py index 136a3168e6..475cc5c388 100644 --- a/nerfstudio/models/splatfacto.py +++ b/nerfstudio/models/splatfacto.py @@ -291,7 +291,7 @@ def populate_modules(self): ) self.strategy_state = self.strategy.initialize_state() else: - raise ValueError(f"""Splatfacto does not support strategy {self.config.strategy} + raise ValueError(f"""Splatfacto does not support strategy {self.config.strategy} Currently, the supported strategies include default and mcmc.""") @property @@ -552,7 +552,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: colors_crop = torch.sigmoid(colors_crop).squeeze(1) # [N, 1, 3] -> [N, 3] sh_degree_to_use = None - render, alpha, self.info = rasterization( + render, alpha, self.info = rasterization( # type: ignore[reportPossiblyUnboundVariable] means=means_crop, quats=quats_crop, # rasterization does normalization internally scales=torch.exp(scales_crop), diff --git a/nerfstudio/process_data/colmap_utils - Copia.py b/nerfstudio/process_data/colmap_utils - Copia.py deleted file mode 100644 index 1d9405c81a..0000000000 --- a/nerfstudio/process_data/colmap_utils - Copia.py +++ /dev/null @@ -1,714 +0,0 @@ -# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Tools supporting the execution of COLMAP and preparation of COLMAP-based datasets for nerfstudio training. -""" - -import json -from pathlib import Path -from typing import Any, Dict, Literal, Optional, Union - -import appdirs -import cv2 -import numpy as np -import requests -import torch -from packaging.version import Version -from rich.progress import track - -# TODO(1480) use pycolmap instead of colmap_parsing_utils -# import pycolmap -from nerfstudio.data.utils.colmap_parsing_utils import ( - qvec2rotmat, - read_cameras_binary, - read_images_binary, - read_points3D_binary, - read_points3D_text, -) -from nerfstudio.process_data.process_data_utils import CameraModel -from nerfstudio.utils import colormaps -from nerfstudio.utils.rich_utils import CONSOLE, status -from nerfstudio.utils.scripts import run_command - - -def get_colmap_version(colmap_cmd: str, default_version: str = "3.8") -> Version: - """Returns the version of COLMAP. - This code assumes that colmap returns a version string of the form - "COLMAP 3.8 ..." which may not be true for all versions of COLMAP. - - Args: - default_version: Default version to return if COLMAP version can't be determined. - Returns: - The version of COLMAP. - """ - output = run_command(f"{colmap_cmd} -h", verbose=False) - assert output is not None - for line in output.split("\n"): - if line.startswith("COLMAP"): - version = line.split(" ")[1] - version = Version(version) - return version - CONSOLE.print(f"[bold red]Could not find COLMAP version. Using default {default_version}") - return Version(default_version) - - -def get_vocab_tree() -> Path: - """Return path to vocab tree. Downloads vocab tree if it doesn't exist. - - Returns: - The path to the vocab tree. 
- """ - vocab_tree_filename = Path(appdirs.user_data_dir("nerfstudio")) / "vocab_tree.fbow" - - if not vocab_tree_filename.exists(): - r = requests.get("https://demuc.de/colmap/vocab_tree_flickr100K_words32K.bin", stream=True) - vocab_tree_filename.parent.mkdir(parents=True, exist_ok=True) - with open(vocab_tree_filename, "wb") as f: - total_length = r.headers.get("content-length") - assert total_length is not None - for chunk in track( - r.iter_content(chunk_size=1024), - total=int(total_length) / 1024 + 1, - description="Downloading vocab tree...", - ): - if chunk: - f.write(chunk) - f.flush() - return vocab_tree_filename - - -def run_colmap( - image_dir: Path, - colmap_dir: Path, - camera_model: CameraModel, - camera_mask_path: Optional[Path] = None, - gpu: bool = True, - verbose: bool = False, - matching_method: Literal["vocab_tree", "exhaustive", "sequential"] = "vocab_tree", - refine_intrinsics: bool = True, - colmap_cmd: str = "colmap", -) -> None: - """Runs COLMAP on the images. - - Args: - image_dir: Path to the directory containing the images. - colmap_dir: Path to the output directory. - camera_model: Camera model to use. - camera_mask_path: Path to the camera mask. - gpu: If True, use GPU. - verbose: If True, logs the output of the command. - matching_method: Matching method to use. - refine_intrinsics: If True, refine intrinsics. - colmap_cmd: Path to the COLMAP executable. 
- """ - - colmap_version = get_colmap_version(colmap_cmd) - - colmap_database_path = colmap_dir / "database.db" - colmap_database_path.unlink(missing_ok=True) - - # Feature extraction - feature_extractor_cmd = [ - f"{colmap_cmd} feature_extractor", - f"--database_path {colmap_dir / 'database.db'}", - f"--image_path {image_dir}", - "--ImageReader.single_camera 1", - f"--ImageReader.camera_model {camera_model.value}", - f"--SiftExtraction.use_gpu {int(gpu)}", - ] - if camera_mask_path is not None: - feature_extractor_cmd.append(f"--ImageReader.camera_mask_path {camera_mask_path}") - feature_extractor_cmd = " ".join(feature_extractor_cmd) - with status(msg="[bold yellow]Running COLMAP feature extractor...", spinner="moon", verbose=verbose): - run_command(feature_extractor_cmd, verbose=verbose) - - CONSOLE.log("[bold green]:tada: Done extracting COLMAP features.") - - # Feature matching - feature_matcher_cmd = [ - f"{colmap_cmd} {matching_method}_matcher", - f"--database_path {colmap_dir / 'database.db'}", - f"--SiftMatching.use_gpu {int(gpu)}", - ] - if matching_method == "vocab_tree": - vocab_tree_filename = get_vocab_tree() - feature_matcher_cmd.append(f'--VocabTreeMatching.vocab_tree_path "{vocab_tree_filename}"') - feature_matcher_cmd = " ".join(feature_matcher_cmd) - with status(msg="[bold yellow]Running COLMAP feature matcher...", spinner="runner", verbose=verbose): - run_command(feature_matcher_cmd, verbose=verbose) - CONSOLE.log("[bold green]:tada: Done matching COLMAP features.") - - # Bundle adjustment - sparse_dir = colmap_dir / "sparse" - sparse_dir.mkdir(parents=True, exist_ok=True) - mapper_cmd = [ - f"{colmap_cmd} mapper", - f"--database_path {colmap_dir / 'database.db'}", - f"--image_path {image_dir}", - f"--output_path {sparse_dir}", - ] - if colmap_version >= Version("3.7"): - mapper_cmd.append("--Mapper.ba_global_function_tolerance=1e-6") - - mapper_cmd = " ".join(mapper_cmd) - - with status( - msg="[bold yellow]Running COLMAP bundle adjustment... 
(This may take a while)", - spinner="circle", - verbose=verbose, - ): - run_command(mapper_cmd, verbose=verbose) - CONSOLE.log("[bold green]:tada: Done COLMAP bundle adjustment.") - - if refine_intrinsics: - with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose): - bundle_adjuster_cmd = [ - f"{colmap_cmd} bundle_adjuster", - f"--input_path {sparse_dir}/0", - f"--output_path {sparse_dir}/0", - "--BundleAdjustment.refine_principal_point 1", - ] - run_command(" ".join(bundle_adjuster_cmd), verbose=verbose) - CONSOLE.log("[bold green]:tada: Done refining intrinsics.") - - -def parse_colmap_camera_params(camera) -> Dict[str, Any]: - """ - Parses all currently supported COLMAP cameras into the transforms.json metadata - - Args: - camera: COLMAP camera - Returns: - transforms.json metadata containing camera's intrinsics and distortion parameters - - """ - out: Dict[str, Any] = { - "w": camera.width, - "h": camera.height, - } - - # Parameters match https://github.com/colmap/colmap/blob/dev/src/base/camera_models.h - camera_params = camera.params - if camera.model == "SIMPLE_PINHOLE": - # du = 0 - # dv = 0 - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[0]) - out["cx"] = float(camera_params[1]) - out["cy"] = float(camera_params[2]) - out["k1"] = 0.0 - out["k2"] = 0.0 - out["p1"] = 0.0 - out["p2"] = 0.0 - camera_model = CameraModel.OPENCV - elif camera.model == "PINHOLE": - # f, cx, cy, k - - # du = 0 - # dv = 0 - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[1]) - out["cx"] = float(camera_params[2]) - out["cy"] = float(camera_params[3]) - out["k1"] = 0.0 - out["k2"] = 0.0 - out["p1"] = 0.0 - out["p2"] = 0.0 - camera_model = CameraModel.OPENCV - elif camera.model == "SIMPLE_RADIAL": - # f, cx, cy, k - - # r2 = u**2 + v**2; - # radial = k * r2 - # du = u * radial - # dv = u * radial - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[0]) - out["cx"] = 
float(camera_params[1]) - out["cy"] = float(camera_params[2]) - out["k1"] = float(camera_params[3]) - out["k2"] = 0.0 - out["p1"] = 0.0 - out["p2"] = 0.0 - camera_model = CameraModel.OPENCV - elif camera.model == "RADIAL": - # f, cx, cy, k1, k2 - - # r2 = u**2 + v**2; - # radial = k1 * r2 + k2 * r2 ** 2 - # du = u * radial - # dv = v * radial - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[0]) - out["cx"] = float(camera_params[1]) - out["cy"] = float(camera_params[2]) - out["k1"] = float(camera_params[3]) - out["k2"] = float(camera_params[4]) - out["p1"] = 0.0 - out["p2"] = 0.0 - camera_model = CameraModel.OPENCV - elif camera.model == "OPENCV": - # fx, fy, cx, cy, k1, k2, p1, p2 - - # uv = u * v; - # r2 = u**2 + v**2 - # radial = k1 * r2 + k2 * r2 ** 2 - # du = u * radial + 2 * p1 * u*v + p2 * (r2 + 2 * u**2) - # dv = v * radial + 2 * p2 * u*v + p1 * (r2 + 2 * v**2) - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[1]) - out["cx"] = float(camera_params[2]) - out["cy"] = float(camera_params[3]) - out["k1"] = float(camera_params[4]) - out["k2"] = float(camera_params[5]) - out["p1"] = float(camera_params[6]) - out["p2"] = float(camera_params[7]) - camera_model = CameraModel.OPENCV - elif camera.model == "OPENCV_FISHEYE": - # fx, fy, cx, cy, k1, k2, k3, k4 - - # r = sqrt(u**2 + v**2) - - # if r > eps: - # theta = atan(r) - # theta2 = theta ** 2 - # theta4 = theta2 ** 2 - # theta6 = theta4 * theta2 - # theta8 = theta4 ** 2 - # thetad = theta * (1 + k1 * theta2 + k2 * theta4 + k3 * theta6 + k4 * theta8) - # du = u * thetad / r - u; - # dv = v * thetad / r - v; - # else: - # du = dv = 0 - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[1]) - out["cx"] = float(camera_params[2]) - out["cy"] = float(camera_params[3]) - out["k1"] = float(camera_params[4]) - out["k2"] = float(camera_params[5]) - out["k3"] = float(camera_params[6]) - out["k4"] = float(camera_params[7]) - camera_model = 
CameraModel.OPENCV_FISHEYE - elif camera.model == "FULL_OPENCV": - # fx, fy, cx, cy, k1, k2, p1, p2, k3, k4, k5, k6 - - # u2 = u ** 2 - # uv = u * v - # v2 = v ** 2 - # r2 = u2 + v2 - # r4 = r2 * r2 - # r6 = r4 * r2 - # radial = (1 + k1 * r2 + k2 * r4 + k3 * r6) / - # (1 + k4 * r2 + k5 * r4 + k6 * r6) - # du = u * radial + 2 * p1 * uv + p2 * (r2 + 2 * u2) - u - # dv = v * radial + 2 * p2 * uv + p1 * (r2 + 2 * v2) - v - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[1]) - out["cx"] = float(camera_params[2]) - out["cy"] = float(camera_params[3]) - out["k1"] = float(camera_params[4]) - out["k2"] = float(camera_params[5]) - out["p1"] = float(camera_params[6]) - out["p2"] = float(camera_params[7]) - out["k3"] = float(camera_params[8]) - out["k4"] = float(camera_params[9]) - out["k5"] = float(camera_params[10]) - out["k6"] = float(camera_params[11]) - raise NotImplementedError(f"{camera.model} camera model is not supported yet!") - elif camera.model == "FOV": - # fx, fy, cx, cy, omega - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[1]) - out["cx"] = float(camera_params[2]) - out["cy"] = float(camera_params[3]) - out["omega"] = float(camera_params[4]) - raise NotImplementedError(f"{camera.model} camera model is not supported yet!") - elif camera.model == "SIMPLE_RADIAL_FISHEYE": - # f, cx, cy, k - - # r = sqrt(u ** 2 + v ** 2) - # if r > eps: - # theta = atan(r) - # theta2 = theta ** 2 - # thetad = theta * (1 + k * theta2) - # du = u * thetad / r - u; - # dv = v * thetad / r - v; - # else: - # du = dv = 0 - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[0]) - out["cx"] = float(camera_params[1]) - out["cy"] = float(camera_params[2]) - out["k1"] = float(camera_params[3]) - out["k2"] = 0.0 - out["k3"] = 0.0 - out["k4"] = 0.0 - camera_model = CameraModel.OPENCV_FISHEYE - elif camera.model == "RADIAL_FISHEYE": - # f, cx, cy, k1, k2 - - # r = sqrt(u ** 2 + v ** 2) - # if r > eps: - # theta = 
atan(r) - # theta2 = theta ** 2 - # theta4 = theta2 ** 2 - # thetad = theta * (1 + k * theta2) - # thetad = theta * (1 + k1 * theta2 + k2 * theta4) - # du = u * thetad / r - u; - # dv = v * thetad / r - v; - # else: - # du = dv = 0 - out["fl_x"] = float(camera_params[0]) - out["fl_y"] = float(camera_params[0]) - out["cx"] = float(camera_params[1]) - out["cy"] = float(camera_params[2]) - out["k1"] = float(camera_params[3]) - out["k2"] = float(camera_params[4]) - out["k3"] = 0 - out["k4"] = 0 - camera_model = CameraModel.OPENCV_FISHEYE - else: - # THIN_PRISM_FISHEYE not supported! - raise NotImplementedError(f"{camera.model} camera model is not supported yet!") - - out["camera_model"] = camera_model.value - return out - - -def colmap_to_json( - recon_dir: Path, - output_dir: Path, - camera_mask_path: Optional[Path] = None, - image_id_to_depth_path: Optional[Dict[int, Path]] = None, - image_rename_map: Optional[Dict[str, str]] = None, - ply_filename="sparse_pc.ply", - keep_original_world_coordinate: bool = False, - use_single_camera_mode: bool = True, -) -> int: - """Converts COLMAP's cameras.bin and images.bin to a JSON file. - - Args: - recon_dir: Path to the reconstruction directory, e.g. "sparse/0" - output_dir: Path to the output directory. - camera_model: Camera model used. - camera_mask_path: Path to the camera mask. - image_id_to_depth_path: When including sfm-based depth, embed these depth file paths in the exported json - image_rename_map: Use these image names instead of the names embedded in the COLMAP db - keep_original_world_coordinate: If True, no extra transform will be applied to world coordinate. - Colmap optimized world often have y direction of the first camera pointing towards down direction, - while nerfstudio world set z direction to be up direction for viewer. - Returns: - The number of registered images. 
- """ - - # TODO(1480) use pycolmap - # recon = pycolmap.Reconstruction(recon_dir) - # cam_id_to_camera = recon.cameras - # im_id_to_image = recon.images - cam_id_to_camera = read_cameras_binary(recon_dir / "cameras.bin") - im_id_to_image = read_images_binary(recon_dir / "images.bin") - if set(cam_id_to_camera.keys()) != {1}: - CONSOLE.print(f"[bold yellow]Warning: More than one camera is found in {recon_dir}") - print(cam_id_to_camera) - use_single_camera_mode = False # update bool: one camera per frame - out = {} # out = {"camera_model": parse_colmap_camera_params(cam_id_to_camera[1])["camera_model"]} - else: # one camera for all frames - out = parse_colmap_camera_params(cam_id_to_camera[1]) - - frames = [] - for im_id, im_data in im_id_to_image.items(): - # NB: COLMAP uses Eigen / scalar-first quaternions - # * https://colmap.github.io/format.html - # * https://github.com/colmap/colmap/blob/bf3e19140f491c3042bfd85b7192ef7d249808ec/src/base/pose.cc#L75 - # the `rotation_matrix()` handles that format for us. 
- - # TODO(1480) BEGIN use pycolmap API - # rotation = im_data.rotation_matrix() - rotation = qvec2rotmat(im_data.qvec) - - translation = im_data.tvec.reshape(3, 1) - w2c = np.concatenate([rotation, translation], 1) - w2c = np.concatenate([w2c, np.array([[0, 0, 0, 1]])], 0) - c2w = np.linalg.inv(w2c) - # Convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL) - c2w[0:3, 1:3] *= -1 - if not keep_original_world_coordinate: - c2w = c2w[np.array([0, 2, 1, 3]), :] - c2w[2, :] *= -1 - - name = im_data.name - if image_rename_map is not None: - name = image_rename_map[name] - name = Path(f"./images/{name}") - - frame = { - "file_path": name.as_posix(), - "transform_matrix": c2w.tolist(), - "colmap_im_id": im_id, - } - if camera_mask_path is not None: - frame["mask_path"] = camera_mask_path.relative_to(camera_mask_path.parent.parent).as_posix() - if image_id_to_depth_path is not None: - depth_path = image_id_to_depth_path[im_id] - frame["depth_file_path"] = str(depth_path.relative_to(depth_path.parent.parent)) - - if not use_single_camera_mode: # add the camera parameters for this frame - frame.update(parse_colmap_camera_params(cam_id_to_camera[im_data.camera_id])) - - frames.append(frame) - - out["frames"] = frames - - applied_transform = None - if not keep_original_world_coordinate: - applied_transform = np.eye(4)[:3, :] - applied_transform = applied_transform[np.array([0, 2, 1]), :] - applied_transform[2, :] *= -1 - out["applied_transform"] = applied_transform.tolist() - - # create ply from colmap - assert ply_filename.endswith(".ply"), f"ply_filename: {ply_filename} does not end with '.ply'" - create_ply_from_colmap( - ply_filename, - recon_dir, - output_dir, - torch.from_numpy(applied_transform).float() if applied_transform is not None else None, - ) - out["ply_file_path"] = ply_filename - - with open(output_dir / "transforms.json", "w", encoding="utf-8") as f: - json.dump(out, f, indent=4) - - return len(frames) - - -def create_sfm_depth( - recon_dir: 
Path, - output_dir: Path, - verbose: bool = True, - depth_scale_to_integer_factor: float = 1000.0, - min_depth: float = 0.001, - max_depth: float = 10000, - max_repoj_err: float = 2.5, - min_n_visible: int = 2, - include_depth_debug: bool = False, - input_images_dir: Optional[Path] = None, -) -> Dict[int, Path]: - """Converts COLMAP's points3d.bin to sparse depth map images encoded as - 16-bit "millimeter depth" PNGs. - - Notes: - * This facility does NOT use COLMAP dense reconstruction; it creates depth - maps from sparse SfM points here. - * COLMAP does *not* reconstruct metric depth unless you give it calibrated - (metric) intrinsics as input. Therefore, "depth" in this function has - potentially ambiguous units. - - Args: - recon_dir: Path to the reconstruction directory, e.g. "sparse/0" - output_dir: Path to the output directory. - verbose: If True, logs progress of depth image creation. - depth_scale_to_integer_factor: Use this parameter to tune the conversion of - raw depth measurements to integer depth values. This value should - be equal to 1. / `depth_unit_scale_factor`, where - `depth_unit_scale_factor` is the value you provide at training time. - E.g. for millimeter depth, leave `depth_unit_scale_factor` at 1e-3 - and depth_scale_to_integer_factor at 1000. - min_depth: Discard points closer than this to the camera. - max_depth: Discard points farther than this from the camera. - max_repoj_err: Discard points with reprojection error greater than this - amount (in pixels). - min_n_visible: Discard 3D points that have been triangulated with fewer - than this many frames. - include_depth_debug: Also include debug images showing depth overlaid - upon RGB. 
- Returns: - Depth file paths indexed by COLMAP image id - """ - - # TODO(1480) use pycolmap - # recon = pycolmap.Reconstruction(recon_dir) - # ptid_to_info = recon.points3D - # cam_id_to_camera = recon.cameras - # im_id_to_image = recon.images - ptid_to_info = read_points3D_binary(recon_dir / "points3D.bin") - cam_id_to_camera = read_cameras_binary(recon_dir / "cameras.bin") - im_id_to_image = read_images_binary(recon_dir / "images.bin") - - # Only support first camera - CAMERA_ID = 1 - W = cam_id_to_camera[CAMERA_ID].width - H = cam_id_to_camera[CAMERA_ID].height - - if verbose: - iter_images = track( - im_id_to_image.items(), total=len(im_id_to_image.items()), description="Creating depth maps ..." - ) - else: - iter_images = iter(im_id_to_image.items()) - - image_id_to_depth_path = {} - for im_id, im_data in iter_images: - # TODO(1480) BEGIN delete when abandoning colmap_parsing_utils - pids = [pid for pid in im_data.point3D_ids if pid != -1] - xyz_world = np.array([ptid_to_info[pid].xyz for pid in pids]) - rotation = qvec2rotmat(im_data.qvec) - z = (rotation @ xyz_world.T)[-1] + im_data.tvec[-1] - errors = np.array([ptid_to_info[pid].error for pid in pids]) - n_visible = np.array([len(ptid_to_info[pid].image_ids) for pid in pids]) - uv = np.array([im_data.xys[i] for i in range(len(im_data.xys)) if im_data.point3D_ids[i] != -1]) - # TODO(1480) END delete when abandoning colmap_parsing_utils - - # TODO(1480) BEGIN use pycolmap API - - # # Get only keypoints that have corresponding triangulated 3D points - # p2ds = im_data.get_valid_points2D() - - # xyz_world = np.array([ptid_to_info[p2d.point3D_id].xyz for p2d in p2ds]) - - # # COLMAP OpenCV convention: z is always positive - # z = (im_data.rotation_matrix() @ xyz_world.T)[-1] + im_data.tvec[-1] - - # # Mean reprojection error in image space - # errors = np.array([ptid_to_info[p2d.point3D_id].error for p2d in p2ds]) - - # # Number of frames in which each frame is visible - # n_visible = 
np.array([ptid_to_info[p2d.point3D_id].track.length() for p2d in p2ds]) - - # Note: these are *unrectified* pixel coordinates that should match the original input - # no matter the camera model - # uv = np.array([p2d.xy for p2d in p2ds]) - - # TODO(1480) END use pycolmap API - - idx = np.where( - (z >= min_depth) - & (z <= max_depth) - & (errors <= max_repoj_err) - & (n_visible >= min_n_visible) - & (uv[:, 0] >= 0) - & (uv[:, 0] < W) - & (uv[:, 1] >= 0) - & (uv[:, 1] < H) - ) - z = z[idx] - uv = uv[idx] - - uu, vv = uv[:, 0].astype(int), uv[:, 1].astype(int) - depth = np.zeros((H, W), dtype=np.float32) - depth[vv, uu] = z - - # E.g. if `depth` is metric and in units of meters, and `depth_scale_to_integer_factor` - # is 1000, then `depth_img` will be integer millimeters. - depth_img = (depth_scale_to_integer_factor * depth).astype(np.uint16) - - out_name = str(im_data.name) - depth_path = output_dir / out_name - if depth_path.suffix == ".jpg": - depth_path = depth_path.with_suffix(".png") - cv2.imwrite(str(depth_path), depth_img) # type: ignore - - image_id_to_depth_path[im_id] = depth_path - - if include_depth_debug: - assert input_images_dir is not None, "Need explicit input_images_dir for debug images" - assert input_images_dir.exists(), input_images_dir - - depth_flat = depth.flatten()[:, None] - overlay = 255.0 * colormaps.apply_depth_colormap(torch.from_numpy(depth_flat)).numpy() - overlay = overlay.reshape([H, W, 3]) - input_image_path = input_images_dir / im_data.name - input_image = cv2.imread(str(input_image_path)) # type: ignore - debug = 0.3 * input_image + 0.7 + overlay - - out_name = out_name + ".debug.jpg" - output_path = output_dir / "debug_depth" / out_name - output_path.parent.mkdir(parents=True, exist_ok=True) - cv2.imwrite(str(output_path), debug.astype(np.uint8)) # type: ignore - - return image_id_to_depth_path - - -def get_matching_summary(num_initial_frames: int, num_matched_frames: int) -> str: - """Returns a summary of the matching results. 
- - Args: - num_initial_frames: The number of initial frames. - num_matched_frames: The number of matched frames. - - Returns: - A summary of the matching results. - """ - match_ratio = num_matched_frames / num_initial_frames - if match_ratio == 1: - return "[bold green]COLMAP found poses for all images, CONGRATS!" - if match_ratio < 0.4: - result = f"[bold red]COLMAP only found poses for {num_matched_frames / num_initial_frames * 100:.2f}%" - result += " of the images. This is low.\nThis can be caused by a variety of reasons," - result += " such poor scene coverage, blurry images, or large exposure changes." - return result - if match_ratio < 0.8: - result = f"[bold yellow]COLMAP only found poses for {num_matched_frames / num_initial_frames * 100:.2f}%" - result += " of the images.\nThis isn't great, but may be ok." - result += "\nMissing poses can be caused by a variety of reasons, such poor scene coverage, blurry images," - result += " or large exposure changes." - return result - return f"[bold green]COLMAP found poses for {num_matched_frames / num_initial_frames * 100:.2f}% of the images." - - -def create_ply_from_colmap( - filename: str, recon_dir: Path, output_dir: Path, applied_transform: Union[torch.Tensor, None] -) -> None: - """Writes a ply file from colmap. 
- - Args: - filename: file name for .ply - recon_dir: Directory to grab colmap points - output_dir: Directory to output .ply - """ - if (recon_dir / "points3D.bin").exists(): - colmap_points = read_points3D_binary(recon_dir / "points3D.bin") - elif (recon_dir / "points3D.txt").exists(): - colmap_points = read_points3D_text(recon_dir / "points3D.txt") - else: - raise ValueError(f"Could not find points3D.txt or points3D.bin in {recon_dir}") - - # Load point Positions - points3D = torch.from_numpy(np.array([p.xyz for p in colmap_points.values()], dtype=np.float32)) - if applied_transform is not None: - assert applied_transform.shape == (3, 4) - points3D = torch.einsum("ij,bj->bi", applied_transform[:3, :3], points3D) + applied_transform[:3, 3] - - # Load point colours - points3D_rgb = torch.from_numpy(np.array([p.rgb for p in colmap_points.values()], dtype=np.uint8)) - - # write ply - with open(output_dir / filename, "w") as f: - # Header - f.write("ply\n") - f.write("format ascii 1.0\n") - f.write(f"element vertex {len(points3D)}\n") - f.write("property float x\n") - f.write("property float y\n") - f.write("property float z\n") - f.write("property uint8 red\n") - f.write("property uint8 green\n") - f.write("property uint8 blue\n") - f.write("end_header\n") - - for coord, color in zip(points3D, points3D_rgb): - x, y, z = coord - r, g, b = color - f.write(f"{x:8f} {y:8f} {z:8f} {r} {g} {b}\n") diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index 89f78ed53f..75878d6a5f 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -21,7 +21,7 @@ import sys from enum import Enum from pathlib import Path -from typing import List, Literal, Optional, OrderedDict, Tuple, Union +from typing import List, Literal, Optional, OrderedDict, Tuple, Union, cast import cv2 import imageio @@ -95,7 +95,7 @@ def get_image_filenames(directory: Path, max_num_images: int = -1) -> 
Tuple[List else: idx = np.arange(num_orig_images) - image_filenames = list(np.array(image_paths)[idx]) + image_filenames = cast(List[Path], list(np.array(image_paths)[idx])) return image_filenames, num_orig_images @@ -596,7 +596,7 @@ def generate_circle_mask(height: int, width: int, percent_radius) -> Optional[np mask = np.zeros((height, width), dtype=np.uint8) center = (width // 2, height // 2) radius = int(percent_radius * np.sqrt(width**2 + height**2) / 2.0) - cv2.circle(mask, center, radius, 1, -1) + cv2.circle(mask, center, radius, 1, -1) # type: ignore return mask diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py index b9d5d8c26e..40ca0f259e 100644 --- a/nerfstudio/scripts/downloads/eyeful_tower.py +++ b/nerfstudio/scripts/downloads/eyeful_tower.py @@ -120,7 +120,7 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe root = transformed.getroot() assert len(root) == 1 - chunk = root[0] + chunk = root[0] # type: ignore[reportOptionalSubscript] sensors = chunk.find("sensors") assert sensors is not None diff --git a/nerfstudio/scripts/exporter_frank.py b/nerfstudio/scripts/exporter_frank.py deleted file mode 100644 index 7d7d54be40..0000000000 --- a/nerfstudio/scripts/exporter_frank.py +++ /dev/null @@ -1,676 +0,0 @@ -# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Script for exporting NeRF into other formats. -""" - -from __future__ import annotations - -import json -import os -import sys -import typing -from collections import OrderedDict -from dataclasses import dataclass, field -from importlib.metadata import version -from pathlib import Path -from typing import List, Optional, Tuple, Union, cast - -import numpy as np -import open3d as o3d -import torch -import tyro -from typing_extensions import Annotated, Literal - -from nerfstudio.cameras.rays import RayBundle -from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanager -from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManager -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager -from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManager -from nerfstudio.data.scene_box import OrientedBox -from nerfstudio.exporter import texture_utils, tsdf_utils -from nerfstudio.exporter.exporter_utils import collect_camera_poses, generate_point_cloud, get_mesh_from_filename -from nerfstudio.exporter.marching_cubes import generate_mesh_with_multires_marching_cubes -from nerfstudio.fields.sdf_field import SDFField # noqa -from nerfstudio.models.splatfacto import SplatfactoModel -from nerfstudio.pipelines.base_pipeline import Pipeline, VanillaPipeline -from nerfstudio.utils.eval_utils import eval_setup -from nerfstudio.utils.rich_utils import CONSOLE - - -@dataclass -class Exporter: - """Export the mesh from a YML config to a folder.""" - - load_config: Path - """Path to the config YAML file.""" - output_dir: Path - """Path to the output directory.""" - - -def validate_pipeline(normal_method: str, normal_output_name: str, pipeline: Pipeline) -> None: - """Check that the pipeline is valid for this exporter. - - Args: - normal_method: Method to estimate normals with. Either "open3d" or "model_output". - normal_output_name: Name of the normal output. 
- pipeline: Pipeline to evaluate with. - """ - if normal_method == "model_output": - CONSOLE.print("Checking that the pipeline has a normal output.") - origins = torch.zeros((1, 3), device=pipeline.device) - directions = torch.ones_like(origins) - pixel_area = torch.ones_like(origins[..., :1]) - camera_indices = torch.zeros_like(origins[..., :1]) - metadata = {"directions_norm": torch.linalg.vector_norm(directions, dim=-1, keepdim=True)} - ray_bundle = RayBundle( - origins=origins, - directions=directions, - pixel_area=pixel_area, - camera_indices=camera_indices, - metadata=metadata, - ) - outputs = pipeline.model(ray_bundle) - if normal_output_name not in outputs: - CONSOLE.print(f"[bold yellow]Warning: Normal output '{normal_output_name}' not found in pipeline outputs.") - CONSOLE.print(f"Available outputs: {list(outputs.keys())}") - CONSOLE.print( - "[bold yellow]Warning: Please train a model with normals " - "(e.g., nerfacto with predicted normals turned on)." - ) - CONSOLE.print("[bold yellow]Warning: Or change --normal-method") - CONSOLE.print("[bold yellow]Exiting early.") - sys.exit(1) - - -@dataclass -class ExportPointCloud(Exporter): - """Export NeRF as a point cloud.""" - - num_points: int = 1000000 - """Number of points to generate. 
May result in less if outlier removal is used.""" - remove_outliers: bool = True - """Remove outliers from the point cloud.""" - reorient_normals: bool = True - """Reorient point cloud normals based on view direction.""" - normal_method: Literal["open3d", "model_output"] = "model_output" - """Method to estimate normals with.""" - normal_output_name: str = "normals" - """Name of the normal output.""" - depth_output_name: str = "depth" - """Name of the depth output.""" - rgb_output_name: str = "rgb" - """Name of the RGB output.""" - - obb_center: Optional[Tuple[float, float, float]] = None - """Center of the oriented bounding box.""" - obb_rotation: Optional[Tuple[float, float, float]] = None - """Rotation of the oriented bounding box. Expressed as RPY Euler angles in radians""" - obb_scale: Optional[Tuple[float, float, float]] = None - """Scale of the oriented bounding box along each axis.""" - num_rays_per_batch: int = 32768 - """Number of rays to evaluate per batch. Decrease if you run out of memory.""" - std_ratio: float = 10.0 - """Threshold based on STD of the average distances across the point cloud to remove outliers.""" - save_world_frame: bool = False - """If set, saves the point cloud in the same frame as the original dataset. Otherwise, uses the - scaled and reoriented coordinate space expected by the NeRF models.""" - - def main(self) -> None: - """Export point cloud.""" - - if not self.output_dir.exists(): - self.output_dir.mkdir(parents=True) - - _, pipeline, _, _ = eval_setup(self.load_config) - - validate_pipeline(self.normal_method, self.normal_output_name, pipeline) - - # Increase the batchsize to speed up the evaluation. 
- assert isinstance( - pipeline.datamanager, - (VanillaDataManager, ParallelDataManager,FullImageDatamanager, RandomCamerasDataManager)) - if isinstance(pipeline.datamanager, VanillaDataManager): - assert pipeline.datamanager.train_pixel_sampler is not None - pipeline.datamanager.train_pixel_sampler.num_rays_per_batch = self.num_rays_per_batch - - # Whether the normals should be estimated based on the point cloud. - estimate_normals = self.normal_method == "open3d" - crop_obb = None - if self.obb_center is not None and self.obb_rotation is not None and self.obb_scale is not None: - crop_obb = OrientedBox.from_params(self.obb_center, self.obb_rotation, self.obb_scale) - pcd = generate_point_cloud( - pipeline=pipeline, - num_points=self.num_points, - remove_outliers=self.remove_outliers, - reorient_normals=self.reorient_normals, - estimate_normals=estimate_normals, - rgb_output_name=self.rgb_output_name, - depth_output_name=self.depth_output_name, - normal_output_name=self.normal_output_name if self.normal_method == "model_output" else None, - crop_obb=crop_obb, - std_ratio=self.std_ratio, - ) - if self.save_world_frame: - # apply the inverse dataparser transform to the point cloud - points = np.asarray(pcd.points) - poses = np.eye(4, dtype=np.float32)[None, ...].repeat(points.shape[0], axis=0)[:, :3, :] - poses[:, :3, 3] = points - poses = pipeline.datamanager.train_dataparser_outputs.transform_poses_to_original_space( - torch.from_numpy(poses) - ) - points = poses[:, :3, 3].numpy() - pcd.points = o3d.utility.Vector3dVector(points) - - torch.cuda.empty_cache() - - CONSOLE.print(f"[bold green]:white_check_mark: Generated {pcd}") - CONSOLE.print("Saving Point Cloud...") - tpcd = o3d.t.geometry.PointCloud.from_legacy(pcd) - # The legacy PLY writer converts colors to UInt8, - # let us do the same to save space. 
- tpcd.point.colors = (tpcd.point.colors * 255).to(o3d.core.Dtype.UInt8) # type: ignore - o3d.t.io.write_point_cloud(str(self.output_dir / "point_cloud.ply"), tpcd) - print("\033[A\033[A") - CONSOLE.print("[bold green]:white_check_mark: Saving Point Cloud") - - -@dataclass -class ExportTSDFMesh(Exporter): - """ - Export a mesh using TSDF processing. - """ - - downscale_factor: int = 2 - """Downscale the images starting from the resolution used for training.""" - depth_output_name: str = "depth" - """Name of the depth output.""" - rgb_output_name: str = "rgb" - """Name of the RGB output.""" - resolution: Union[int, List[int]] = field(default_factory=lambda: [128, 128, 128]) - """Resolution of the TSDF volume or [x, y, z] resolutions individually.""" - batch_size: int = 10 - """How many depth images to integrate per batch.""" - use_bounding_box: bool = True - """Whether to use a bounding box for the TSDF volume.""" - bounding_box_min: Tuple[float, float, float] = (-1, -1, -1) - """Minimum of the bounding box, used if use_bounding_box is True.""" - bounding_box_max: Tuple[float, float, float] = (1, 1, 1) - """Minimum of the bounding box, used if use_bounding_box is True.""" - texture_method: Literal["tsdf", "nerf"] = "nerf" - """Method to texture the mesh with. Either 'tsdf' or 'nerf'.""" - px_per_uv_triangle: int = 4 - """Number of pixels per UV triangle.""" - unwrap_method: Literal["xatlas", "custom"] = "xatlas" - """The method to use for unwrapping the mesh.""" - num_pixels_per_side: int = 2048 - """If using xatlas for unwrapping, the pixels per side of the texture image.""" - target_num_faces: Optional[int] = 50000 - """Target number of faces for the mesh to texture.""" - refine_mesh_using_initial_aabb_estimate: bool = False - """Refine the mesh using the initial AABB estimate.""" - refinement_epsilon: float = 1e-2 - """Refinement epsilon for the mesh. 
This is the distance in meters that the refined AABB/OBB will be expanded by - in each direction.""" - - def main(self) -> None: - """Export mesh""" - - if not self.output_dir.exists(): - self.output_dir.mkdir(parents=True) - - _, pipeline, _, _ = eval_setup(self.load_config) - - tsdf_utils.export_tsdf_mesh( - pipeline, - self.output_dir, - self.downscale_factor, - self.depth_output_name, - self.rgb_output_name, - self.resolution, - self.batch_size, - use_bounding_box=self.use_bounding_box, - bounding_box_min=self.bounding_box_min, - bounding_box_max=self.bounding_box_max, - refine_mesh_using_initial_aabb_estimate=self.refine_mesh_using_initial_aabb_estimate, - refinement_epsilon=self.refinement_epsilon, - ) - - # possibly - # texture the mesh with NeRF and export to a mesh.obj file - # and a material and texture file - if self.texture_method == "nerf": - # load the mesh from the tsdf export - mesh = get_mesh_from_filename( - str(self.output_dir / "tsdf_mesh.ply"), target_num_faces=self.target_num_faces - ) - CONSOLE.print("Texturing mesh with NeRF") - texture_utils.export_textured_mesh( - mesh, - pipeline, - self.output_dir, - px_per_uv_triangle=self.px_per_uv_triangle if self.unwrap_method == "custom" else None, - unwrap_method=self.unwrap_method, - num_pixels_per_side=self.num_pixels_per_side, - ) - - -@dataclass -class ExportPoissonMesh(Exporter): - """ - Export a mesh using poisson surface reconstruction. - """ - - num_points: int = 1000000 - """Number of points to generate. 
May result in less if outlier removal is used.""" - remove_outliers: bool = True - """Remove outliers from the point cloud.""" - reorient_normals: bool = True - """Reorient point cloud normals based on view direction.""" - depth_output_name: str = "depth" - """Name of the depth output.""" - rgb_output_name: str = "rgb" - """Name of the RGB output.""" - normal_method: Literal["open3d", "model_output"] = "model_output" - """Method to estimate normals with.""" - normal_output_name: str = "normals" - """Name of the normal output.""" - save_point_cloud: bool = False - """Whether to save the point cloud.""" - obb_center: Optional[Tuple[float, float, float]] = None - """Center of the oriented bounding box.""" - obb_rotation: Optional[Tuple[float, float, float]] = None - """Rotation of the oriented bounding box. Expressed as RPY Euler angles in radians""" - obb_scale: Optional[Tuple[float, float, float]] = None - """Scale of the oriented bounding box along each axis.""" - num_rays_per_batch: int = 32768 - """Number of rays to evaluate per batch. Decrease if you run out of memory.""" - texture_method: Literal["point_cloud", "nerf"] = "nerf" - """Method to texture the mesh with. 
Either 'point_cloud' or 'nerf'.""" - px_per_uv_triangle: int = 4 - """Number of pixels per UV triangle.""" - unwrap_method: Literal["xatlas", "custom"] = "xatlas" - """The method to use for unwrapping the mesh.""" - num_pixels_per_side: int = 2048 - """If using xatlas for unwrapping, the pixels per side of the texture image.""" - target_num_faces: Optional[int] = 50000 - """Target number of faces for the mesh to texture.""" - std_ratio: float = 10.0 - """Threshold based on STD of the average distances across the point cloud to remove outliers.""" - - def main(self) -> None: - """Export mesh""" - - if not self.output_dir.exists(): - self.output_dir.mkdir(parents=True) - - _, pipeline, _, _ = eval_setup(self.load_config) - - validate_pipeline(self.normal_method, self.normal_output_name, pipeline) - - # Increase the batchsize to speed up the evaluation. - assert isinstance( - pipeline.datamanager, - (VanillaDataManager, ParallelDataManager,FullImageDatamanager,RandomCamerasDataManager)) - if isinstance(pipeline.datamanager, VanillaDataManager): - assert pipeline.datamanager.train_pixel_sampler is not None - pipeline.datamanager.train_pixel_sampler.num_rays_per_batch = self.num_rays_per_batch - - # Whether the normals should be estimated based on the point cloud. 
- estimate_normals = self.normal_method == "open3d" - if self.obb_center is not None and self.obb_rotation is not None and self.obb_scale is not None: - crop_obb = OrientedBox.from_params(self.obb_center, self.obb_rotation, self.obb_scale) - else: - crop_obb = None - - pcd = generate_point_cloud( - pipeline=pipeline, - num_points=self.num_points, - remove_outliers=self.remove_outliers, - reorient_normals=self.reorient_normals, - estimate_normals=estimate_normals, - rgb_output_name=self.rgb_output_name, - depth_output_name=self.depth_output_name, - normal_output_name=self.normal_output_name if self.normal_method == "model_output" else None, - crop_obb=crop_obb, - std_ratio=self.std_ratio, - ) - torch.cuda.empty_cache() - CONSOLE.print(f"[bold green]:white_check_mark: Generated {pcd}") - - if self.save_point_cloud: - CONSOLE.print("Saving Point Cloud...") - o3d.io.write_point_cloud(str(self.output_dir / "point_cloud.ply"), pcd) - print("\033[A\033[A") - CONSOLE.print("[bold green]:white_check_mark: Saving Point Cloud") - - CONSOLE.print("Computing Mesh... 
this may take a while.") - mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=9) - vertices_to_remove = densities < np.quantile(densities, 0.1) - mesh.remove_vertices_by_mask(vertices_to_remove) - print("\033[A\033[A") - CONSOLE.print("[bold green]:white_check_mark: Computing Mesh") - - CONSOLE.print("Saving Mesh...") - o3d.io.write_triangle_mesh(str(self.output_dir / "poisson_mesh.ply"), mesh) - print("\033[A\033[A") - CONSOLE.print("[bold green]:white_check_mark: Saving Mesh") - - # This will texture the mesh with NeRF and export to a mesh.obj file - # and a material and texture file - if self.texture_method == "nerf": - # load the mesh from the poisson reconstruction - mesh = get_mesh_from_filename( - str(self.output_dir / "poisson_mesh.ply"), target_num_faces=self.target_num_faces - ) - CONSOLE.print("Texturing mesh with NeRF") - texture_utils.export_textured_mesh( - mesh, - pipeline, - self.output_dir, - px_per_uv_triangle=self.px_per_uv_triangle if self.unwrap_method == "custom" else None, - unwrap_method=self.unwrap_method, - num_pixels_per_side=self.num_pixels_per_side, - ) - - -@dataclass -class ExportMarchingCubesMesh(Exporter): - """Export a mesh using marching cubes.""" - - isosurface_threshold: float = 0.0 - """The isosurface threshold for extraction. 
For SDF based methods the surface is the zero level set.""" - resolution: int = 1024 - """Marching cube resolution.""" - simplify_mesh: bool = False - """Whether to simplify the mesh.""" - bounding_box_min: Tuple[float, float, float] = (-1.0, -1.0, -1.0) - """Minimum of the bounding box.""" - bounding_box_max: Tuple[float, float, float] = (1.0, 1.0, 1.0) - """Maximum of the bounding box.""" - px_per_uv_triangle: int = 4 - """Number of pixels per UV triangle.""" - unwrap_method: Literal["xatlas", "custom"] = "xatlas" - """The method to use for unwrapping the mesh.""" - num_pixels_per_side: int = 2048 - """If using xatlas for unwrapping, the pixels per side of the texture image.""" - target_num_faces: Optional[int] = 50000 - """Target number of faces for the mesh to texture.""" - - def main(self) -> None: - """Main function.""" - if not self.output_dir.exists(): - self.output_dir.mkdir(parents=True) - - _, pipeline, _, _ = eval_setup(self.load_config) - - # TODO: Make this work with Density Field - assert hasattr(pipeline.model.config, "sdf_field"), "Model must have an SDF field." - - CONSOLE.print("Extracting mesh with marching cubes... which may take a while") - - assert self.resolution % 512 == 0, f"""resolution must be divisible by 512, got {self.resolution}. - This is important because the algorithm uses a multi-resolution approach - to evaluate the SDF where the minimum resolution is 512.""" - - # Extract mesh using marching cubes for sdf at a multi-scale resolution. 
- multi_res_mesh = generate_mesh_with_multires_marching_cubes( - geometry_callable_field=lambda x: cast(SDFField, pipeline.model.field) - .forward_geonetwork(x)[:, 0] - .contiguous(), - resolution=self.resolution, - bounding_box_min=self.bounding_box_min, - bounding_box_max=self.bounding_box_max, - isosurface_threshold=self.isosurface_threshold, - coarse_mask=None, - ) - filename = self.output_dir / "sdf_marching_cubes_mesh.ply" - multi_res_mesh.export(filename) - - # load the mesh from the marching cubes export - mesh = get_mesh_from_filename(str(filename), target_num_faces=self.target_num_faces) - CONSOLE.print("Texturing mesh with NeRF...") - texture_utils.export_textured_mesh( - mesh, - pipeline, - self.output_dir, - px_per_uv_triangle=self.px_per_uv_triangle if self.unwrap_method == "custom" else None, - unwrap_method=self.unwrap_method, - num_pixels_per_side=self.num_pixels_per_side, - ) - - -@dataclass -class ExportCameraPoses(Exporter): - """ - Export camera poses to a .json file. - """ - - def main(self) -> None: - """Export camera poses""" - if not self.output_dir.exists(): - self.output_dir.mkdir(parents=True) - - _, pipeline, _, _ = eval_setup(self.load_config) - assert isinstance(pipeline, VanillaPipeline) - train_frames, eval_frames = collect_camera_poses(pipeline) - - for file_name, frames in [("transforms_train.json", train_frames), ("transforms_eval.json", eval_frames)]: - if len(frames) == 0: - CONSOLE.print(f"[bold yellow]No frames found for {file_name}. 
Skipping.") - continue - - output_file_path = os.path.join(self.output_dir, file_name) - - with open(output_file_path, "w", encoding="UTF-8") as f: - json.dump(frames, f, indent=4) - - CONSOLE.print(f"[bold green]:white_check_mark: Saved poses to {output_file_path}") - - -@dataclass -class ExportGaussianSplat(Exporter): - """ - Export 3D Gaussian Splatting model to a .ply - """ - - output_filename: str = "splat.ply" - """Name of the output file.""" - obb_center: Optional[Tuple[float, float, float]] = None - """Center of the oriented bounding box.""" - obb_rotation: Optional[Tuple[float, float, float]] = None - """Rotation of the oriented bounding box. Expressed as RPY Euler angles in radians""" - obb_scale: Optional[Tuple[float, float, float]] = None - """Scale of the oriented bounding box along each axis.""" - ply_color_mode: Literal["sh_coeffs", "rgb"] = "sh_coeffs" - """If "rgb", export colors as red/green/blue fields. Otherwise, export colors as - spherical harmonics coefficients.""" - - @staticmethod - def write_ply( - filename: str, - count: int, - map_to_tensors: typing.OrderedDict[str, np.ndarray], - ): - """ - Writes a PLY file with given vertex properties and a tensor of float or uint8 values in the order specified by the OrderedDict. - Note: All float values will be converted to float32 for writing. - - Parameters: - filename (str): The name of the file to write. - count (int): The number of vertices to write. - map_to_tensors (OrderedDict[str, np.ndarray]): An ordered dictionary mapping property names to numpy arrays of float or uint8 values. - Each array should be 1-dimensional and of equal length matching 'count'. Arrays should not be empty. 
- """ - - # Ensure count matches the length of all tensors - if not all(tensor.size == count for tensor in map_to_tensors.values()): - raise ValueError("Count does not match the length of all tensors") - - # Type check for numpy arrays of type float or uint8 and non-empty - if not all( - isinstance(tensor, np.ndarray) - and (tensor.dtype.kind == "f" or tensor.dtype == np.uint8) - and tensor.size > 0 - for tensor in map_to_tensors.values() - ): - raise ValueError("All tensors must be numpy arrays of float or uint8 type and not empty") - - with open(filename, "wb") as ply_file: - nerfstudio_version = version("nerfstudio") - # Write PLY header - ply_file.write(b"ply\n") - ply_file.write(b"format binary_little_endian 1.0\n") - ply_file.write(f"comment Generated by Nerstudio {nerfstudio_version}\n".encode()) - ply_file.write(b"comment Vertical Axis: z\n") - ply_file.write(f"element vertex {count}\n".encode()) - - # Write properties, in order due to OrderedDict - for key, tensor in map_to_tensors.items(): - data_type = "float" if tensor.dtype.kind == "f" else "uchar" - ply_file.write(f"property {data_type} {key}\n".encode()) - - ply_file.write(b"end_header\n") - - # Write binary data - # Note: If this is a performance bottleneck consider using numpy.hstack for efficiency improvement - for i in range(count): - for tensor in map_to_tensors.values(): - value = tensor[i] - if tensor.dtype.kind == "f": - ply_file.write(np.float32(value).tobytes()) - elif tensor.dtype == np.uint8: - ply_file.write(value.tobytes()) - - def main(self) -> None: - if not self.output_dir.exists(): - self.output_dir.mkdir(parents=True) - - _, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference") - - assert isinstance(pipeline.model, SplatfactoModel) - - model: SplatfactoModel = pipeline.model - - filename = self.output_dir / self.output_filename - - map_to_tensors = OrderedDict() - - with torch.no_grad(): - positions = model.means.cpu().numpy() - count = positions.shape[0] - n = count 
- map_to_tensors["x"] = positions[:, 0] - map_to_tensors["y"] = positions[:, 1] - map_to_tensors["z"] = positions[:, 2] - map_to_tensors["nx"] = np.zeros(n, dtype=np.float32) - map_to_tensors["ny"] = np.zeros(n, dtype=np.float32) - map_to_tensors["nz"] = np.zeros(n, dtype=np.float32) - - if self.ply_color_mode == "rgb": - colors = torch.clamp(model.colors.clone(), 0.0, 1.0).data.cpu().numpy() - colors = (colors * 255).astype(np.uint8) - map_to_tensors["red"] = colors[:, 0] - map_to_tensors["green"] = colors[:, 1] - map_to_tensors["blue"] = colors[:, 2] - elif self.ply_color_mode == "sh_coeffs": - shs_0 = model.shs_0.contiguous().cpu().numpy() - for i in range(shs_0.shape[1]): - map_to_tensors[f"f_dc_{i}"] = shs_0[:, i, None] - - if model.config.sh_degree > 0: - if self.ply_color_mode == "rgb": - CONSOLE.print( - "Warning: model has higher level of spherical harmonics, ignoring them and only export rgb." - ) - elif self.ply_color_mode == "sh_coeffs": - # transpose(1, 2) was needed to match the sh order in Inria version - shs_rest = model.shs_rest.transpose(1, 2).contiguous().cpu().numpy() - shs_rest = shs_rest.reshape((n, -1)) - for i in range(shs_rest.shape[-1]): - map_to_tensors[f"f_rest_{i}"] = shs_rest[:, i, None] - - map_to_tensors["opacity"] = model.opacities.data.cpu().numpy() - - scales = model.scales.data.cpu().numpy() - for i in range(3): - map_to_tensors[f"scale_{i}"] = scales[:, i, None] - - quats = model.quats.data.cpu().numpy() - for i in range(4): - map_to_tensors[f"rot_{i}"] = quats[:, i, None] - - if self.obb_center is not None and self.obb_rotation is not None and self.obb_scale is not None: - crop_obb = OrientedBox.from_params(self.obb_center, self.obb_rotation, self.obb_scale) - assert crop_obb is not None - mask = crop_obb.within(torch.from_numpy(positions)).numpy() - for k, t in map_to_tensors.items(): - map_to_tensors[k] = map_to_tensors[k][mask] - - n = map_to_tensors["x"].shape[0] - count = n - - # post optimization, it is possible have 
NaN/Inf values in some attributes - # to ensure the exported ply file has finite values, we enforce finite filters. - select = np.ones(n, dtype=bool) - for k, t in map_to_tensors.items(): - n_before = np.sum(select) - select = np.logical_and(select, np.isfinite(t).all(axis=-1)) - n_after = np.sum(select) - if n_after < n_before: - CONSOLE.print(f"{n_before - n_after} NaN/Inf elements in {k}") - nan_count = np.sum(select) - n - - # filter gaussians that have opacities < 1/255, because they are skipped in cuda rasterization - low_opacity_gaussians = (map_to_tensors["opacity"]).squeeze(axis=-1) < -5.5373 # logit(1/255) - lowopa_count = np.sum(low_opacity_gaussians) - select[low_opacity_gaussians] = 0 - - if np.sum(select) < n: - CONSOLE.print( - f"{nan_count} Gaussians have NaN/Inf and {lowopa_count} have low opacity, only export {np.sum(select)}/{n}" - ) - for k, t in map_to_tensors.items(): - map_to_tensors[k] = map_to_tensors[k][select] - count = np.sum(select) - - ExportGaussianSplat.write_ply(str(filename), count, map_to_tensors) - - -Commands = tyro.conf.FlagConversionOff[ - Union[ - Annotated[ExportPointCloud, tyro.conf.subcommand(name="pointcloud")], - Annotated[ExportTSDFMesh, tyro.conf.subcommand(name="tsdf")], - Annotated[ExportPoissonMesh, tyro.conf.subcommand(name="poisson")], - Annotated[ExportMarchingCubesMesh, tyro.conf.subcommand(name="marching-cubes")], - Annotated[ExportCameraPoses, tyro.conf.subcommand(name="cameras")], - Annotated[ExportGaussianSplat, tyro.conf.subcommand(name="gaussian-splat")], - ] -] - - -def entrypoint(): - """Entrypoint for use with pyproject scripts.""" - tyro.extras.set_accent_color("bright_yellow") - tyro.cli(Commands).main() - - -if __name__ == "__main__": - entrypoint() - - -def get_parser_fn(): - """Get the parser function for the sphinx docs.""" - return tyro.extras.get_parser(Commands) # noqa diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index 1fdd36f7f2..4825bfc786 100644 --- 
a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -19,7 +19,7 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import Optional, Union +from typing import List, Optional, Union, cast import numpy as np import tyro @@ -90,7 +90,7 @@ def main(self) -> None: record3d_image_filenames = list(np.array(record3d_image_filenames)[idx]) # Copy images to output directory copied_image_paths = process_data_utils.copy_images_list( - record3d_image_filenames, + cast(List[Path], record3d_image_filenames), image_dir=image_dir, verbose=self.verbose, num_downscales=self.num_downscales, diff --git a/nerfstudio/scripts/train.py b/nerfstudio/scripts/train.py index dd2300568a..bcaf92aba7 100644 --- a/nerfstudio/scripts/train.py +++ b/nerfstudio/scripts/train.py @@ -215,6 +215,7 @@ def launch( process_context.join() except KeyboardInterrupt: for i, process in enumerate(process_context.processes): + assert process is not None if process.is_alive(): CONSOLE.log(f"Terminating process {i}...") process.terminate() diff --git a/nerfstudio/utils/colormaps.py b/nerfstudio/utils/colormaps.py index 0a790e1237..c771178c2a 100644 --- a/nerfstudio/utils/colormaps.py +++ b/nerfstudio/utils/colormaps.py @@ -111,7 +111,10 @@ def apply_float_colormap(image: Float[Tensor, "*bs 1"], colormap: Colormaps = "v image_long_max = torch.max(image_long) assert image_long_min >= 0, f"the min value is {image_long_min}" assert image_long_max <= 255, f"the max value is {image_long_max}" - return torch.tensor(matplotlib.colormaps[colormap].colors, device=image.device)[image_long[..., 0]] + return torch.tensor( + matplotlib.colormaps[colormap].colors, # type: ignore + device=image.device, + )[image_long[..., 0]] def apply_depth_colormap( diff --git a/nerfstudio/utils/misc.py b/nerfstudio/utils/misc.py index f55e1259a3..3f71adb7ed 100644 --- a/nerfstudio/utils/misc.py +++ b/nerfstudio/utils/misc.py @@ -44,7 +44,7 @@ def get_dict_to_torch(stuff: T, 
device: Union[torch.device, str] = "cpu", exclud stuff[k] = get_dict_to_torch(v, device) return stuff if isinstance(stuff, torch.Tensor): - return stuff.to(device) + return stuff.to(device) # type: ignore[reportReturnType] return stuff @@ -59,7 +59,7 @@ def get_dict_to_cpu(stuff: T) -> T: stuff[k] = get_dict_to_cpu(v) return stuff if isinstance(stuff, torch.Tensor): - return stuff.detach().cpu() + return stuff.detach().cpu() # type: ignore[reportReturnType] return stuff diff --git a/nerfstudio/utils/profiler.py b/nerfstudio/utils/profiler.py index b3046aa73f..48a8be96dc 100644 --- a/nerfstudio/utils/profiler.py +++ b/nerfstudio/utils/profiler.py @@ -58,7 +58,7 @@ def time_function(name_or_func: Union[CallableT, str]) -> Union[CallableT, Conte Returns: A wrapped function or context to use in a `with` statement. """ - return _TimeFunction(name_or_func) + return _TimeFunction(name_or_func) # type: ignore[reportReturnType] class _TimeFunction(ContextDecorator): diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 83e3407576..9369336e89 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -367,7 +367,9 @@ def update_spline(self) -> None: points_array = self._position_spline.evaluate( self.spline_t_from_t_sec(np.linspace(0, transition_times_cumsum[-1], num_frames)) ) - colors_array = np.array([colorsys.hls_to_rgb(h, 0.5, 1.0) for h in np.linspace(0.0, 1.0, len(points_array))]) + colors_array = np.array( + [colorsys.hls_to_rgb(float(h), 0.5, 1.0) for h in np.linspace(0.0, 1.0, len(points_array))], + ) # Clear prior spline nodes. 
for node in self._spline_nodes: @@ -377,7 +379,7 @@ def update_spline(self) -> None: self._spline_nodes.append( self._server.scene.add_spline_catmull_rom( "/render_camera_spline", - positions=points_array, + points=points_array, color=(220, 220, 220), closed=self.loop, line_width=1.0, diff --git a/nerfstudio/viewer/render_state_machine.py b/nerfstudio/viewer/render_state_machine.py index 32448d9bc8..ced692d453 100644 --- a/nerfstudio/viewer/render_state_machine.py +++ b/nerfstudio/viewer/render_state_machine.py @@ -147,10 +147,9 @@ def _render_img(self, camera_state: CameraState): [color[0] / 255.0, color[1] / 255.0, color[2] / 255.0], device=self.viewer.get_model().device, ) - self.viewer.get_model().set_background(background_color) + self.viewer.get_model().set_background(background_color) # type: ignore[reportCallIssue] was_training = self.viewer.get_model().training self.viewer.get_model().eval() - step = self.viewer.step try: if self.viewer.control_panel.crop_viewport: color = self.viewer.control_panel.background_color @@ -201,7 +200,10 @@ def _render_img(self, camera_state: CameraState): render_time = vis_t.duration if writer.is_initialized() and render_time != 0: writer.put_time( - name=EventName.VIS_RAYS_PER_SEC, duration=num_rays / render_time, step=step, avg_over_steps=True + name=EventName.VIS_RAYS_PER_SEC, + duration=num_rays / render_time, + step=self.viewer.step, + avg_over_steps=True, ) return outputs diff --git a/pyproject.toml b/pyproject.toml index 7509ba1290..585eee4f9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ dependencies = [ "torchvision>=0.14.1", "torchmetrics[image]>=1.0.1", "typing_extensions>=4.4.0", - "viser==0.2.7", + "viser==1.0.0", "nuscenes-devkit>=1.1.1", "wandb>=0.13.3", "xatlas", @@ -93,7 +93,7 @@ dev = [ "pre-commit==3.3.2", "pytest==7.1.2", "pytest-xdist==2.5.0", - "ruff>=0.6.1", + "ruff==0.12.2", "sshconf==0.2.5", "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions 
"diffusers==0.16.1", @@ -103,8 +103,7 @@ dev = [ # NOTE: Disabling projectaria-tools because it doesn't have prebuilt windows wheels # Syntax comes from here: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ "projectaria-tools>=1.3.1; sys_platform != 'win32'", - # pin torch to <=2.1 to fix https://github.com/pytorch/pytorch/issues/118736 - "torch>=1.13.1,<2.2", + "torch==2.7.1", "awscli==1.33.18" ] @@ -161,7 +160,6 @@ reportMissingImports = "warning" reportMissingTypeStubs = false reportPrivateImportUsage = false -pythonVersion = "3.8" pythonPlatform = "Linux" [tool.ruff] @@ -185,6 +183,7 @@ lint.ignore = [ "PLR0915", # Too many statements. "PLR0913", # Too many arguments. "PLC0414", # Import alias does not rename variable. (this is used for exporting names) + "PLC0415", # `import` should be at the top-level of a file "PLC1901", # Use falsey strings. "PLR5501", # Use `elif` instead of `else if`. "PLR0911", # Too many return statements. diff --git a/test_cli.bat b/test_cli.bat index a315890a05..b04e03822b 100644 --- a/test_cli.bat +++ b/test_cli.bat @@ -2,6 +2,10 @@ echo. echo ========== Nerfstudio CLI Validator ========== echo. +@echo off +REM Ensure UTF-8 output +chcp 65001 > nul +set PYTHONIOENCODING=utf-8 REM Check if CLI works echo Running: ns-train --help @@ -31,4 +35,4 @@ echo. echo ✅ CLI appears to be working correctly. 
:end -pause \ No newline at end of file +pause diff --git a/test_cli.py b/test_cli.py index 21f4d449c5..8cd3ec2aea 100644 --- a/test_cli.py +++ b/test_cli.py @@ -1,7 +1,14 @@ +# test_cli.py import subprocess import sys import shutil +import io +import os +# Force UTF-8 encoding +sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='utf-8') +sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding='utf-8') +os.environ["PYTHONIOENCODING"] = "utf-8" # Optional expected modules to validate trainer registration EXPECTED_TRAINERS = [ @@ -15,7 +22,7 @@ def run_command(cmd, description=""): print(f"\n🔹 Running: {cmd}") try: - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, encoding='utf-8') if result.returncode != 0: print(f"❌ Failed: {description or cmd}") print(result.stderr) @@ -26,15 +33,13 @@ def run_command(cmd, description=""): print(f"❌ Exception while running {cmd}:\n{e}") return False - def command_exists(command): return shutil.which(command) is not None - def main(): print("🔧 Nerfstudio CLI Validation Tool\n") - # 1. Check if CLI commands exist + # 1. Check CLI tools for cmd in ["ns-train", "ns-viewer", "ns-process-data"]: if not command_exists(cmd): print(f"❌ Command not found: {cmd}. Is nerfstudio CLI installed?") @@ -42,11 +47,11 @@ def main(): else: print(f"✔️ Found CLI command: {cmd}") - # 2. Check basic help commands + # 2. CLI Help Check if not run_command("ns-train --help", "Check ns-train help"): return if not run_command("ns-viewer --help", "Check ns-viewer help"): return - # 3. Parse trainer list + # 3. Trainer Parsing print("\n📋 Checking registered trainers:") output = run_command("ns-train --help", "List trainers") if not output: return @@ -64,7 +69,7 @@ def main(): for t in trainers: print(f" - {t}") - # 4. Check for expected trainers (from add-ons) + # 4. 
Validate Addons print("\n🔍 Verifying addon trainer registration:") missing = [] for expected in EXPECTED_TRAINERS: @@ -81,6 +86,5 @@ def main(): print("\n✅ CLI Validation Complete.") - if __name__ == "__main__": main()