diff --git a/.gitmodules b/.gitmodules
index d48532b..cafb9ac 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -8,3 +8,6 @@
 [submodule "SIBR_viewers"]
 	path = SIBR_viewers
 	url = https://gitlab.inria.fr/sibr/sibr_core.git
+[submodule "submodules/fused-ssim"]
+	path = submodules/fused-ssim
+	url = https://github.com/rahul-goel/fused-ssim.git
diff --git a/README.md b/README.md
index 525912c..7a3484e 100644
--- a/README.md
+++ b/README.md
@@ -37,12 +37,15 @@ This research was funded by the ERC Advanced grant FUNGRAPH No 788065. The autho
 ## NEW FEATURES !
-We have limited resources for maintaining and updating the code. However, we have added a few new features since the original release that are inspired by some of the excellent work many other researchers have been doing on 3DGS. We will be adding other features within the ability of our resources.
+We have limited resources for maintaining and updating the code. However, we have added a few new features since the original release that are inspired by some of the excellent work many other researchers have been doing on 3DGS. We will be adding further features as our resources allow.
-Update of August 2024:
-We have added/corrected the following features: [Depth regularization](#depth-regularization) for training, [anti-aliasing](#anti-aliasing) and [exposure compensation](#exposure-compensation). We have enhanced the SIBR real time viewer by correcting bugs and adding features in the [Top View](#sibr-top-view) that allows visualization of input and user cameras. Please note that it is currently not possible to use depth regularization with the training speed acceleration since they use different rasterizer versions.
+**Update of October 2024**: We integrated [training speed acceleration](#training-speed-acceleration) and made it compatible with [depth regularization](#depth-regularization), [anti-aliasing](#anti-aliasing) and [exposure compensation](#exposure-compensation).
-Update of Spring 2024:
+
+**Update of August 2024**:
+We have added/corrected the following features: [depth regularization](#depth-regularization) for training, [anti-aliasing](#anti-aliasing) and [exposure compensation](#exposure-compensation). We have also enhanced the SIBR real-time viewer by fixing bugs and adding features in the [Top View](#sibr-top-view), which allows visualization of the input and user cameras. Please note that it is currently not possible to use depth regularization together with the training speed acceleration, since they use different rasterizer versions.
+
+**Update of Spring 2024**:
 Orange Labs has kindly added [OpenXR support](#openxr-support) for VR viewing.
 
 ## Step-by-step Tutorial
@@ -492,11 +495,34 @@ python convert.py -s --skip_matching [--resize] #If not resizing, Ima
+### Training speed acceleration
+
+We integrated the drop-in replacements from [Taming-3dgs](https://humansensinglab.github.io/taming-3dgs/)<sup>1</sup> with [fused ssim](https://github.com/rahul-goel/fused-ssim/tree/main) into the original codebase to speed up training times. Once installed, the accelerated rasterizer delivers a **$\times$ 1.6 training time speedup** using `--optimizer_type default` and a **$\times$ 2.7 training time speedup** using `--optimizer_type sparse_adam`.
+
+To get faster training times, you must first install the accelerated rasterizer into your environment:
+
+```bash
+pip uninstall diff-gaussian-rasterization -y
+cd submodules/diff-gaussian-rasterization
+rm -r build
+git checkout 3dgs_accel
+pip install .
+```
+
+Then you can add the following parameter to use the sparse Adam optimizer when running `train.py`:
+
+```bash
+--optimizer_type sparse_adam
+```
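+
+For example, assuming a prepared dataset (the paths below are placeholders), a full training run with the sparse Adam optimizer might look like this:
+
+```bash
+# Requires the accelerated rasterizer (3dgs_accel branch) installed as described above.
+python train.py -s <path to dataset> -m <output model path> --optimizer_type sparse_adam
+```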
+
+*Note that this custom rasterizer has a different behaviour from the original version; for more details on training times, please see the [stats for training times](results.md#training-times-comparisons)*.
+
+*1. Mallick and Goel, et al. ‘Taming 3DGS: High-Quality Radiance Fields with Limited Resources’. SIGGRAPH Asia 2024 Conference Papers, 2024, https://doi.org/10.1145/3680528.3687694, [github](https://github.com/humansensinglab/taming-3dgs)*
+
+
 ### Depth regularization
-
-Two preprocessing steps are required to enable depth regularization when training a scene:
- To have better reconstructed scenes we use depth maps as priors during optimization with each input images. It works best on untextured parts ex: roads and can remove floaters. Several papers have used similar ideas to improve various aspects of 3DGS; (e.g. [DepthRegularizedGS](https://robot0321.github.io/DepthRegGS/index.html), [SparseGS](https://formycat.github.io/SparseGS-Real-Time-360-Sparse-View-Synthesis-using-Gaussian-Splatting/), [DNGaussian](https://fictionarry.github.io/DNGaussian/)). The depth regularization we integrated is that used in our [Hierarchical 3DGS](https://repo-sam.inria.fr/fungraph/hierarchical-3d-gaussians/) paper, but applied to the original 3DGS; for some scenes (e.g., the DeepBlending scenes) it improves quality significantly; for others it either makes a small difference or can even be worse. For details statistics please see here: [Stats for depth regularization](results.md).
+To obtain better reconstructed scenes, we use depth maps as priors during optimization for each input image. This works best on untextured parts (e.g. roads) and can remove floaters. Several papers have used similar ideas to improve various aspects of 3DGS (e.g. [DepthRegularizedGS](https://robot0321.github.io/DepthRegGS/index.html), [SparseGS](https://formycat.github.io/SparseGS-Real-Time-360-Sparse-View-Synthesis-using-Gaussian-Splatting/), [DNGaussian](https://fictionarry.github.io/DNGaussian/)). The depth regularization we integrated is the one used in our [Hierarchical 3DGS](https://repo-sam.inria.fr/fungraph/hierarchical-3d-gaussians/) paper, but applied to the original 3DGS; for some scenes (e.g., the DeepBlending scenes) it improves quality significantly, while for others it makes only a small difference or can even hurt. For example results showing the potential benefit and statistics on quality, please see: [Stats for depth regularization](results.md).
 
 When training on a synthetic dataset, depth maps can be produced and they do not require further processing to be used in our method. For real world datasets please do the following:
 1. Get depth maps for each input images, to this effect we suggest using [Depth anything v2](https://github.com/DepthAnything/Depth-Anything-V2?tab=readme-ov-file#usage).
@@ -508,7 +534,11 @@ When training on a synthetic dataset, depth maps can be produced and they do not
 A new parameter should be set when training if you want to use depth regularization `-d <path to depth maps>`.
 
 ### Exposure compensation
-To compensate for exposure changes in the different input images we optimize an affine transformation for each image just as in [Hierarchical 3dgs](https://repo-sam.inria.fr/fungraph/hierarchical-3d-gaussians/). Add the following parameters to enable it:
+To compensate for exposure changes in the different input images we optimize an affine transformation for each image, just as in [Hierarchical 3dgs](https://repo-sam.inria.fr/fungraph/hierarchical-3d-gaussians/).
+
+This can greatly improve reconstruction results for "in the wild" captures, e.g., with a smartphone when the exposure setting of the camera is not fixed. For example results showing the potential benefit and statistics on quality, please see: [Stats for exposure compensation](results.md).
+
+Add the following parameters to enable it:
 ```
 --exposure_lr_init 0.001 --exposure_lr_final 0.0001 --exposure_lr_delay_steps 5000 --exposure_lr_delay_mult 0.001 --train_test_exp
 ```
diff --git a/arguments/__init__.py b/arguments/__init__.py
index 8c16a18..0b2f448 100644
--- a/arguments/__init__.py
+++ b/arguments/__init__.py
@@ -96,6 +96,7 @@ class OptimizationParams(ParamGroup):
         self.depth_l1_weight_init = 1.0
         self.depth_l1_weight_final = 0.01
         self.random_background = False
+        self.optimizer_type = "default"
         super().__init__(parser, "Optimization Parameters")
 
 def get_combined_args(parser : ArgumentParser):
diff --git a/assets/Exposure_comparison.png b/assets/Exposure_comparison.png
new file mode 100644
index 0000000..8c1b1d8
Binary files /dev/null and b/assets/Exposure_comparison.png differ
diff --git a/assets/all_results_LPIPS.png b/assets/all_results_LPIPS.png
deleted file mode 100644
index 28cf94c..0000000
Binary files a/assets/all_results_LPIPS.png and /dev/null differ
diff --git a/assets/all_results_PSNR.png b/assets/all_results_PSNR.png
deleted file mode 100644
index 529eee2..0000000
Binary files a/assets/all_results_PSNR.png and /dev/null differ
diff --git a/assets/all_results_SSIM.png b/assets/all_results_SSIM.png
deleted file mode 100644
index 983eb01..0000000
Binary files a/assets/all_results_SSIM.png and /dev/null differ
diff --git a/assets/charts/accel_default_LPIPS.png b/assets/charts/accel_default_LPIPS.png
new file mode 100644
index 0000000..eda641c
Binary files /dev/null and b/assets/charts/accel_default_LPIPS.png differ
diff --git a/assets/charts/accel_default_PSNR.png b/assets/charts/accel_default_PSNR.png
new file mode 100644
index 0000000..77a18bf
Binary files /dev/null and b/assets/charts/accel_default_PSNR.png differ
diff --git a/assets/charts/accel_default_SSIM.png b/assets/charts/accel_default_SSIM.png
new file mode 100644
index 0000000..fc9e4cb
Binary files /dev/null and b/assets/charts/accel_default_SSIM.png differ
diff --git a/assets/charts/accel_sparse_adam_LPIPS.png b/assets/charts/accel_sparse_adam_LPIPS.png
new file mode 100644
index 0000000..2b0163d
Binary files /dev/null and b/assets/charts/accel_sparse_adam_LPIPS.png differ
diff --git a/assets/charts/accel_sparse_adam_PSNR.png b/assets/charts/accel_sparse_adam_PSNR.png
new file mode 100644
index 0000000..2965e17
Binary files /dev/null and
b/assets/charts/accel_sparse_adam_PSNR.png differ diff --git a/assets/charts/accel_sparse_adam_SSIM.png b/assets/charts/accel_sparse_adam_SSIM.png new file mode 100644 index 0000000..f1a38a9 Binary files /dev/null and b/assets/charts/accel_sparse_adam_SSIM.png differ diff --git a/assets/charts/base_LPIPS.png b/assets/charts/base_LPIPS.png new file mode 100644 index 0000000..6720d69 Binary files /dev/null and b/assets/charts/base_LPIPS.png differ diff --git a/assets/charts/base_PSNR.png b/assets/charts/base_PSNR.png new file mode 100644 index 0000000..5322c15 Binary files /dev/null and b/assets/charts/base_PSNR.png differ diff --git a/assets/charts/base_SSIM.png b/assets/charts/base_SSIM.png new file mode 100644 index 0000000..4d771c6 Binary files /dev/null and b/assets/charts/base_SSIM.png differ diff --git a/assets/charts/exposure_LPIPS.png b/assets/charts/exposure_LPIPS.png new file mode 100644 index 0000000..ede29a4 Binary files /dev/null and b/assets/charts/exposure_LPIPS.png differ diff --git a/assets/charts/exposure_PSNR.png b/assets/charts/exposure_PSNR.png new file mode 100644 index 0000000..23ce5b4 Binary files /dev/null and b/assets/charts/exposure_PSNR.png differ diff --git a/assets/charts/exposure_SSIM.png b/assets/charts/exposure_SSIM.png new file mode 100644 index 0000000..e66bb3d Binary files /dev/null and b/assets/charts/exposure_SSIM.png differ diff --git a/assets/charts/timings.png b/assets/charts/timings.png new file mode 100644 index 0000000..1e29ace Binary files /dev/null and b/assets/charts/timings.png differ diff --git a/assets/depth_comparison.png b/assets/depth_comparison.png new file mode 100644 index 0000000..8aa27c5 Binary files /dev/null and b/assets/depth_comparison.png differ diff --git a/full_eval.py b/full_eval.py index f9ae845..e0eb2d0 100644 --- a/full_eval.py +++ b/full_eval.py @@ -23,6 +23,14 @@ parser.add_argument("--skip_training", action="store_true") parser.add_argument("--skip_rendering", action="store_true") parser.add_argument("--skip_metrics", action="store_true") parser.add_argument("--output_path", default="./eval") +parser.add_argument("--use_depth", action="store_true") +parser.add_argument("--use_expcomp", action="store_true") +parser.add_argument("--fast", action="store_true") +parser.add_argument("--aa", action="store_true") + + + + args, _ = parser.parse_known_args() all_scenes = [] @@ -36,9 +44,19 @@ if not args.skip_training or not args.skip_rendering: parser.add_argument("--tanksandtemples", "-tat", required=True, type=str) parser.add_argument("--deepblending", "-db", required=True, type=str) args = parser.parse_args() - if not args.skip_training: - common_args = " --quiet --eval --test_iterations -1 " + common_args = " --disable_viewer --quiet --eval --test_iterations -1 " + + if args.aa: + common_args += " --antialiasing " + if args.use_depth: + common_args += " -d depths2/ " + + if args.use_expcomp: + common_args += " --exposure_lr_init 0.001 --exposure_lr_final 0.0001 --exposure_lr_delay_steps 5000 --exposure_lr_delay_mult 0.001 --train_test_exp " + + if args.fast: + common_args += " --optimizer_type sparse_adam " start_time = time.time() for scene in mipnerf360_outdoor_scenes: @@ -61,7 +79,7 @@ if not args.skip_training: os.system("python train.py -s " + source + " -m " + args.output_path + "/" + scene + common_args) db_timing = (time.time() - start_time)/60.0 -with open("timing.txt", 'w') as file: +with open(os.path.join(args.output_path,"timing.txt"), 'w') as file: file.write(f"m360: {m360_timing} minutes \n tandt: 
{tandt_timing} minutes \n db: {db_timing} minutes\n") if not args.skip_rendering: @@ -74,8 +92,14 @@ if not args.skip_rendering: all_sources.append(args.tanksandtemples + "/" + scene) for scene in deep_blending_scenes: all_sources.append(args.deepblending + "/" + scene) - + common_args = " --quiet --eval --skip_train" + + if args.aa: + common_args += " --antialiasing " + if args.use_expcomp: + common_args += " --train_test_exp " + for scene, source in zip(all_scenes, all_sources): os.system("python render.py --iteration 7000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) os.system("python render.py --iteration 30000 -s " + source + " -m " + args.output_path + "/" + scene + common_args) @@ -85,4 +109,4 @@ if not args.skip_metrics: for scene in all_scenes: scenes_string += "\"" + args.output_path + "/" + scene + "\" " - os.system("python metrics.py -m " + scenes_string) \ No newline at end of file + os.system("python metrics.py -m " + scenes_string) diff --git a/gaussian_renderer/__init__.py b/gaussian_renderer/__init__.py index a9d2121..e12f4b6 100644 --- a/gaussian_renderer/__init__.py +++ b/gaussian_renderer/__init__.py @@ -15,7 +15,7 @@ from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianR from scene.gaussian_model import GaussianModel from utils.sh_utils import eval_sh -def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, override_color = None, use_trained_exp=False): +def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, separate_sh = False, override_color = None, use_trained_exp=False): """ Render the scene. @@ -79,20 +79,35 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized) colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0) else: - shs = pc.get_features + if separate_sh: + dc, shs = pc.get_features_dc, pc.get_features_rest + else: + shs = pc.get_features else: colors_precomp = override_color # Rasterize visible Gaussians to image, obtain their radii (on screen). 
- rendered_image, radii, depth_image = rasterizer( - means3D = means3D, - means2D = means2D, - shs = shs, - colors_precomp = colors_precomp, - opacities = opacity, - scales = scales, - rotations = rotations, - cov3D_precomp = cov3D_precomp) + if separate_sh: + rendered_image, radii, depth_image = rasterizer( + means3D = means3D, + means2D = means2D, + dc = dc, + shs = shs, + colors_precomp = colors_precomp, + opacities = opacity, + scales = scales, + rotations = rotations, + cov3D_precomp = cov3D_precomp) + else: + rendered_image, radii, depth_image = rasterizer( + means3D = means3D, + means2D = means2D, + shs = shs, + colors_precomp = colors_precomp, + opacities = opacity, + scales = scales, + rotations = rotations, + cov3D_precomp = cov3D_precomp) # Apply exposure to rendered image (training only) if use_trained_exp: diff --git a/render.py b/render.py index b22162a..244cb75 100644 --- a/render.py +++ b/render.py @@ -20,8 +20,14 @@ from utils.general_utils import safe_state from argparse import ArgumentParser from arguments import ModelParams, PipelineParams, get_combined_args from gaussian_renderer import GaussianModel +try: + from diff_gaussian_rasterization import SparseGaussianAdam + SPARSE_ADAM_AVAILABLE = True +except: + SPARSE_ADAM_AVAILABLE = False -def render_set(model_path, name, iteration, views, gaussians, pipeline, background, train_test_exp): + +def render_set(model_path, name, iteration, views, gaussians, pipeline, background, train_test_exp, separate_sh): render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders") gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt") @@ -29,12 +35,17 @@ def render_set(model_path, name, iteration, views, gaussians, pipeline, backgrou makedirs(gts_path, exist_ok=True) for idx, view in enumerate(tqdm(views, desc="Rendering progress")): - rendering = render(view, gaussians, pipeline, background, use_trained_exp=train_test_exp)["render"] + rendering = render(view, gaussians, pipeline, background, use_trained_exp=train_test_exp, separate_sh=separate_sh)["render"] gt = view.original_image[0:3, :, :] + + if args.train_test_exp: + rendering = rendering[..., rendering.shape[-1] // 2:] + gt = gt[..., gt.shape[-1] // 2:] + torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png")) torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png")) -def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool): +def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, separate_sh: bool): with torch.no_grad(): gaussians = GaussianModel(dataset.sh_degree) scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False) @@ -43,10 +54,10 @@ def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParam background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") if not skip_train: - render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background, dataset.train_test_exp) + render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background, dataset.train_test_exp, separate_sh) if not skip_test: - render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background, dataset.train_test_exp) + render_set(dataset.model_path, "test", scene.loaded_iter, 
scene.getTestCameras(), gaussians, pipeline, background, dataset.train_test_exp, separate_sh)
 
 if __name__ == "__main__":
     # Set up command line argument parser
@@ -63,4 +74,4 @@ if __name__ == "__main__":
     # Initialize system state (RNG)
     safe_state(args.quiet)
 
-    render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test)
\ No newline at end of file
+    render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test, SPARSE_ADAM_AVAILABLE)
\ No newline at end of file
diff --git a/results.md b/results.md
index b511067..8838b6e 100644
--- a/results.md
+++ b/results.md
@@ -1,14 +1,90 @@
 # Evaluations
-We evaluated the impact of the features we added on MipNeRF360, Tanks&Temples and Deep Blending datasets.
+We evaluated the impact of the features we added on the MipNeRF360, Tanks&Temples and Deep Blending datasets. [Exposure Compensation](#exposure-compensation) is evaluated separately. Note that [Default rasterizer](#default-rasterizer) refers to the original [3dgs rasterizer](https://github.com/graphdeco-inria/diff-gaussian-rasterization/tree/9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0) and [Accelerated rasterizer](#accelerated-rasterizer) refers to the [taming-3dgs rasterizer](https://github.com/graphdeco-inria/diff-gaussian-rasterization/tree/3dgs_accel).
-## PSNR
-![all results PSNR](assets/all_results_PSNR.png)
-***DR**:depth regularization, **AA**:antialiasing, **EXPCOMP**:exposure compensation.*
+## Default rasterizer
-## SSIM
-![all results SSIM](assets/all_results_SSIM.png)
-***DR**:depth regularization, **AA**:antialiasing, **EXPCOMP**:exposure compensation.*
+### PSNR
-## LPIPS
-![all results LPIPS](assets/all_results_LPIPS.png)
-*lower is better, **DR**:depth regularization, **AA**:antialiasing, **EXPCOMP**:exposure compensation.*
\ No newline at end of file
+![all results PSNR](assets/charts/base_PSNR.png)
+
+***DR**:depth regularization, **AA**:antialiasing*
+
+![nodepth/depth](assets/depth_comparison.png)
+
+### SSIM
+![all results SSIM](assets/charts/base_SSIM.png)
+
+***DR**:depth regularization, **AA**:antialiasing*
+
+### LPIPS
+![all results LPIPS](assets/charts/base_LPIPS.png)
+
+*lower is better, **DR**:depth regularization, **AA**:antialiasing*
+
+## Accelerated rasterizer
+
+### Default optimizer
+
+These numbers were obtained using the accelerated rasterizer and `--optimizer_type default` when training.
+
+#### PSNR
+![all results PSNR](assets/charts/accel_default_PSNR.png)
+
+***DR**:depth regularization, **AA**:antialiasing*
+
+#### SSIM
+![all results SSIM](assets/charts/accel_default_SSIM.png)
+
+***DR**:depth regularization, **AA**:antialiasing*
+
+#### LPIPS
+![all results LPIPS](assets/charts/accel_default_LPIPS.png)
+
+*lower is better, **DR**:depth regularization, **AA**:antialiasing*
+
+### Sparse Adam optimizer
+
+These numbers were obtained using the accelerated rasterizer and `--optimizer_type sparse_adam` when training.
+
+#### PSNR
+![all results PSNR](assets/charts/accel_sparse_adam_PSNR.png)
+
+***DR**:depth regularization, **AA**:antialiasing*
+
+#### SSIM
+![all results SSIM](assets/charts/accel_sparse_adam_SSIM.png)
+
+***DR**:depth regularization, **AA**:antialiasing*
+
+#### LPIPS
+![all results LPIPS](assets/charts/accel_sparse_adam_LPIPS.png)
+
+*lower is better, **DR**:depth regularization, **AA**:antialiasing*
+
+## Exposure compensation
+
+We account for exposure variations between images by optimizing a 3x4 affine transform for each image. During training, this transform is applied to the colour of the rendered images.
+The exposure compensation is designed to improve the inputs' coherence during training and is not applied during real-time navigation.
+Enabling the `--train_test_exp` option includes the left half of the test images in the training set, using only their right halves for testing, following the same testing methodology as NeRF-W and Mega-NeRF. This allows us to optimize the exposure affine transform for test views. However, since this setting alters the train/test splits, the resulting metrics are not comparable to those from models trained without it. Here we provide results with `--train_test_exp`, with and without exposure compensation.
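+
+As a rough illustration (a simplified sketch, not the repository's exact code), applying such a 3x4 affine colour transform to a rendered image amounts to a per-pixel linear map plus an offset:
+
+```python
+import torch
+
+def apply_exposure(image: torch.Tensor, exposure: torch.Tensor) -> torch.Tensor:
+    # image: (3, H, W) rendered colours; exposure: (3, 4) per-image affine transform.
+    linear, offset = exposure[:, :3], exposure[:, 3]
+    c, h, w = image.shape
+    flat = image.reshape(c, -1)            # (3, H*W)
+    out = linear @ flat + offset[:, None]  # same affine map applied to every pixel's RGB
+    return out.reshape(c, h, w)
+```
+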
+### PSNR
+
+![exposures_psnr](assets/charts/exposure_PSNR.png)
+
+### SSIM
+
+![exposures_ssim](assets/charts/exposure_SSIM.png)
+
+### LPIPS
+
+*Lower is better.*
+![exposures_lpips](assets/charts/exposure_LPIPS.png)
+
+![noexposure/exposure](assets/Exposure_comparison.png)
+
+## Training times comparisons
+
+We report the training times with all features enabled, using the original 3dgs rasterizer *(baseline)* and the accelerated rasterizer with the default optimizer and with sparse Adam.
+![Training-times](assets/charts/timings.png) diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py index 4a46cea..473887d 100644 --- a/scene/gaussian_model.py +++ b/scene/gaussian_model.py @@ -22,6 +22,11 @@ from simple_knn._C import distCUDA2 from utils.graphics_utils import BasicPointCloud from utils.general_utils import strip_symmetric, build_scaling_rotation +try: + from diff_gaussian_rasterization import SparseGaussianAdam +except: + pass + class GaussianModel: def setup_functions(self): @@ -42,8 +47,9 @@ class GaussianModel: self.rotation_activation = torch.nn.functional.normalize - def __init__(self, sh_degree): + def __init__(self, sh_degree, optimizer_type="default"): self.active_sh_degree = 0 + self.optimizer_type = optimizer_type self.max_sh_degree = sh_degree self._xyz = torch.empty(0) self._features_dc = torch.empty(0) @@ -183,9 +189,16 @@ class GaussianModel: {'params': [self._rotation], 'lr': training_args.rotation_lr, "name": "rotation"} ] - self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15) - if self.pretrained_exposures is None: - self.exposure_optimizer = torch.optim.Adam([self._exposure]) + if self.optimizer_type == "default": + self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15) + elif self.optimizer_type == "sparse_adam": + try: + self.optimizer = SparseGaussianAdam(l, lr=0.0, eps=1e-15) + except: + # A special version of the rasterizer is required to enable sparse adam + self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15) + + self.exposure_optimizer = torch.optim.Adam([self._exposure]) self.xyz_scheduler_args = get_expon_lr_func(lr_init=training_args.position_lr_init*self.spatial_lr_scale, lr_final=training_args.position_lr_final*self.spatial_lr_scale, @@ -348,6 +361,7 @@ class GaussianModel: self.denom = self.denom[valid_points_mask] self.max_radii2D = self.max_radii2D[valid_points_mask] + self.tmp_radii = self.tmp_radii[valid_points_mask] def cat_tensors_to_optimizer(self, tensors_dict): optimizable_tensors = {} @@ -371,7 +385,7 @@ class GaussianModel: return optimizable_tensors - def densification_postfix(self, new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation): + def densification_postfix(self, new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_tmp_radii): d = {"xyz": new_xyz, "f_dc": new_features_dc, "f_rest": new_features_rest, @@ -387,6 +401,7 @@ class GaussianModel: self._scaling = optimizable_tensors["scaling"] self._rotation = optimizable_tensors["rotation"] + self.tmp_radii = torch.cat((self.tmp_radii, new_tmp_radii)) self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") self.max_radii2D = torch.zeros((self.get_xyz.shape[0]), device="cuda") @@ -410,8 +425,9 @@ class GaussianModel: new_features_dc = self._features_dc[selected_pts_mask].repeat(N,1,1) new_features_rest = self._features_rest[selected_pts_mask].repeat(N,1,1) new_opacity = self._opacity[selected_pts_mask].repeat(N,1) + new_tmp_radii = self.tmp_radii[selected_pts_mask].repeat(N) - self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacity, new_scaling, new_rotation) + self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacity, new_scaling, new_rotation, new_tmp_radii) prune_filter = torch.cat((selected_pts_mask, torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool))) self.prune_points(prune_filter) @@ -429,12 +445,15 @@ class GaussianModel: 
new_scaling = self._scaling[selected_pts_mask] new_rotation = self._rotation[selected_pts_mask] - self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation) + new_tmp_radii = self.tmp_radii[selected_pts_mask] - def densify_and_prune(self, max_grad, min_opacity, extent, max_screen_size): + self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_tmp_radii) + + def densify_and_prune(self, max_grad, min_opacity, extent, max_screen_size, radii): grads = self.xyz_gradient_accum / self.denom grads[grads.isnan()] = 0.0 + self.tmp_radii = radii self.densify_and_clone(grads, max_grad, extent) self.densify_and_split(grads, max_grad, extent) @@ -444,9 +463,11 @@ class GaussianModel: big_points_ws = self.get_scaling.max(dim=1).values > 0.1 * extent prune_mask = torch.logical_or(torch.logical_or(prune_mask, big_points_vs), big_points_ws) self.prune_points(prune_mask) + tmp_radii = self.tmp_radii + self.tmp_radii = None torch.cuda.empty_cache() def add_densification_stats(self, viewspace_point_tensor, update_filter): self.xyz_gradient_accum[update_filter] += torch.norm(viewspace_point_tensor.grad[update_filter,:2], dim=-1, keepdim=True) - self.denom[update_filter] += 1 \ No newline at end of file + self.denom[update_filter] += 1 diff --git a/submodules/fused-ssim b/submodules/fused-ssim new file mode 160000 index 0000000..1272e21 --- /dev/null +++ b/submodules/fused-ssim @@ -0,0 +1 @@ +Subproject commit 1272e21a282342e89537159e4bad508b19b34157 diff --git a/submodules/simple-knn b/submodules/simple-knn index 44f7642..86710c2 160000 --- a/submodules/simple-knn +++ b/submodules/simple-knn @@ -1 +1 @@ -Subproject commit 44f764299fa305faf6ec5ebd99939e0508331503 +Subproject commit 86710c2d4b46680c02301765dd79e465819c8f19 diff --git a/train.py b/train.py index 07bc6be..fe844c0 100644 --- a/train.py +++ b/train.py @@ -28,10 +28,26 @@ try: except ImportError: TENSORBOARD_FOUND = False +try: + from fused_ssim import fused_ssim + FUSED_SSIM_AVAILABLE = True +except: + FUSED_SSIM_AVAILABLE = False + +try: + from diff_gaussian_rasterization import SparseGaussianAdam + SPARSE_ADAM_AVAILABLE = True +except: + SPARSE_ADAM_AVAILABLE = False + def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from): + + if not SPARSE_ADAM_AVAILABLE and opt.optimizer_type == "sparse_adam": + sys.exit(f"Trying to use sparse adam but it is not installed, please install the correct rasterizer using pip install [3dgs_accel].") + first_iter = 0 tb_writer = prepare_output_and_logger(dataset) - gaussians = GaussianModel(dataset.sh_degree) + gaussians = GaussianModel(dataset.sh_degree, opt.optimizer_type) scene = Scene(dataset, gaussians) gaussians.training_setup(opt) if checkpoint: @@ -44,6 +60,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi iter_start = torch.cuda.Event(enable_timing = True) iter_end = torch.cuda.Event(enable_timing = True) + use_sparse_adam = opt.optimizer_type == "sparse_adam" and SPARSE_ADAM_AVAILABLE depth_l1_weight = get_expon_lr_func(opt.depth_l1_weight_init, opt.depth_l1_weight_final, max_steps=opt.iterations) viewpoint_stack = scene.getTrainCameras().copy() @@ -91,13 +108,21 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi bg = torch.rand((3), device="cuda") if opt.random_background else background - render_pkg = render(viewpoint_cam, gaussians, pipe, bg, 
use_trained_exp=dataset.train_test_exp) + render_pkg = render(viewpoint_cam, gaussians, pipe, bg, use_trained_exp=dataset.train_test_exp, separate_sh=SPARSE_ADAM_AVAILABLE) image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"] + if viewpoint_cam.alpha_mask is not None: + alpha_mask = viewpoint_cam.alpha_mask.cuda() + image *= alpha_mask + # Loss gt_image = viewpoint_cam.original_image.cuda() Ll1 = l1_loss(image, gt_image) - ssim_value = ssim(image, gt_image) + if FUSED_SSIM_AVAILABLE: + ssim_value = fused_ssim(image.unsqueeze(0), gt_image.unsqueeze(0)) + else: + ssim_value = ssim(image, gt_image) + loss = (1.0 - opt.lambda_dssim) * Ll1 + opt.lambda_dssim * (1.0 - ssim_value) # Depth regularization @@ -130,7 +155,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi progress_bar.close() # Log and save - training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background), dataset.train_test_exp) + training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background, dataset.train_test_exp, SPARSE_ADAM_AVAILABLE), dataset.train_test_exp) if (iteration in saving_iterations): print("\n[ITER {}] Saving Gaussians".format(iteration)) scene.save(iteration) @@ -143,7 +168,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0: size_threshold = 20 if iteration > opt.opacity_reset_interval else None - gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold) + gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold, radii) if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter): gaussians.reset_opacity() @@ -152,8 +177,13 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi if iteration < opt.iterations: gaussians.exposure_optimizer.step() gaussians.exposure_optimizer.zero_grad(set_to_none = True) - gaussians.optimizer.step() - gaussians.optimizer.zero_grad(set_to_none = True) + if use_sparse_adam: + visible = radii > 0 + gaussians.optimizer.step(visible, radii.shape[0]) + gaussians.optimizer.zero_grad(set_to_none = True) + else: + gaussians.optimizer.step() + gaussians.optimizer.zero_grad(set_to_none = True) if (iteration in checkpoint_iterations): print("\n[ITER {}] Saving Checkpoint".format(iteration)) diff --git a/utils/loss_utils.py b/utils/loss_utils.py index 9defc23..60cf1f7 100644 --- a/utils/loss_utils.py +++ b/utils/loss_utils.py @@ -13,6 +13,29 @@ import torch import torch.nn.functional as F from torch.autograd import Variable from math import exp +try: + from diff_gaussian_rasterization._C import fusedssim, fusedssim_backward +except: + pass + +C1 = 0.01 ** 2 +C2 = 0.03 ** 2 + +class FusedSSIMMap(torch.autograd.Function): + @staticmethod + def forward(ctx, C1, C2, img1, img2): + ssim_map = fusedssim(C1, C2, img1, img2) + ctx.save_for_backward(img1.detach(), img2) + ctx.C1 = C1 + ctx.C2 = C2 + return ssim_map + + @staticmethod + def backward(ctx, opt_grad): + img1, img2 = ctx.saved_tensors + C1, C2 = ctx.C1, ctx.C2 + grad = fusedssim_backward(C1, C2, img1, img2, opt_grad) + return None, None, grad, None def 
l1_loss(network_output, gt):
     return torch.abs((network_output - gt)).mean()
@@ -62,3 +85,7 @@ def _ssim(img1, img2, window, window_size, channel, size_average=True):
     else:
         return ssim_map.mean(1).mean(1).mean(1)
 
+
+def fast_ssim(img1, img2):
+    ssim_map = FusedSSIMMap.apply(C1, C2, img1, img2)
+    return ssim_map.mean()
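
With the new `full_eval.py` flags introduced above, a complete evaluation run exercising the added features might look like the sketch below. The dataset paths are placeholders, `-m360`/`-tat`/`-db` are the script's pre-existing dataset-path arguments, and `--use_depth` assumes a `depths2/` folder inside each scene directory (it simply appends `-d depths2/` to the training command):

```bash
python full_eval.py \
    -m360 <path to MipNeRF360 scenes> \
    -tat <path to Tanks&Temples scenes> \
    -db <path to Deep Blending scenes> \
    --output_path ./eval \
    --fast --aa --use_depth --use_expcomp
```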