diff --git a/README.md b/README.md index 4ba5215..8c58ab4 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ python train.py -s #### --opacity_lr Opacity learning rate, ```0.05``` by default. #### --scaling_lr - Scaling learning rate, ```0.001``` by default. + Scaling learning rate, ```0.005``` by default. #### --rotation_lr Rotation learning rate, ```0.001``` by default. #### --position_lr_max_steps @@ -465,11 +465,13 @@ python convert.py -s --skip_matching [--resize] #If not resizing, Ima ## FAQ - *Where do I get data sets, e.g., those referenced in ```full_eval.py```?* The MipNeRF360 data set is provided by the authors of the original paper on the project site. Note that two of the data sets cannot be openly shared and require you to consult the authors directly. For Tanks&Temples and Deep Blending, please use the download links provided at the top of the page. + +- *How can I use this for a much larger dataset, like a city district?* The current method was not designed for these, but given enough memory, it should work out. However, the approach can struggle in multi-scale detail scenes (extreme close-ups, mixed with far-away shots). This is usually the case in, e.g., driving data sets (cars close up, buildings far away). For such scenes, you will want to lower the ```--position_lr_init/final``` and ```--scaling_lr``` (x0.3, x0.1, ...). + - *I don't have 24 GB of VRAM for training, what do I do?* The VRAM consumption is determined by the number of points that are being optimized, which increases over time. If you only want to train to 7k iterations, you will need significantly less. To do the full training routine and avoid running out of memory, you can increase the ```--densify_grad_threshold```, ```--densification_interval``` or reduce the value of ```--densify_until_iter```. Note however that this will affect the quality of the result. Also try setting ```--test_iterations``` to ```-1``` to avoid memory spikes during testing. If ```--densify_grad_threshold``` is very high, no densification should occur and training should complete if the scene itself loads successfully. - *24 GB of VRAM for reference quality training is still a lot! Can't we do it with less?* Yes, most likely. By our calculations it should be possible with **way** less memory (~8GB). If we can find the time we will try to achieve this. If some PyTorch veteran out there wants to tackle this, we look forward to your pull request! -- *How can I use this for a much larger dataset, like a city district?* Given enough memory, this should work out fine, but it will require to adapt the ```--scaling_lr``` and ```--position_lr_init/final```. To avoid manual tuning, a suggestion is to check the loss: if it diverges early on, reset and use a lower scaling / position learning rate. - *How can I use the differentiable Gaussian rasterizer for my own project?* Easy, it is included in this repo as a submodule ```diff-gaussian-rasterization```. Feel free to check out and install the package. It's not really documented, but using it from the Python side is very straightforward (cf. ```gaussian_renderer/__init__.py```). diff --git a/arguments/__init__.py b/arguments/__init__.py index eba1dba..ccb904e 100644 --- a/arguments/__init__.py +++ b/arguments/__init__.py @@ -76,7 +76,7 @@ class OptimizationParams(ParamGroup): self.position_lr_max_steps = 30_000 self.feature_lr = 0.0025 self.opacity_lr = 0.05 - self.scaling_lr = 0.001 + self.scaling_lr = 0.005 self.rotation_lr = 0.001 self.percent_dense = 0.01 self.lambda_dssim = 0.2 diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py index 56f1839..9111d67 100644 --- a/scene/gaussian_model.py +++ b/scene/gaussian_model.py @@ -152,11 +152,11 @@ class GaussianModel: self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda") l = [ - {'params': [self._xyz], 'lr': training_args.position_lr_init*self.spatial_lr_scale, "name": "xyz"}, + {'params': [self._xyz], 'lr': training_args.position_lr_init * self.spatial_lr_scale, "name": "xyz"}, {'params': [self._features_dc], 'lr': training_args.feature_lr, "name": "f_dc"}, {'params': [self._features_rest], 'lr': training_args.feature_lr / 20.0, "name": "f_rest"}, {'params': [self._opacity], 'lr': training_args.opacity_lr, "name": "opacity"}, - {'params': [self._scaling], 'lr': training_args.scaling_lr*self.spatial_lr_scale, "name": "scaling"}, + {'params': [self._scaling], 'lr': training_args.scaling_lr, "name": "scaling"}, {'params': [self._rotation], 'lr': training_args.rotation_lr, "name": "rotation"} ] diff --git a/train.py b/train.py index 54783ce..8e56202 100644 --- a/train.py +++ b/train.py @@ -162,8 +162,8 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i for config in validation_configs: if config['cameras'] and len(config['cameras']) > 0: - l1_test = 0 - psnr_test = 0 + l1_test = 0.0 + psnr_test = 0.0 for idx, viewpoint in enumerate(config['cameras']): image = torch.clamp(renderFunc(viewpoint, scene.gaussians, *renderArgs)["render"], 0.0, 1.0) gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0) @@ -171,8 +171,8 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i tb_writer.add_images(config['name'] + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration) if iteration == testing_iterations[0]: tb_writer.add_images(config['name'] + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], global_step=iteration) - l1_test += l1_loss(image, gt_image).mean() - psnr_test += psnr(image, gt_image).mean() + l1_test += l1_loss(image, gt_image).mean().double() + psnr_test += psnr(image, gt_image).mean().double() psnr_test /= len(config['cameras']) l1_test /= len(config['cameras']) print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, config['name'], l1_test, psnr_test))