From 6891d458f9e656fde549f57da28d26c4c10ff073 Mon Sep 17 00:00:00 2001
From: bkerbl
Date: Sun, 16 Jul 2023 11:48:04 +0200
Subject: [PATCH] Scaling LR fixed

---
 README.md               | 6 ++++--
 arguments/__init__.py   | 2 +-
 scene/gaussian_model.py | 4 ++--
 train.py                | 8 ++++----
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 4ba5215..8c58ab4 100644
--- a/README.md
+++ b/README.md
@@ -149,7 +149,7 @@ python train.py -s
 #### --opacity_lr
   Opacity learning rate, ```0.05``` by default.
 #### --scaling_lr
-  Scaling learning rate, ```0.001``` by default.
+  Scaling learning rate, ```0.005``` by default.
 #### --rotation_lr
   Rotation learning rate, ```0.001``` by default.
 #### --position_lr_max_steps
@@ -465,11 +465,13 @@ python convert.py -s --skip_matching [--resize] #If not resizing, Ima
 ## FAQ
 
 - *Where do I get data sets, e.g., those referenced in ```full_eval.py```?* The MipNeRF360 data set is provided by the authors of the original paper on the project site. Note that two of the data sets cannot be openly shared and require you to consult the authors directly. For Tanks&Temples and Deep Blending, please use the download links provided at the top of the page.
+
+- *How can I use this for a much larger dataset, like a city district?* The current method was not designed for these, but given enough memory, it should work out. However, the approach can struggle in multi-scale detail scenes (extreme close-ups, mixed with far-away shots). This is usually the case in, e.g., driving data sets (cars close up, buildings far away). For such scenes, you will want to lower the ```--position_lr_init/final``` and ```--scaling_lr``` (x0.3, x0.1, ...).
+
 - *I don't have 24 GB of VRAM for training, what do I do?* The VRAM consumption is determined by the number of points that are being optimized, which increases over time. If you only want to train to 7k iterations, you will need significantly less. To do the full training routine and avoid running out of memory, you can increase the ```--densify_grad_threshold```, ```--densification_interval``` or reduce the value of ```--densify_until_iter```. Note however that this will affect the quality of the result. Also try setting ```--test_iterations``` to ```-1``` to avoid memory spikes during testing. If ```--densify_grad_threshold``` is very high, no densification should occur and training should complete if the scene itself loads successfully.
 
 - *24 GB of VRAM for reference quality training is still a lot! Can't we do it with less?* Yes, most likely. By our calculations it should be possible with **way** less memory (~8GB). If we can find the time we will try to achieve this. If some PyTorch veteran out there wants to tackle this, we look forward to your pull request!
 
-- *How can I use this for a much larger dataset, like a city district?* Given enough memory, this should work out fine, but it will require to adapt the ```--scaling_lr``` and ```--position_lr_init/final```. To avoid manual tuning, a suggestion is to check the loss: if it diverges early on, reset and use a lower scaling / position learning rate.
 
 - *How can I use the differentiable Gaussian rasterizer for my own project?* Easy, it is included in this repo as a submodule ```diff-gaussian-rasterization```. Feel free to check out and install the package. It's not really documented, but using it from the Python side is very straightforward (cf. ```gaussian_renderer/__init__.py```).
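The new FAQ entry and the `scene/gaussian_model.py` change below are two sides of the same fix: after this patch, only the position learning rate is multiplied by the scene extent (`spatial_lr_scale`), while the scaling learning rate is used as-is (plausibly because the stored scales are optimized in log space and are therefore less dependent on scene size). A minimal sketch of the resulting behavior, assuming the repo's default `position_lr_init` of `0.00016` and a made-up scene extent:

```python
# Sketch only, not repo code. Assumptions: position_lr_init defaults to
# 0.00016 (as in OptimizationParams), and spatial_lr_scale is a made-up
# scene extent derived from the camera bounds.
position_lr_init = 0.00016   # per-unit position rate, scaled by scene size
scaling_lr = 0.005           # new default; after this patch, used unscaled
spatial_lr_scale = 5.0       # hypothetical scene extent

effective_position_lr = position_lr_init * spatial_lr_scale  # grows with the scene
effective_scaling_lr = scaling_lr                            # scene-independent

# For a much larger scene (e.g., a city district), the FAQ above suggests
# lowering both rates by factors such as 0.3 or 0.1:
print(effective_position_lr * 0.1, effective_scaling_lr * 0.3)
```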
diff --git a/arguments/__init__.py b/arguments/__init__.py
index eba1dba..ccb904e 100644
--- a/arguments/__init__.py
+++ b/arguments/__init__.py
@@ -76,7 +76,7 @@ class OptimizationParams(ParamGroup):
         self.position_lr_max_steps = 30_000
         self.feature_lr = 0.0025
         self.opacity_lr = 0.05
-        self.scaling_lr = 0.001
+        self.scaling_lr = 0.005
         self.rotation_lr = 0.001
         self.percent_dense = 0.01
         self.lambda_dssim = 0.2
diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py
index 56f1839..9111d67 100644
--- a/scene/gaussian_model.py
+++ b/scene/gaussian_model.py
@@ -152,11 +152,11 @@ class GaussianModel:
         self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
 
         l = [
-            {'params': [self._xyz], 'lr': training_args.position_lr_init*self.spatial_lr_scale, "name": "xyz"},
+            {'params': [self._xyz], 'lr': training_args.position_lr_init * self.spatial_lr_scale, "name": "xyz"},
             {'params': [self._features_dc], 'lr': training_args.feature_lr, "name": "f_dc"},
             {'params': [self._features_rest], 'lr': training_args.feature_lr / 20.0, "name": "f_rest"},
             {'params': [self._opacity], 'lr': training_args.opacity_lr, "name": "opacity"},
-            {'params': [self._scaling], 'lr': training_args.scaling_lr*self.spatial_lr_scale, "name": "scaling"},
+            {'params': [self._scaling], 'lr': training_args.scaling_lr, "name": "scaling"},
             {'params': [self._rotation], 'lr': training_args.rotation_lr, "name": "rotation"}
         ]
 
diff --git a/train.py b/train.py
index 54783ce..8e56202 100644
--- a/train.py
+++ b/train.py
@@ -162,8 +162,8 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i
 
         for config in validation_configs:
             if config['cameras'] and len(config['cameras']) > 0:
-                l1_test = 0
-                psnr_test = 0
+                l1_test = 0.0
+                psnr_test = 0.0
                 for idx, viewpoint in enumerate(config['cameras']):
                     image = torch.clamp(renderFunc(viewpoint, scene.gaussians, *renderArgs)["render"], 0.0, 1.0)
                     gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0)
@@ -171,8 +171,8 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i
                         tb_writer.add_images(config['name'] + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration)
                         if iteration == testing_iterations[0]:
                             tb_writer.add_images(config['name'] + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], global_step=iteration)
-                    l1_test += l1_loss(image, gt_image).mean()
-                    psnr_test += psnr(image, gt_image).mean()
+                    l1_test += l1_loss(image, gt_image).mean().double()
+                    psnr_test += psnr(image, gt_image).mean().double()
                 psnr_test /= len(config['cameras'])
                 l1_test /= len(config['cameras'])
                 print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, config['name'], l1_test, psnr_test))
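The `train.py` hunk replaces the integer accumulator initializers with floats and promotes each per-view metric mean to double precision before summing, so that averaging over many views does not compound float32 rounding error. A self-contained sketch of the same pattern, using hypothetical per-view PSNR values in place of the repo's `l1_loss` / `psnr` outputs:

```python
# Sketch of the accumulate-in-double pattern from the train.py hunk above.
# The per-view values below are hypothetical stand-ins for metric outputs.
import torch

def average_metric(per_view_values):
    """Average a list of float32 metric tensors in float64."""
    total = 0.0  # Python float: accumulation happens in double precision
    for v in per_view_values:
        total += v.mean().double()  # promote each term before adding
    return total / len(per_view_values)

fake_psnrs = [torch.tensor(27.3), torch.tensor(28.1), torch.tensor(26.9)]
print(average_metric(fake_psnrs))  # tensor(27.4333, dtype=torch.float64)
```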