From 7857f24260c60faa36c666bb90a0d306cb8d07f5 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Thu, 26 Oct 2023 04:25:56 -0700
Subject: [PATCH 1/7] moved magick to PIL as even single-threaded PIL is 4x the
 speed of magick at resizing

---
 convert.py | 41 ++++++++++++++---------------------------
 1 file changed, 14 insertions(+), 27 deletions(-)

diff --git a/convert.py b/convert.py
index 7894884..6974c63 100644
--- a/convert.py
+++ b/convert.py
@@ -8,12 +8,16 @@
 #
 # For inquiries contact  george.drettakis@inria.fr
 #
+# xvdp removed magick, it is 3x slower than single threaded PIL for resizing
+
 
 import os
 import logging
 from argparse import ArgumentParser
 import shutil
 
+from PIL import Image
+
 # This Python script is based on the shell converter script provided in the MipNerF 360 repository.
 parser = ArgumentParser("Colmap converter")
 parser.add_argument("--no_gpu", action='store_true')
@@ -25,7 +29,7 @@ parser.add_argument("--resize", action="store_true")
 parser.add_argument("--magick_executable", default="", type=str)
 args = parser.parse_args()
 colmap_command = '"{}"'.format(args.colmap_executable) if len(args.colmap_executable) > 0 else "colmap"
-magick_command = '"{}"'.format(args.magick_executable) if len(args.magick_executable) > 0 else "magick"
+
 use_gpu = 1 if not args.no_gpu else 0
 
 if not args.skip_matching:
@@ -87,38 +91,21 @@ for file in files:
     destination_file = os.path.join(args.source_path, "sparse", "0", file)
     shutil.move(source_file, destination_file)
 
-if(args.resize):
+if args.resize:
     print("Copying and resizing...")
 
     # Resize images.
-    os.makedirs(args.source_path + "/images_2", exist_ok=True)
-    os.makedirs(args.source_path + "/images_4", exist_ok=True)
-    os.makedirs(args.source_path + "/images_8", exist_ok=True)
+    for div in [2,4,8]:
+        os.makedirs(args.source_path + f"/images_{div}", exist_ok=True)
     # Get the list of files in the source directory
     files = os.listdir(args.source_path + "/images")
     # Copy each file from the source directory to the destination directory
-    for file in files:
+    for j, file in enumerate(files):
         source_file = os.path.join(args.source_path, "images", file)
-
-        destination_file = os.path.join(args.source_path, "images_2", file)
-        shutil.copy2(source_file, destination_file)
-        exit_code = os.system(magick_command + " mogrify -resize 50% " + destination_file)
-        if exit_code != 0:
-            logging.error(f"50% resize failed with code {exit_code}. Exiting.")
-            exit(exit_code)
-
-        destination_file = os.path.join(args.source_path, "images_4", file)
-        shutil.copy2(source_file, destination_file)
-        exit_code = os.system(magick_command + " mogrify -resize 25% " + destination_file)
-        if exit_code != 0:
-            logging.error(f"25% resize failed with code {exit_code}. Exiting.")
-            exit(exit_code)
-
-        destination_file = os.path.join(args.source_path, "images_8", file)
-        shutil.copy2(source_file, destination_file)
-        exit_code = os.system(magick_command + " mogrify -resize 12.5% " + destination_file)
-        if exit_code != 0:
-            logging.error(f"12.5% resize failed with code {exit_code}. Exiting.")
-            exit(exit_code)
+        im = Image.open(source_file)
+        logging.info(f"processing image [{j}/{len(files)}] {source_file}")
+        for div in [2,4,8]:
+            destination_file = os.path.join(args.source_path, f"images_{div}", file)
+            im.resize([round(i/div) for i in im.size], Image.BICUBIC).save(destination_file)
 
 print("Done.")

From a0dc5af86fc3f244161b20363a330ba3c252c1c2 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Thu, 26 Oct 2023 05:36:14 -0700
Subject: [PATCH 2/7] moved magick to PIL, fixed quality setting to 100 to
 match magick

---
 convert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/convert.py b/convert.py
index 6974c63..9351c64 100644
--- a/convert.py
+++ b/convert.py
@@ -8,7 +8,7 @@
 #
 # For inquiries contact  george.drettakis@inria.fr
 #
-# xvdp removed magick, it is 3x slower than single threaded PIL for resizing
+# xvdp removed magick, even single threaded PIL resizes 4X faster
 
 
 import os
@@ -106,6 +106,6 @@ if args.resize:
         logging.info(f"processing image [{j}/{len(files)}] {source_file}")
         for div in [2,4,8]:
             destination_file = os.path.join(args.source_path, f"images_{div}", file)
-            im.resize([round(i/div) for i in im.size], Image.BICUBIC).save(destination_file)
+            im.resize([round(i/div) for i in im.size], Image.BICUBIC).save(destination_file, quality=100)
 
 print("Done.")

From 2311f4e764f8f49f1f2213c604ed20a934002d42 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Tue, 31 Oct 2023 04:48:52 -0700
Subject: [PATCH 3/7] Added option to disable asynchronous operations which can
 cause cuda to fail; network_gui.listener can block resources, so clean up on
 break.

---
 train.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/train.py b/train.py
index 36faf0d..cfaf369 100644
--- a/train.py
+++ b/train.py
@@ -48,7 +48,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
     ema_loss_for_log = 0.0
     progress_bar = tqdm(range(first_iter, opt.iterations), desc="Training progress")
     first_iter += 1
-    for iteration in range(first_iter, opt.iterations + 1):        
+    for iteration in range(first_iter, opt.iterations + 1):
         if network_gui.conn == None:
             network_gui.try_connect()
         while network_gui.conn != None:
@@ -62,7 +62,10 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
                 if do_training and ((iteration < int(opt.iterations)) or not keep_alive):
                     break
             except Exception as e:
+                network_gui.conn.close()
                 network_gui.conn = None
+                network_gui.listener.close()
+                network_gui.listener = None
 
         iter_start.record()
 
@@ -159,7 +162,7 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i
     # Report test and samples of training set
     if iteration in testing_iterations:
         torch.cuda.empty_cache()
-        validation_configs = ({'name': 'test', 'cameras' : scene.getTestCameras()}, 
+        validation_configs = ({'name': 'test', 'cameras' : scene.getTestCameras()},
                               {'name': 'train', 'cameras' : [scene.getTrainCameras()[idx % len(scene.getTrainCameras())] for idx in range(5, 30, 5)]})
 
         for config in validation_configs:
@@ -202,18 +205,23 @@ if __name__ == "__main__":
     parser.add_argument("--quiet", action="store_true")
     parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[])
     parser.add_argument("--start_checkpoint", type=str, default = None)
+    parser.add_argument('--cuda_blocking', action='store_true', default=True)
     args = parser.parse_args(sys.argv[1:])
     args.save_iterations.append(args.iterations)
-    
+
     print("Optimizing " + args.model_path)
 
     # Initialize system state (RNG)
     safe_state(args.quiet)
 
+    # CUDA sometimes fails - option to disable asynchronous operations
+    if args.cuda_blocking:
+        os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
     # Start GUI server, configure and run training
     network_gui.init(args.ip, args.port)
     torch.autograd.set_detect_anomaly(args.detect_anomaly)
-    training(lp.extract(args), op.extract(args), pp.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from)
+    training(lp.extract(args), op.extract(args), pp.extract(args), args.test_iterations, args.save_iterations,
+             args.checkpoint_iterations, args.start_checkpoint, args.debug_from)
 
     # All done
     print("\nTraining complete.")

From 25b3cb8cc95fca3b194928798c4885f664b23253 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Tue, 31 Oct 2023 05:11:09 -0700
Subject: [PATCH 4/7] save checkpoints by default

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 223b2d9..7b73515 100644
--- a/train.py
+++ b/train.py
@@ -206,7 +206,7 @@ if __name__ == "__main__":
     parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000])
     parser.add_argument("--save_iterations", nargs="+", type=int, default=[7_000, 30_000])
     parser.add_argument("--quiet", action="store_true")
-    parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[])
+    parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[7_000, 30_000])
     parser.add_argument("--start_checkpoint", type=str, default = None)
     parser.add_argument('--cuda_blocking', action='store_true', default=True)
     args = parser.parse_args(sys.argv[1:])

From 27ef163bdfbd8e0ca750c73cf9faa57729f0c188 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Sat, 4 Nov 2023 03:52:01 -0700
Subject: [PATCH 5/7] added __repr__ to GaussianModel

---
 scene/gaussian_model.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py
index 632a1e8..23bee42 100644
--- a/scene/gaussian_model.py
+++ b/scene/gaussian_model.py
@@ -58,6 +58,17 @@ class GaussianModel:
         self.spatial_lr_scale = 0
         self.setup_functions()
 
+
+    def __repr__(self):
+        format_string = self.__class__.__name__ + '()'
+        for k, v in self.__dict__.items():
+            if torch.is_tensor(v):
+                format_string +=f"  {k}:\t{tuple(v.shape)}\n"
+            else:
+                format_string += f"{k}:\t{v}\n"
+        return format_string
+
+
     def capture(self):
         return (
             self.active_sh_degree,

From f934e701b25f31cda4b341d832aede40effd5055 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Sat, 4 Nov 2023 08:29:03 -0700
Subject: [PATCH 6/7]  added __repr__ to Camera()

---
 scene/cameras.py        | 10 ++++++++++
 scene/gaussian_model.py |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/scene/cameras.py b/scene/cameras.py
index abf6e52..bc59fcc 100644
--- a/scene/cameras.py
+++ b/scene/cameras.py
@@ -56,6 +56,16 @@ class Camera(nn.Module):
         self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0)
         self.camera_center = self.world_view_transform.inverse()[3, :3]
 
+    def __repr__(self):
+        format_string = self.__class__.__name__ + '()\n'
+        for k, v in self.__dict__.items():
+            if torch.is_tensor(v) and v.numel() > 16:
+                format_string +=f"  {k}:\t{tuple(v.shape)}\n"
+            else:
+                format_string += f"{k}:\t{v}\n"
+        return format_string
+
+
 class MiniCam:
     def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform):
         self.image_width = width
diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py
index 23bee42..07a30d0 100644
--- a/scene/gaussian_model.py
+++ b/scene/gaussian_model.py
@@ -60,7 +60,7 @@ class GaussianModel:
 
 
     def __repr__(self):
-        format_string = self.__class__.__name__ + '()'
+        format_string = self.__class__.__name__ + '()\n'
         for k, v in self.__dict__.items():
             if torch.is_tensor(v):
                 format_string +=f"  {k}:\t{tuple(v.shape)}\n"
@@ -84,7 +84,7 @@ class GaussianModel:
             self.optimizer.state_dict(),
             self.spatial_lr_scale,
         )
-    
+
     def restore(self, model_args, training_args):
         (self.active_sh_degree, 
         self._xyz, 

From 4ea5609081b9c2df83f7774c615972b4bf21a5d1 Mon Sep 17 00:00:00 2001
From: xvdp <xvpdahlen@gmail.com>
Date: Sun, 5 Nov 2023 04:15:39 -0800
Subject: [PATCH 7/7] fix Conversion pil to torch introduces potential cuda
 error #430

---
 train.py               |  5 +----
 utils/general_utils.py | 14 ++++++++------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/train.py b/train.py
index 7b73515..d57acae 100644
--- a/train.py
+++ b/train.py
@@ -208,7 +208,7 @@ if __name__ == "__main__":
     parser.add_argument("--quiet", action="store_true")
     parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[7_000, 30_000])
     parser.add_argument("--start_checkpoint", type=str, default = None)
-    parser.add_argument('--cuda_blocking', action='store_true', default=True)
+  
     args = parser.parse_args(sys.argv[1:])
     args.save_iterations.append(args.iterations)
 
@@ -217,9 +217,6 @@ if __name__ == "__main__":
     # Initialize system state (RNG)
     safe_state(args.quiet)
 
-    # CUDA sometimes fails - option to disable asynchronous operations
-    if args.cuda_blocking:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
     # Start GUI server, configure and run training
     network_gui.init(args.ip, args.port)
     torch.autograd.set_detect_anomaly(args.detect_anomaly)
diff --git a/utils/general_utils.py b/utils/general_utils.py
index 541c082..43f56ee 100644
--- a/utils/general_utils.py
+++ b/utils/general_utils.py
@@ -18,13 +18,15 @@ import random
 def inverse_sigmoid(x):
     return torch.log(x/(1-x))
 
-def PILtoTorch(pil_image, resolution):
+def PILtoTorch(pil_image, resolution, pin_memory=True):
     resized_image_PIL = pil_image.resize(resolution)
-    resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0
-    if len(resized_image.shape) == 3:
-        return resized_image.permute(2, 0, 1)
-    else:
-        return resized_image.unsqueeze(dim=-1).permute(2, 0, 1)
+    resized_image = torch.from_numpy(np.array(resized_image_PIL, dtype=np.float32)) / 255.0
+    if resized_image.ndim == 2:
+        resized_image = resized_image[..., None]  # (H, W) -> (H, W, 1) so the permute below yields (1, H, W)
+    resized_image = resized_image.permute(2, 0, 1).contiguous()
+    if pin_memory and torch.cuda.is_available():  # page-locking host memory needs a CUDA runtime
+        resized_image = resized_image.pin_memory()  # returns a pinned copy; assigning an attribute pins nothing
+    return resized_image
 
 def get_expon_lr_func(
     lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000