From 490fab14dd045bc135b2d00cbe441d9e31aaeb2a Mon Sep 17 00:00:00 2001
From: bkerbl <bkerbl@ad.inria.fr>
Date: Sat, 8 Jul 2023 18:20:08 +0200
Subject: [PATCH] Auto rescaling, including warning and documentation

---
 README.md             |  7 ++++---
 arguments/__init__.py |  2 +-
 utils/camera_utils.py | 16 +++++++++++++++-
 3 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index fef9589..a9881e8 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ The codebase has 4 main components:
 - An OpenGL-based real-time viewer to render trained models in real-time.
 - A script to help you turn your own images into optimization-ready SfM data sets
 
-The components have different requirements w.r.t. both hardware and software. They have been tested on Windows 10 and Linux Ubuntu 22. Instructions for setting up and running each of them are found in the sections below.
+The components have different requirements w.r.t. both hardware and software. They have been tested on Windows 10 and Linux Ubuntu 22.04. Instructions for setting up and running each of them are found in the sections below.
 
 ## Optimizer
 
@@ -114,7 +114,7 @@ python train.py -s <path to COLMAP or NeRF Synthetic dataset>
   #### --eval
   Add this flag to use a MipNeRF360-style training/test split for evaluation.
   #### --resolution / -r
-  Changes the resolution of the loaded images before training. If provided ```1, 2, 4``` or ```8```, uses original, 1/2, 1/4 or 1/8 resolution, respectively. For all other values, rescales the width to the given number while maintaining image aspect. ```1``` by default.
+  Specifies resolution of the loaded images before training. If provided ```1, 2, 4``` or ```8```, uses original, 1/2, 1/4 or 1/8 resolution, respectively. For all other values, rescales the width to the given number while maintaining image aspect. **If not set and input image width exceeds 1500 pixels, inputs are automatically rescaled to a width of 1500 pixels.**
   #### --white_background / -w
   Add this flag to use white background instead of black (default), e.g., for evaluation of NeRF Synthetic dataset.
   #### --sh_degree
@@ -169,8 +169,9 @@ python train.py -s <path to COLMAP or NeRF Synthetic dataset>
 </details>
 <br>
 
-The MipNeRF360 scenes are hosted by the paper authors [here](https://jonbarron.info/mipnerf360/). You can find our SfM data sets for Tanks&Temples and Deep Blending [here](https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/datasets/input/tandt+db.zip). If you do not provide an output model directory (```-m```), trained models are written to folders with randomized unique names inside the ```output``` directory. At this point, the trained models may be viewed with the real-time viewer (see further below).
+Note that, similar to MipNeRF360, we target images at resolutions in the 1-1.5 megapixel range. For convenience, arbitrary-size inputs can be passed and will be automatically resized if their width exceeds 1500 pixels. We recommend keeping this behavior, but you may force training to use your higher-resolution images by specifying ```-r 1```.
 
+The MipNeRF360 scenes are hosted by the paper authors [here](https://jonbarron.info/mipnerf360/). You can find our SfM data sets for Tanks&Temples and Deep Blending [here](https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/datasets/input/tandt+db.zip). If you do not provide an output model directory (```-m```), trained models are written to folders with randomized unique names inside the ```output``` directory. At this point, the trained models may be viewed with the real-time viewer (see further below).
 
 ### Evaluation
 By default, the trained models use all available images in the dataset. To train them while withholding a test set for evaluation, use the ```--eval``` flag. This way, you can render training/test sets and produce error metrics as follows:
diff --git a/arguments/__init__.py b/arguments/__init__.py
index 79856b3..582acf3 100644
--- a/arguments/__init__.py
+++ b/arguments/__init__.py
@@ -50,7 +50,7 @@ class ModelParams(ParamGroup):
         self._source_path = ""
         self._model_path = ""
         self._images = "images"
-        self._resolution = 1
+        self._resolution = -1
         self._white_background = False
         self.eval = False
         super().__init__(parser, "Loading Parameters", sentinel)
diff --git a/utils/camera_utils.py b/utils/camera_utils.py
index 4d1b76f..f8ab276 100644
--- a/utils/camera_utils.py
+++ b/utils/camera_utils.py
@@ -14,13 +14,27 @@ import numpy as np
 from utils.general_utils import PILtoTorch
 from utils.graphics_utils import fov2focal
 
+WARNED = False
+
 def loadCam(args, id, cam_info, resolution_scale):
     orig_w, orig_h = cam_info.image.size
 
     if args.resolution in [1, 2, 4, 8]:
         resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution))
     else:  # should be a type that converts to float
-        global_down = orig_w/args.resolution
+        if args.resolution == -1:
+            if orig_w > 1500:
+                global WARNED
+                if not WARNED:
+                    print("[ INFO ] Encountered quite large input images (>1.5Mpix), rescaling to 1.5Mpix. "
+                        "If this is not desired, please explicitly specify '--resolution/-r' as 1")
+                    WARNED = True
+                global_down = orig_w / 1500
+            else:
+                global_down = 1
+        else:
+            global_down = orig_w / args.resolution
+
         scale = float(global_down) * float(resolution_scale)
         resolution = (int(orig_w / scale), int(orig_h / scale))