add comments

This commit is contained in:
liuzhi 2024-06-04 21:01:43 +08:00
parent 8b998dfba6
commit 1a35943d4c
12 changed files with 853 additions and 84 deletions

View File

@ -63,7 +63,7 @@ class ModelParams(ParamGroup):
Extract from the args object the values that match the parameters defined in the ModelParams class, and wrap them in a new GroupParams object
args: holds the command-line arguments and the defaults preset in main
'''
g = super().extract(args) # the returned GroupParams object
g = super().extract(args) # replace the matching values and return a GroupParams object
g.source_path = os.path.abspath(g.source_path) # normalize source_path to an absolute path
return g

View File

@ -49,7 +49,7 @@ def evaluate(model_paths):
full_dict_polytopeonly[scene_dir] = {}
per_view_dict_polytopeonly[scene_dir] = {}
test_dir = Path(scene_dir) / "test"
test_dir = Path(scene_dir) / "train"
for method in os.listdir(test_dir):
print("Method:", method)

read_write_binary.py Normal file (593 lines)
View File

@ -0,0 +1,593 @@
import os
import collections
import numpy as np
import struct
import argparse
CameraModel = collections.namedtuple(
"CameraModel", ["model_id", "model_name", "num_params"]
)
Camera = collections.namedtuple(
"Camera", ["id", "model", "width", "height", "params"]
)
BaseImage = collections.namedtuple(
"Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]
)
Point3D = collections.namedtuple(
"Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]
)
class Image(BaseImage):
def qvec2rotmat(self):
return qvec2rotmat(self.qvec)
CAMERA_MODELS = {
CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
CameraModel(model_id=3, model_name="RADIAL", num_params=5),
CameraModel(model_id=4, model_name="OPENCV", num_params=8),
CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
CameraModel(model_id=7, model_name="FOV", num_params=5),
CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12),
}
CAMERA_MODEL_IDS = dict(
[(camera_model.model_id, camera_model) for camera_model in CAMERA_MODELS]
)
CAMERA_MODEL_NAMES = dict(
[(camera_model.model_name, camera_model) for camera_model in CAMERA_MODELS]
)
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
"""Read and unpack the next bytes from a binary file.
:param fid:
:param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
:param endian_character: Any of {@, =, <, >, !}
:return: Tuple of read and unpacked values.
"""
data = fid.read(num_bytes)
return struct.unpack(endian_character + format_char_sequence, data)
def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
"""pack and write to a binary file.
:param fid:
:param data: data to send, if multiple elements are sent at the same time,
they should be encapsulated either in a list or a tuple
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
should be the same length as the data list or tuple
:param endian_character: Any of {@, =, <, >, !}
"""
if isinstance(data, (list, tuple)):
packed = struct.pack(endian_character + format_char_sequence, *data)
else:
packed = struct.pack(endian_character + format_char_sequence, data)
fid.write(packed)
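# Minimal round-trip sketch of the two helpers above (illustration only; values are arbitrary):
# "Q" is an unsigned 8-byte integer and "d" an 8-byte double, so "Qdd" describes 24 bytes.
#   import io
#   buf = io.BytesIO()
#   write_next_bytes(buf, [7, 1.5, -2.0], "Qdd")
#   buf.seek(0)
#   assert read_next_bytes(buf, 24, "Qdd") == (7, 1.5, -2.0)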
def read_cameras_text(path):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::WriteCamerasText(const std::string& path)
void Reconstruction::ReadCamerasText(const std::string& path)
"""
cameras = {}
with open(path, "r") as fid:
while True:
line = fid.readline()
if not line:
break
line = line.strip()
if len(line) > 0 and line[0] != "#":
elems = line.split()
camera_id = int(elems[0])
model = elems[1]
width = int(elems[2])
height = int(elems[3])
params = np.array(tuple(map(float, elems[4:])))
cameras[camera_id] = Camera(
id=camera_id,
model=model,
width=width,
height=height,
params=params,
)
return cameras
def read_cameras_binary(path_to_model_file):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::WriteCamerasBinary(const std::string& path)
void Reconstruction::ReadCamerasBinary(const std::string& path)
"""
cameras = {}
with open(path_to_model_file, "rb") as fid:
num_cameras = read_next_bytes(fid, 8, "Q")[0]
for _ in range(num_cameras):
camera_properties = read_next_bytes(
fid, num_bytes=24, format_char_sequence="iiQQ"
)
camera_id = camera_properties[0]
model_id = camera_properties[1]
model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
width = camera_properties[2]
height = camera_properties[3]
num_params = CAMERA_MODEL_IDS[model_id].num_params
params = read_next_bytes(
fid,
num_bytes=8 * num_params,
format_char_sequence="d" * num_params,
)
cameras[camera_id] = Camera(
id=camera_id,
model=model_name,
width=width,
height=height,
params=np.array(params),
)
assert len(cameras) == num_cameras
return cameras
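# Record layout consumed above (COLMAP cameras.bin): uint64 num_cameras, then per camera
# int32 camera_id, int32 model_id, uint64 width, uint64 height ("iiQQ", 24 bytes),
# followed by num_params doubles, where num_params is looked up from CAMERA_MODEL_IDS.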
def write_cameras_text(cameras, path):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::WriteCamerasText(const std::string& path)
void Reconstruction::ReadCamerasText(const std::string& path)
"""
HEADER = (
"# Camera list with one line of data per camera:\n"
+ "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n"
+ "# Number of cameras: {}\n".format(len(cameras))
)
with open(path, "w") as fid:
fid.write(HEADER)
for _, cam in cameras.items():
to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
line = " ".join([str(elem) for elem in to_write])
fid.write(line + "\n")
def write_cameras_binary(cameras, path_to_model_file):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::WriteCamerasBinary(const std::string& path)
void Reconstruction::ReadCamerasBinary(const std::string& path)
"""
with open(path_to_model_file, "wb") as fid:
write_next_bytes(fid, len(cameras), "Q")
for _, cam in cameras.items():
model_id = CAMERA_MODEL_NAMES[cam.model].model_id
camera_properties = [cam.id, model_id, cam.width, cam.height]
write_next_bytes(fid, camera_properties, "iiQQ")
for p in cam.params:
write_next_bytes(fid, float(p), "d")
return cameras
def read_images_text(path):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadImagesText(const std::string& path)
void Reconstruction::WriteImagesText(const std::string& path)
"""
images = {}
with open(path, "r") as fid:
while True:
line = fid.readline()
if not line:
break
line = line.strip()
if len(line) > 0 and line[0] != "#":
elems = line.split()
image_id = int(elems[0])
qvec = np.array(tuple(map(float, elems[1:5])))
tvec = np.array(tuple(map(float, elems[5:8])))
camera_id = int(elems[8])
image_name = elems[9]
elems = fid.readline().split()
xys = np.column_stack(
[
tuple(map(float, elems[0::3])),
tuple(map(float, elems[1::3])),
]
)
point3D_ids = np.array(tuple(map(int, elems[2::3])))
images[image_id] = Image(
id=image_id,
qvec=qvec,
tvec=tvec,
camera_id=camera_id,
name=image_name,
xys=xys,
point3D_ids=point3D_ids,
)
return images
def read_images_binary(path_to_model_file):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadImagesBinary(const std::string& path)
void Reconstruction::WriteImagesBinary(const std::string& path)
"""
images = {}
with open(path_to_model_file, "rb") as fid:
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
for _ in range(num_reg_images):
binary_image_properties = read_next_bytes(
fid, num_bytes=64, format_char_sequence="idddddddi"
)
image_id = binary_image_properties[0]
qvec = np.array(binary_image_properties[1:5])
tvec = np.array(binary_image_properties[5:8])
camera_id = binary_image_properties[8]
image_name = ""
current_char = read_next_bytes(fid, 1, "c")[0]
while current_char != b"\x00": # look for the ASCII 0 entry
image_name += current_char.decode("utf-8")
current_char = read_next_bytes(fid, 1, "c")[0]
num_points2D = read_next_bytes(
fid, num_bytes=8, format_char_sequence="Q"
)[0]
x_y_id_s = read_next_bytes(
fid,
num_bytes=24 * num_points2D,
format_char_sequence="ddq" * num_points2D,
)
xys = np.column_stack(
[
tuple(map(float, x_y_id_s[0::3])),
tuple(map(float, x_y_id_s[1::3])),
]
)
point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
images[image_id] = Image(
id=image_id,
qvec=qvec,
tvec=tvec,
camera_id=camera_id,
name=image_name,
xys=xys,
point3D_ids=point3D_ids,
)
return images
def write_images_text(images, path):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadImagesText(const std::string& path)
void Reconstruction::WriteImagesText(const std::string& path)
"""
if len(images) == 0:
mean_observations = 0
else:
mean_observations = sum(
(len(img.point3D_ids) for _, img in images.items())
) / len(images)
HEADER = (
"# Image list with two lines of data per image:\n"
+ "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n"
+ "# POINTS2D[] as (X, Y, POINT3D_ID)\n"
+ "# Number of images: {}, mean observations per image: {}\n".format(
len(images), mean_observations
)
)
with open(path, "w") as fid:
fid.write(HEADER)
for _, img in images.items():
image_header = [
img.id,
*img.qvec,
*img.tvec,
img.camera_id,
img.name,
]
first_line = " ".join(map(str, image_header))
fid.write(first_line + "\n")
points_strings = []
for xy, point3D_id in zip(img.xys, img.point3D_ids):
points_strings.append(" ".join(map(str, [*xy, point3D_id])))
fid.write(" ".join(points_strings) + "\n")
def write_images_binary(images, path_to_model_file):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadImagesBinary(const std::string& path)
void Reconstruction::WriteImagesBinary(const std::string& path)
"""
with open(path_to_model_file, "wb") as fid:
write_next_bytes(fid, len(images), "Q")
for _, img in images.items():
write_next_bytes(fid, img.id, "i")
write_next_bytes(fid, img.qvec.tolist(), "dddd")
write_next_bytes(fid, img.tvec.tolist(), "ddd")
write_next_bytes(fid, img.camera_id, "i")
for char in img.name:
write_next_bytes(fid, char.encode("utf-8"), "c")
write_next_bytes(fid, b"\x00", "c")
write_next_bytes(fid, len(img.point3D_ids), "Q")
for xy, p3d_id in zip(img.xys, img.point3D_ids):
write_next_bytes(fid, [*xy, p3d_id], "ddq")
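# Record layout produced above (COLMAP images.bin): uint64 num_images, then per image
# int32 image_id, 4 doubles qvec (w, x, y, z), 3 doubles tvec, int32 camera_id,
# a null-terminated name, uint64 num_points2D, and num_points2D * (double x, double y, int64 point3D_id).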
def read_points3D_text(path):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadPoints3DText(const std::string& path)
void Reconstruction::WritePoints3DText(const std::string& path)
"""
points3D = {}
with open(path, "r") as fid:
while True:
line = fid.readline()
if not line:
break
line = line.strip()
if len(line) > 0 and line[0] != "#":
elems = line.split()
point3D_id = int(elems[0])
xyz = np.array(tuple(map(float, elems[1:4])))
rgb = np.array(tuple(map(int, elems[4:7])))
error = float(elems[7])
image_ids = np.array(tuple(map(int, elems[8::2])))
point2D_idxs = np.array(tuple(map(int, elems[9::2])))
points3D[point3D_id] = Point3D(
id=point3D_id,
xyz=xyz,
rgb=rgb,
error=error,
image_ids=image_ids,
point2D_idxs=point2D_idxs,
)
return points3D
def read_points3D_binary(path_to_model_file):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadPoints3DBinary(const std::string& path)
void Reconstruction::WritePoints3DBinary(const std::string& path)
"""
points3D = {}
with open(path_to_model_file, "rb") as fid:
num_points = read_next_bytes(fid, 8, "Q")[0]
for _ in range(num_points):
binary_point_line_properties = read_next_bytes(
fid, num_bytes=43, format_char_sequence="QdddBBBd"
)
point3D_id = binary_point_line_properties[0]
xyz = np.array(binary_point_line_properties[1:4])
rgb = np.array(binary_point_line_properties[4:7])
error = np.array(binary_point_line_properties[7])
track_length = read_next_bytes(
fid, num_bytes=8, format_char_sequence="Q"
)[0]
track_elems = read_next_bytes(
fid,
num_bytes=8 * track_length,
format_char_sequence="ii" * track_length,
)
image_ids = np.array(tuple(map(int, track_elems[0::2])))
point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
points3D[point3D_id] = Point3D(
id=point3D_id,
xyz=xyz,
rgb=rgb,
error=error,
image_ids=image_ids,
point2D_idxs=point2D_idxs,
)
return points3D
def write_points3D_text(points3D, path):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadPoints3DText(const std::string& path)
void Reconstruction::WritePoints3DText(const std::string& path)
"""
if len(points3D) == 0:
mean_track_length = 0
else:
mean_track_length = sum(
(len(pt.image_ids) for _, pt in points3D.items())
) / len(points3D)
HEADER = (
"# 3D point list with one line of data per point:\n"
+ "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n"
+ "# Number of points: {}, mean track length: {}\n".format(
len(points3D), mean_track_length
)
)
with open(path, "w") as fid:
fid.write(HEADER)
for _, pt in points3D.items():
point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
fid.write(" ".join(map(str, point_header)) + " ")
track_strings = []
for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
track_strings.append(" ".join(map(str, [image_id, point2D])))
fid.write(" ".join(track_strings) + "\n")
def write_points3D_binary(points3D, path_to_model_file):
"""
see: src/colmap/scene/reconstruction.cc
void Reconstruction::ReadPoints3DBinary(const std::string& path)
void Reconstruction::WritePoints3DBinary(const std::string& path)
"""
with open(path_to_model_file, "wb") as fid:
write_next_bytes(fid, len(points3D), "Q")
for _, pt in points3D.items():
write_next_bytes(fid, pt.id, "Q")
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
write_next_bytes(fid, pt.error, "d")
track_length = pt.image_ids.shape[0]
write_next_bytes(fid, track_length, "Q")
for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
write_next_bytes(fid, [image_id, point2D_id], "ii")
def detect_model_format(path, ext):
if (
os.path.isfile(os.path.join(path, "cameras" + ext))
and os.path.isfile(os.path.join(path, "images" + ext))
and os.path.isfile(os.path.join(path, "points3D" + ext))
):
print("Detected model format: '" + ext + "'")
return True
return False
def read_model(path, ext=""):
# try to detect the extension automatically
if ext == "":
if detect_model_format(path, ".bin"):
ext = ".bin"
elif detect_model_format(path, ".txt"):
ext = ".txt"
else:
print("Provide model format: '.bin' or '.txt'")
return
if ext == ".txt":
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
images = read_images_text(os.path.join(path, "images" + ext))
points3D = read_points3D_text(os.path.join(path, "points3D" + ext))
else:
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
images = read_images_binary(os.path.join(path, "images" + ext))
points3D = read_points3D_binary(os.path.join(path, "points3D" + ext))
return cameras, images, points3D
def write_model(cameras, images, points3D, path, ext=".bin"):
if ext == ".txt":
write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
write_images_text(images, os.path.join(path, "images" + ext))
write_points3D_text(points3D, os.path.join(path, "points3D" + ext))
else:
write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
write_images_binary(images, os.path.join(path, "images" + ext))
write_points3D_binary(points3D, os.path.join(path, "points3D" + ext))
return cameras, images, points3D
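# Usage sketch (placeholder paths; "sparse/0" is the usual COLMAP output layout):
#   cameras, images, points3D = read_model("scene/sparse/0")        # auto-detects .bin / .txt
#   write_model(cameras, images, points3D, "scene/out", ext=".txt") # convert to text format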
def qvec2rotmat(qvec):
return np.array(
[
[
1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2],
],
[
2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1],
],
[
2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2,
],
]
)
def rotmat2qvec(R):
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
K = (
np.array(
[
[Rxx - Ryy - Rzz, 0, 0, 0],
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz],
]
)
/ 3.0
)
eigvals, eigvecs = np.linalg.eigh(K)
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
if qvec[0] < 0:
qvec *= -1
return qvec
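# Sanity-check sketch: the two conversions above are mutual inverses up to quaternion
# sign, which rotmat2qvec normalizes so that qw >= 0.
#   q = np.array([0.5, 0.5, 0.5, 0.5])   # unit quaternion (w, x, y, z): 120-degree rotation about (1, 1, 1)
#   assert np.allclose(rotmat2qvec(qvec2rotmat(q)), q)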
def main():
parser = argparse.ArgumentParser(
description="Read and write COLMAP binary and text models"
)
parser.add_argument("--input_model", help="path to input model folder")
parser.add_argument(
"--input_format",
choices=[".bin", ".txt"],
help="input model format",
default="",
)
parser.add_argument("--output_model", help="path to output model folder")
parser.add_argument(
"--output_format",
choices=[".bin", ".txt"],
help="outut model format",
default=".txt",
)
args = parser.parse_args()
cameras, images, points3D = read_model(
path=args.input_model, ext=args.input_format
)
print("num_cameras:", len(cameras))
print("num_images:", len(images))
print("num_points3D:", len(points3D))
if args.output_model is not None:
write_model(
cameras,
images,
points3D,
path=args.output_model,
ext=args.output_format,
)
def images_points_scale(root, scale_factor):
images_path = os.path.join(root, 'images.bin')
new_images_path = os.path.join(root, 'images_scaled.bin')
points3D_path = os.path.join(root, 'points3D.bin')
new_points3D_path = os.path.join(root, 'points3D_scaled.bin')
images = read_images_binary(images_path)
points = read_points3D_binary(points3D_path)
for k, v in images.items():
new_image = v._replace(tvec=v.tvec * scale_factor)
images[k] = new_image
write_images_binary(images, new_images_path)
for k, v in points.items():
new_point = v._replace(xyz=v.xyz * scale_factor)
points[k] = new_point
write_points3D_binary(points, new_points3D_path)
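# Note: Image and Point3D are namedtuples, so _replace above returns a new tuple with
# only tvec / xyz scaled; ids, tracks and 2D observations are left untouched.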
# if __name__ == "__main__":
# root = "/home/ubuntu/Downloads/outputs/dense/sparse"
# images_points_scale(root, 1000)

render_reconeval.py Normal file (101 lines)
View File

@ -0,0 +1,101 @@
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#
import numpy as np
import torch
from scene import Scene
import os
import shutil
from tqdm import tqdm
from os import makedirs
from gaussian_renderer import render
import torchvision
from utils.general_utils import safe_state
from argparse import ArgumentParser
from arguments import ModelParams, PipelineParams, get_combined_args
from gaussian_renderer import GaussianModel
import read_write_binary as im
def render_set(model_path, name, iteration, views, gaussians, pipeline, background):
render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders")
gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt")
makedirs(render_path, exist_ok=True)
makedirs(gts_path, exist_ok=True)
for idx, view in enumerate(tqdm(views, desc="Rendering progress")):
rendering = render(view, gaussians, pipeline, background)["render"]
gt = view.original_image[0:3, :, :]
# image = view.image_name
torchvision.utils.save_image(rendering, os.path.join(render_path, '{}.png'.format(view.image_name)))
torchvision.utils.save_image(gt, os.path.join(gts_path, '{}.png'.format(view.image_name)))
def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool):
with torch.no_grad():
gaussians = GaussianModel(dataset.sh_degree)
scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False, override_quantization=True)
bg_color = [1,1,1] if dataset.white_background else [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
if not skip_train:
render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background)
if not skip_test:
render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background)
if __name__ == "__main__":
# Set up command line argument parser
parser = ArgumentParser(description="Testing script parameters")
model = ModelParams(parser, sentinel=True)
pipeline = PipelineParams(parser)
parser.add_argument("--iteration", default=-1, type=int)
parser.add_argument("--skip_train", action="store_true")
parser.add_argument("--skip_test", action="store_true")
parser.add_argument("--quiet", action="store_true")
args = get_combined_args(parser)
# Initialize system state (RNG)
safe_state(args.quiet)
if os.path.exists(args.source_path):
shutil.rmtree(args.source_path)
os.mkdir(args.source_path)
shutil.copytree(os.path.join(args.source_path, "../sparse"), os.path.join(args.source_path, "sparse"))
shutil.copytree(os.path.join(args.source_path, "../images"), os.path.join(args.source_path, "images"))
shutil.copy(os.path.join(args.source_path, "../test_aligned_pose.txt"),
os.path.join(args.source_path, "test_aligned_pose.txt"))
data = im.read_images_binary(os.path.join(args.source_path, "sparse", "images.bin"))
image = data[1]
new_data = {}
with open(os.path.join(args.source_path, "test_aligned_pose.txt"), "r") as f:
lines = f.readlines()
for line in lines:
line = line.strip()
n, tx, ty, tz, qx, qy, qz, qw = line.split(" ")
name = "{}.png".format(n)
if not os.path.exists(os.path.join(args.source_path, "images", name)):
images = [i for i in os.listdir(os.path.join(args.source_path, "images")) if ".png" in i]
shutil.copy(os.path.join(os.path.join(args.source_path, "images", images[0])),
os.path.join(os.path.join(args.source_path, "images", name)))
i = int(n)
qvec = [float(i) for i in [qw, qx, qy, qz]]
tvec = [float(i) for i in [tx, ty, tz]]
#image = data[1]
image = image._replace(id=i, qvec=np.array(qvec), tvec=np.array(tvec), name=name)
#data[1 + i] = image
new_data[i] = image
print(len(new_data))
im.write_images_binary(new_data, os.path.join(args.source_path, "sparse/0", "images.bin"))
render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test)
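# Example invocation (placeholder paths; flags as defined by the parser above):
#   python render_reconeval.py -m output/scene --iteration 30000 --skip_train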

View File

@ -1,11 +1,46 @@
# python train.py --source_path ../../Dataset/3DGS_Dataset/linggongtang --model_path output/linggongtang --data_device 'cpu' --eval --resolution 1
# scene: {'kejiguan': 'cuda', 'wanfota': 'cuda', 'zhiwu': 'cuda', 'linggongtang': 'cpu', 'xiangjiadang': 'cpu', 'sipingguzhai': 'cpu'}
# scene: {'kejiguan': 'cuda', 'wanfota': 'cuda', 'zhiwu': 'cuda', 'linggongtang': 'cuda', 'xiangjiadang': 'cuda', 'town-train-cpy': 'cuda', 'town2-train-cpy': 'cuda', 'sipingguzhai': 'cpu'}
# device = cuda: kejiguan, wanfota, zhiwu
#        = cpu:  linggongtang, xiangjiadang, sipingguzhai
import os
for cuda, scene in enumerate({'linggongtang': 'cpu', 'xiangjiadang': 'cpu', 'sipingguzhai': 'cpu'}.items()):
# for idx, scene in enumerate({'town-train': 'cuda', 'town2-train': 'cuda', 'building1-train': 'cuda'}.items()):
# print('---------------------------------------------------------------------------------')
# one_cmd = f'python train.py --source_path /data2/lpl/data/carla-dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --checkpoint_iterations 30000'
# print(one_cmd)
# os.system(one_cmd)
#
# # python render.py -m <path to trained model>
# for idx, scene in enumerate(['town-train-cpy', 'town2-train-cpy', 'building1-train']):
# print('---------------------------------------------------------------------------------')
# one_cmd = f'python render.py -m output/{scene}'
# print(one_cmd)
# os.system(one_cmd)
#
# # python metrics.py -m <path to trained model>
# for idx, scene in enumerate(['town-train-cpy', 'town2-train-cpy', 'building1-train']):
# print('---------------------------------------------------------------------------------')
# one_cmd = f'python metrics.py -m output/{scene}'
# print(one_cmd)
# os.system(one_cmd)
for idx, scene in enumerate({'building2-train': 'cpu', 'building3-train': 'cuda'}.items()):
print('---------------------------------------------------------------------------------')
one_cmd = f'python train.py --source_path ../../Dataset/3DGS_Dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --eval'
one_cmd = f'python train.py --source_path /data2/lpl/data/carla-dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --checkpoint_iterations 30000 --port 6009'
print(one_cmd)
os.system(one_cmd)
# python render.py -m <path to trained model>
for idx, scene in enumerate(['building2-train', 'building3-train']):
print('---------------------------------------------------------------------------------')
one_cmd = f'python render.py -m output/{scene}'
print(one_cmd)
os.system(one_cmd)
# python metrics.py -m <path to trained model>
for idx, scene in enumerate(['building2-train', 'building3-train']):
print('---------------------------------------------------------------------------------')
one_cmd = f'python metrics.py -m output/{scene}'
print(one_cmd)
os.system(one_cmd)

View File

@ -26,32 +26,33 @@ class Scene:
def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0]):
"""
Initialize the scene object
:param args: model parameters such as the model path and source path
:param gaussians: Gaussian model object, the 3D representation of the scene points
:param load_iteration: iteration of the model to load; if not None and equal to -1, search the point_cloud/ folder under the output folder for the model with the largest iteration; if not -1, load the given iteration
:param shuffle: whether to shuffle the camera lists before training
:param resolution_scales: list of resolution scales, for handling cameras at different resolutions
Initialize the 3D scene object
args: the args holding the GaussianModel-related parameters, i.e. the parameters from ModelParams() in scene/__init__.py
gaussians: the 3D Gaussian model object, the 3D representation of the scene points
load_iteration: iteration of the model to load; if -1, search the point_cloud/ folder under the output folder for the model with the largest iteration; if neither None nor -1, load the given iteration
shuffle: whether to shuffle the camera lists before training
resolution_scales: list of resolution scales, for handling cameras at different resolutions
"""
self.model_path = args.model_path # path where model files are saved
self.loaded_iter = None # iteration that has been loaded
self.gaussians = gaussians # the Gaussian model object
# check for and load an existing trained model
# if a trained model already exists, load it
if load_iteration:
# not None
if load_iteration == -1:
# and equal to -1: search the point_cloud/ folder under the output folder for the model with the largest iteration, recording that iteration
# -1: search the point_cloud/ folder under the output folder for the model with the largest iteration, recording that iteration
self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud"))
else:
# not -1: load the given iteration
# neither None nor -1: load the given iteration
self.loaded_iter = load_iteration
print("Loading trained model at iteration {}".format(self.loaded_iter))
self.train_cameras = {} # camera parameters used for training
self.test_cameras = {} # camera parameters used for testing
self.train_cameras = {} # cameras used for training
self.test_cameras = {} # cameras used for testing
# determine whether the dataset is COLMAP output or Blender output, and load the scene info from it
# build the scene info (point cloud, training cameras, test cameras, scene normalization parameters, point-cloud file path) from the COLMAP or Blender output
if os.path.exists(os.path.join(args.source_path, "sparse")):
scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval)
elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")):
@ -60,28 +61,33 @@ class Scene:
else:
assert False, "Could not recognize scene type!"
# loaded_iter is None: the model has not been trained yet
if not self.loaded_iter:
# if no model was loaded, copy the point-cloud file points3D.ply to input.ply
with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file:
dest_file.write(src_file.read())
json_cams = []
camlist = []
if scene_info.test_cameras:
# add the test cameras to camlist
camlist.extend(scene_info.test_cameras)
if scene_info.train_cameras:
# add the training cameras to camlist
camlist.extend(scene_info.train_cameras)
# convert every camera in camlist to JSON with camera_to_JSON, collect the results in json_cams, and write json_cams to the cameras.json file
for id, cam in enumerate(camlist):
json_cams.append(camera_to_JSON(id, cam))
with open(os.path.join(self.model_path, "cameras.json"), 'w') as file:
json.dump(json_cams, file)
if shuffle:
# randomly shuffle the training and test camera lists
random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling
random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling
self.cameras_extent = scene_info.nerf_normalization["radius"]
# load training and test poses at the different resolutions in resolution_scales
# load training and test cameras (including R, T and FoV) at the different resolutions in resolution_scales
for resolution_scale in resolution_scales:
print("Loading Training Cameras")
self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args)
@ -89,13 +95,13 @@ class Scene:
self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args)
if self.loaded_iter:
# directly read the corresponding (already trained) scene
# when loading a trained model, directly read the corresponding (already trained) scene
self.gaussians.load_ply(os.path.join(self.model_path,
"point_cloud",
"iteration_" + str(self.loaded_iter),
"point_cloud.ply"))
else:
# loaded_iter is None, i.e. the model has not been trained: call GaussianModel.create_from_pcd to build the model from scene_info.point_cloud
# when not loading a trained model, call GaussianModel.create_from_pcd to build the model from the sparse point cloud scene_info.point_cloud
self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent)
def save(self, iteration):

View File

@ -41,6 +41,9 @@ CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
def qvec2rotmat(qvec):
'''
Quaternion qvec = [w, x, y, z] -> rotation matrix
'''
return np.array([
[1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
@ -129,7 +132,6 @@ def read_points3D_binary(path_to_model_file):
void Reconstruction::WritePoints3DBinary(const std::string& path)
"""
with open(path_to_model_file, "rb") as fid:
num_points = read_next_bytes(fid, 8, "Q")[0]

View File

@ -66,28 +66,34 @@ def getNerfppNorm(cam_info):
return {"translate": translate, "radius": radius}
def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
cam_infos = [] # initialize the list used to store camera info
'''
cam_extrinsics: dict of each image's camera extrinsics (Image objects)
cam_intrinsics: dict of each image's camera intrinsics (Camera objects)
images_folder: path to the folder holding the original images
'''
# initialize the list storing camera-info (CameraInfo) objects
cam_infos = []
# iterate over the extrinsics of all cameras
for idx, key in enumerate(cam_extrinsics):
# dynamically display the progress of reading camera info
sys.stdout.write('\r')
# the exact output you're looking for:
sys.stdout.write('\r') # move the cursor back to the start of the current line
sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics)))
sys.stdout.flush()
sys.stdout.flush() # immediately flush the buffered output to the console
# fetch the current camera's extrinsics and intrinsics
extr = cam_extrinsics[key] # extrinsics of the current camera
intr = cam_intrinsics[extr.camera_id] # matching intrinsics found via the camera_id in the extrinsics
height = intr.height # image height of the camera
width = intr.width # image width of the camera
extr = cam_extrinsics[key] # extrinsics of the current camera (an Image object)
intr = cam_intrinsics[extr.camera_id] # matching intrinsics (a Camera object) found via camera_id
height = intr.height # image height
width = intr.width # image width
uid = intr.id # unique identifier of the camera
R = np.transpose(qvec2rotmat(extr.qvec)) # convert the quaternion rotation into rotation matrix R
T = np.array(extr.tvec) # translation vector from the extrinsics
R = np.transpose(qvec2rotmat(extr.qvec)) # convert the rotation quaternion into rotation matrix R, then transpose
T = np.array(extr.tvec) # translation vector
# compute the field of view (FoV) from the camera's intrinsic model
if intr.model=="SIMPLE_PINHOLE":
# SIMPLE_PINHOLE model: a single focal length parameter
focal_length_x = intr.params[0]
@ -97,14 +103,13 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
# PINHOLE model: two focal length parameters
focal_length_x = intr.params[0]
focal_length_y = intr.params[1]
FovY = focal2fov(focal_length_y, height) # vertical FoV from the y-direction focal length
FovX = focal2fov(focal_length_x, width) # horizontal FoV from the x-direction focal length
FovY = focal2fov(focal_length_y, height) # vertical FoV from fy
FovX = focal2fov(focal_length_x, width) # horizontal FoV from fx
elif intr.model=="SIMPLE_RADIAL":
# SIMPLE_RADIAL model: a single focal length (params = [f, cx, cy, k])
focal_length_x = intr.params[0]
focal_length_y = intr.params[1] # note: for SIMPLE_RADIAL this is cx, not a focal length
FovY = focal2fov(focal_length_y, height) # vertical FoV (mistakenly computed from params[1])
FovX = focal2fov(focal_length_x, width) # horizontal FoV from the focal length
FovY = focal2fov(focal_length_x, height) # vertical FoV from the single focal length
FovX = focal2fov(focal_length_x, width) # horizontal FoV from the single focal length
else:
# any other model: raise an error
assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
@ -116,6 +121,7 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
continue
image = Image.open(image_path)
# create a CameraInfo object (rotation matrix, translation vector, FoV, image data, image path, image name, width, height) and append it to the cam_infos list
cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
image_path=image_path, image_name=image_name, width=width, height=height)
cam_infos.append(cam_info)
@ -123,7 +129,6 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
sys.stdout.write('\n')
print("valid Colmap camera size: {}".format(len(cam_infos)))
# return the assembled list of camera info
return cam_infos
def fetchPly(path):
@ -153,30 +158,38 @@ def storePly(path, xyz, rgb):
ply_data = PlyData([vertex_element])
ply_data.write(path)
# try to read the binary camera extrinsics file images.bin and intrinsics file cameras.bin from the COLMAP output
def readColmapSceneInfo(path, images, eval, llffhold=8):
'''
Load the binary camera extrinsics file images.bin and the intrinsics file cameras.bin from the COLMAP output
path: the source-file path from the GaussianModel args
images: 'images'
eval: whether running in eval mode
llffhold: defaults to 8
'''
try:
cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin")
cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file)
cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file)
except:
# if reading the binary files fails, try the text-format extrinsics and intrinsics files
# if reading the .bin files fails, try the .txt-format extrinsics and intrinsics files
cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt")
cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.txt")
cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file)
cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file)
# directory holding the images, defaulting to "images" if not specified
# folder name of the original images (default 'images'), from which the images are read
reading_dir = "images" if images == None else images
# read and process the camera parameters, converting them to the internal format
# build a CameraInfo object (rotation matrix, translation vector, FoV, image data, image path, image name, width, height) from each camera's intrinsics and extrinsics, stored in the cam_infos_unsorted list
cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir))
# sort the camera info by image name to keep the ordering consistent
# sort by image name to keep the ordering consistent
cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : (x.image_path.split('/')[-2], int(x.image_name)))
# split the cameras into training and test sets depending on eval mode
# in eval mode, select test cameras at intervals given by llffhold (commonly used for LLFF datasets)
# in eval mode, take one of every llffhold images as the test set
if eval:
train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0]
test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold == 0]
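# e.g. with the default llffhold=8, images at indices 0, 8, 16, ... form the test set and the rest form the training set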
@ -188,7 +201,7 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
# compute scene normalization parameters; this handles scenes of different sizes and positions and makes training more stable
nerf_normalization = getNerfppNorm(train_cam_infos)
# try to read the point cloud, preferring the PLY file; if absent, convert from the BIN or TXT file and save as PLY
# try to read the COLMAP sparse point cloud, preferring the PLY file; if absent, convert from the BIN or TXT file and save as PLY
ply_path = os.path.join(path, "sparse/0/points3D.ply")
bin_path = os.path.join(path, "sparse/0/points3D.bin")
txt_path = os.path.join(path, "sparse/0/points3D.txt")
@ -198,9 +211,12 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
xyz, rgb, _ = read_points3D_binary(bin_path) # read the COLMAP sparse point cloud from points3D.bin
except:
xyz, rgb, _ = read_points3D_text(txt_path)
storePly(ply_path, xyz, rgb) # convert to a .ply file
try:
pcd = fetchPly(ply_path)
pcd = fetchPly(ply_path) # read the COLMAP sparse point cloud from points3D.ply
except:
pcd = None
@ -293,4 +309,4 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
sceneLoadTypeCallbacks = {
"Colmap": readColmapSceneInfo,
"Blender" : readNerfSyntheticInfo
}
}

View File

@ -25,10 +25,9 @@ class GaussianModel:
def setup_functions(self):
"""
Define and initialize some functions used to process the 3D Gaussian model parameters
Define and initialize the activation functions that process the Gaussian model parameters
"""
# define the function that builds the 3D Gaussian covariance matrix
# define the function that builds the 3D Gaussian covariance matrix from scale and rotation
def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation):
L = build_scaling_rotation(scaling_modifier * scaling, rotation) # get the L matrix from the scale, the scale modifier and the rotation
actual_covariance = L @ L.transpose(1, 2) # compute the actual covariance matrix
@ -49,29 +48,30 @@ class GaussianModel:
def __init__(self, sh_degree : int):
"""
Initialize the parameters of the 3D Gaussian model
sh_degree: maximum degree of the spherical harmonics, controlling the complexity of the color representation
Initialize the parameters of the 3D Gaussian model
sh_degree: the configured maximum spherical-harmonics degree (default 3), controlling the complexity of the color representation
"""
# initialize the active SH degree and the maximum SH degree
self.active_sh_degree = 0 # currently active SH degree, starting at 0
self.max_sh_degree = sh_degree # maximum allowed SH degree
# initialize the parameters of the 3D Gaussian model
self._xyz = torch.empty(0) # centers (means) of the 3D Gaussians
self._features_dc = torch.empty(0) # first SH coefficient, representing the base color
self._features_rest = torch.empty(0) # remaining SH coefficients, representing color detail and variation
self._scaling = torch.empty(0) # scales of the 3D Gaussians, controlling their shape
self._rotation = torch.empty(0) # rotations of the 3D Gaussians (a set of quaternions)
self._opacity = torch.empty(0) # opacities (pre-sigmoid) of the 3D Gaussians, controlling visibility
self.max_radii2D = torch.empty(0) # maximum 2D projected radius of each Gaussian
self.xyz_gradient_accum = torch.empty(0) # accumulated gradients of the Gaussian centers; when too large the Gaussian should be split, when small (under-reconstruction) it should be cloned
self.denom = torch.empty(0) # used with the accumulated gradients; counts how many times gradients were accumulated and divides them when computing the mean gradient (denom = denominator)
self.optimizer = None # optimizer adjusting the above parameters to improve the model; the paper uses Adam (see the pseudocode of Algorithm 1 in Appendix B)
self.percent_dense = 0 # hyperparameter controlling how dense the Gaussians are
self.spatial_lr_scale = 0 # the position learning rate is multiplied by this, offsetting the problems of applying one learning rate to scenes of different scales
# call setup_functions to initialize the activation functions that process the Gaussian model parameters
self.setup_functions()
def capture(self):
@ -139,24 +139,23 @@ class GaussianModel:
def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float):
"""
Initialize the model parameters from point-cloud data
:param pcd: sparse point-cloud data with point positions and colors
:param spatial_lr_scale: spatial learning-rate scale factor, affecting the learning rate of the position parameters
Initialize the model parameters from a sparse point cloud
pcd: sparse point cloud with point positions and colors
spatial_lr_scale: spatial learning-rate scale factor, affecting the learning rate of the position parameters
"""
# Judging from scene.Scene.__init__ and scene.dataset_readers.SceneInfo.nerf_normalization (i.e. the code of scene.dataset_readers.getNerfppNorm),
# this value appears to be 1.1x the largest distance from any training camera to the mean of the camera positions (the center); by its name it relates to the learning rate, preventing problems when a fixed learning rate is applied to scenes of different scales.
self.spatial_lr_scale = spatial_lr_scale
# convert the point cloud's position and color data from numpy arrays to PyTorch tensors and move them to the CUDA device
fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda() # 3D coordinates of the sparse point cloud, shape (P, 3)
fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda()) # DC component of the spherical harmonics, shape (P, 3)
# RGB2SH(x) = (x - 0.5) / 0.28209479177387814; pcd.colors apparently lies in [0, 1], and 0.28209479177387814 is 1 / (2*sqrt(pi)), the value of the DC basis function Y(l=0, m=0)
# initialize the tensor storing the spherical-harmonics coefficients: all SH coefficients of the three RGB channels, (max_sh_degree + 1) ** 2 per channel
features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda() # (P, 3, 16)
features[:, :3, 0 ] = fused_color # store the RGB-converted SH C0 coefficients (DC component)
features[:, :3, 0 ] = fused_color # store the RGB-converted SH C0 coefficients (DC component) into each 3D point's DC slot
features[:, 3:, 1:] = 0.0 # initialize the remaining SH coefficients to 0
# print the number of initial points
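# Worked example of the mapping noted above: a pure white input gives
# RGB2SH(1.0) = (1.0 - 0.5) / 0.28209479177387814 ~= 1.7725 (= sqrt(pi)),
# the DC coefficient that reproduces RGB 1.0 when multiplied by Y(l=0,m=0) and shifted back by 0.5.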

View File

@ -34,18 +34,21 @@ except ImportError:
def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from):
'''
dataset: the args holding the model-related parameters
opt: optimization-related parameters
pipe: pipeline-related parameters
dataset: the args holding the GaussianModel-related parameters
opt: the args holding the optimization-related parameters
pipe: the args holding the pipeline-related parameters
checkpoint: path to an already trained model
debug_from: the iteration from which to start debugging
'''
first_iter = 0
# create the output folder, save the model-related parameters to the cfg_args file, and try to create a tensorboard writer to log training
# create the output folder (output/scene), save the model-related parameters to the cfg_args file, and try to create a tensorboard writer to log training
tb_writer = prepare_output_and_logger(dataset)
gaussians = GaussianModel(dataset.sh_degree) # create and initialize the Gaussian model, representing each scene point as a 3D Gaussian
scene = Scene(dataset, gaussians) # create the initial 3D scene object, loading the dataset and the corresponding camera parameters
# create the Gaussian model object, representing each scene point as a 3D Gaussian
gaussians = GaussianModel(dataset.sh_degree)
# create the initial 3D scene object, loading the dataset and the corresponding camera parameters
scene = Scene(dataset, gaussians)
gaussians.training_setup(opt) # set up the optimizer and learning-rate scheduler for the Gaussian model parameters
# if a checkpoint is given, load the model parameters from it and resume training progress
@ -131,7 +134,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
print("\n[ITER {}] Saving Gaussians".format(iteration))
scene.save(iteration)
# densify and prune the 3D Gaussian model within the specified iteration range (densification)
# Densification: densify and prune the 3D Gaussian model within the specified iteration range
if iteration < opt.densify_until_iter:
# Keep track of max radii in image-space for pruning
gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter])
@ -144,7 +147,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter):
gaussians.reset_opacity()
# take one optimizer step and prepare the next iteration (optimizer step)
# Optimizer step: take one optimizer step and prepare the next iteration
if iteration < opt.iterations:
gaussians.optimizer.step()
gaussians.optimizer.zero_grad(set_to_none = True)
@ -219,12 +222,13 @@ if __name__ == "__main__":
# Set up command line argument parser
parser = ArgumentParser(description="Training script parameters")
model_prams = ModelParams(parser) # define the arg object holding the model-related parameters
optim_prams = OptimizationParams(parser) # define the arg object holding the optimization-related parameters
pipeline_prams = PipelineParams(parser) # define the arg object holding the rendering-related parameters
# create the objects holding the model, optimization and rendering parameters
lp = ModelParams(parser)
op = OptimizationParams(parser)
pp = PipelineParams(parser)
parser.add_argument('--ip', type=str, default="127.0.0.1")
parser.add_argument('--port', type=int, default=6009)
parser.add_argument('--port', type=int, default=6007)
parser.add_argument('--debug_from', type=int, default=-1) # iteration (>= 0) from which to start debugging
parser.add_argument('--detect_anomaly', action='store_true', default=False) # action='store_true': the value is set to True if the flag appears on the command line
parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000])
@ -233,7 +237,7 @@ if __name__ == "__main__":
parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[30_000])
parser.add_argument("--start_checkpoint", type=str, default = None)
# parse all argument values from the command line and store them, together with the presets above, in a Namespace object, i.e. args
# override the parser presets with the command-line arguments and store the result in args
args = parser.parse_args(sys.argv[1:])
args.save_iterations.append(args.iterations)
@ -248,8 +252,12 @@ if __name__ == "__main__":
torch.autograd.set_detect_anomaly(args.detect_anomaly) # configure whether PyTorch detects gradient anomalies
# model_prams.extract(args): take the attributes in args (the command-line and preset parameters) that match the parameters defined in ModelParams, and wrap them in a new GroupParams object
training(model_prams.extract(args), optim_prams.extract(args), pipeline_prams.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from)
# lp.extract(args): the parameters in args override the model, optimization and rendering parameters, forming new args objects
lp_args = lp.extract(args)
op_args = op.extract(args)
pp_args = pp.extract(args)
training(lp_args, op_args, pp_args, args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from)
# All done
print("\nTraining complete.")

View File

@ -52,6 +52,11 @@ def loadCam(args, id, cam_info, resolution_scale):
image_name=cam_info.image_name, uid=id, data_device=args.data_device)
def cameraList_from_camInfos(cam_infos, resolution_scale, args):
'''
cam_infos: list of training or test camera objects
resolution_scale: the resolution scale being loaded
args: the Gaussian model parameters
'''
camera_list = []
for id, c in enumerate(cam_infos):

View File

@ -74,4 +74,8 @@ def fov2focal(fov, pixels):
return pixels / (2 * math.tan(fov / 2))
def focal2fov(focal, pixels):
return 2*math.atan(pixels/(2*focal))
'''
focal: fx or fy
pixels: width or height, in pixels
'''
return 2 * math.atan(pixels / (2 * focal))
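# Worked example for the pair above: fov2focal and focal2fov are inverses;
# a 1600-px-wide image with fx = 800 gives FovX = 2 * atan(1600 / (2 * 800)) = 2 * atan(1.0) = 90 degrees,
# and fov2focal(FovX, 1600) returns 800 again.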