diff --git a/arguments/__init__.py b/arguments/__init__.py index b15a16d..5d8468e 100644 --- a/arguments/__init__.py +++ b/arguments/__init__.py @@ -63,7 +63,7 @@ class ModelParams(ParamGroup): 从args对象中提取出与 ModelParams类中定义的参数相匹配的值,并将它们封装到一个新的 GroupParams 对象中 args: 存储着 命令行和main中预设的参数 ''' - g = super().extract(args) # 返回的GroupParams对象 + g = super().extract(args) # 替换相匹配的值,并返回一个GroupParams对象 g.source_path = os.path.abspath(g.source_path) # 更新为绝对路径 return g diff --git a/metrics.py b/metrics.py index f7393a4..c191534 100644 --- a/metrics.py +++ b/metrics.py @@ -49,7 +49,7 @@ def evaluate(model_paths): full_dict_polytopeonly[scene_dir] = {} per_view_dict_polytopeonly[scene_dir] = {} - test_dir = Path(scene_dir) / "test" + test_dir = Path(scene_dir) / "train" for method in os.listdir(test_dir): print("Method:", method) diff --git a/read_write_binary.py b/read_write_binary.py new file mode 100644 index 0000000..64ea98f --- /dev/null +++ b/read_write_binary.py @@ -0,0 +1,593 @@ +import os +import collections +import numpy as np +import struct +import argparse + + +CameraModel = collections.namedtuple( + "CameraModel", ["model_id", "model_name", "num_params"] +) +Camera = collections.namedtuple( + "Camera", ["id", "model", "width", "height", "params"] +) +BaseImage = collections.namedtuple( + "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"] +) +Point3D = collections.namedtuple( + "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"] +) + + +class Image(BaseImage): + def qvec2rotmat(self): + return qvec2rotmat(self.qvec) + + +CAMERA_MODELS = { + CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), + CameraModel(model_id=1, model_name="PINHOLE", num_params=4), + CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), + CameraModel(model_id=3, model_name="RADIAL", num_params=5), + CameraModel(model_id=4, model_name="OPENCV", num_params=8), + CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), + CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), + CameraModel(model_id=7, model_name="FOV", num_params=5), + CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), + CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), + CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12), +} +CAMERA_MODEL_IDS = dict( + [(camera_model.model_id, camera_model) for camera_model in CAMERA_MODELS] +) +CAMERA_MODEL_NAMES = dict( + [(camera_model.model_name, camera_model) for camera_model in CAMERA_MODELS] +) + + +def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): + """Read and unpack the next bytes from a binary file. + :param fid: + :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. + :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. + :param endian_character: Any of {@, =, <, >, !} + :return: Tuple of read and unpacked values. + """ + data = fid.read(num_bytes) + return struct.unpack(endian_character + format_char_sequence, data) + + +def write_next_bytes(fid, data, format_char_sequence, endian_character="<"): + """pack and write to a binary file. + :param fid: + :param data: data to send, if multiple elements are sent at the same time, + they should be encapsuled either in a list or a tuple + :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 
+ should be the same length as the data list or tuple + :param endian_character: Any of {@, =, <, >, !} + """ + if isinstance(data, (list, tuple)): + bytes = struct.pack(endian_character + format_char_sequence, *data) + else: + bytes = struct.pack(endian_character + format_char_sequence, data) + fid.write(bytes) + + +def read_cameras_text(path): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::WriteCamerasText(const std::string& path) + void Reconstruction::ReadCamerasText(const std::string& path) + """ + cameras = {} + with open(path, "r") as fid: + while True: + line = fid.readline() + if not line: + break + line = line.strip() + if len(line) > 0 and line[0] != "#": + elems = line.split() + camera_id = int(elems[0]) + model = elems[1] + width = int(elems[2]) + height = int(elems[3]) + params = np.array(tuple(map(float, elems[4:]))) + cameras[camera_id] = Camera( + id=camera_id, + model=model, + width=width, + height=height, + params=params, + ) + return cameras + + +def read_cameras_binary(path_to_model_file): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::WriteCamerasBinary(const std::string& path) + void Reconstruction::ReadCamerasBinary(const std::string& path) + """ + cameras = {} + with open(path_to_model_file, "rb") as fid: + num_cameras = read_next_bytes(fid, 8, "Q")[0] + for _ in range(num_cameras): + camera_properties = read_next_bytes( + fid, num_bytes=24, format_char_sequence="iiQQ" + ) + camera_id = camera_properties[0] + model_id = camera_properties[1] + model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name + width = camera_properties[2] + height = camera_properties[3] + num_params = CAMERA_MODEL_IDS[model_id].num_params + params = read_next_bytes( + fid, + num_bytes=8 * num_params, + format_char_sequence="d" * num_params, + ) + cameras[camera_id] = Camera( + id=camera_id, + model=model_name, + width=width, + height=height, + params=np.array(params), + ) + assert len(cameras) == num_cameras + return cameras + + +def write_cameras_text(cameras, path): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::WriteCamerasText(const std::string& path) + void Reconstruction::ReadCamerasText(const std::string& path) + """ + HEADER = ( + "# Camera list with one line of data per camera:\n" + + "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + + "# Number of cameras: {}\n".format(len(cameras)) + ) + with open(path, "w") as fid: + fid.write(HEADER) + for _, cam in cameras.items(): + to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params] + line = " ".join([str(elem) for elem in to_write]) + fid.write(line + "\n") + + +def write_cameras_binary(cameras, path_to_model_file): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::WriteCamerasBinary(const std::string& path) + void Reconstruction::ReadCamerasBinary(const std::string& path) + """ + with open(path_to_model_file, "wb") as fid: + write_next_bytes(fid, len(cameras), "Q") + for _, cam in cameras.items(): + model_id = CAMERA_MODEL_NAMES[cam.model].model_id + camera_properties = [cam.id, model_id, cam.width, cam.height] + write_next_bytes(fid, camera_properties, "iiQQ") + for p in cam.params: + write_next_bytes(fid, float(p), "d") + return cameras + + +def read_images_text(path): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadImagesText(const std::string& path) + void Reconstruction::WriteImagesText(const std::string& path) + """ + images = {} + with open(path, "r") as fid: + while True: + line = 
fid.readline() + if not line: + break + line = line.strip() + if len(line) > 0 and line[0] != "#": + elems = line.split() + image_id = int(elems[0]) + qvec = np.array(tuple(map(float, elems[1:5]))) + tvec = np.array(tuple(map(float, elems[5:8]))) + camera_id = int(elems[8]) + image_name = elems[9] + elems = fid.readline().split() + xys = np.column_stack( + [ + tuple(map(float, elems[0::3])), + tuple(map(float, elems[1::3])), + ] + ) + point3D_ids = np.array(tuple(map(int, elems[2::3]))) + images[image_id] = Image( + id=image_id, + qvec=qvec, + tvec=tvec, + camera_id=camera_id, + name=image_name, + xys=xys, + point3D_ids=point3D_ids, + ) + return images + + +def read_images_binary(path_to_model_file): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadImagesBinary(const std::string& path) + void Reconstruction::WriteImagesBinary(const std::string& path) + """ + images = {} + with open(path_to_model_file, "rb") as fid: + num_reg_images = read_next_bytes(fid, 8, "Q")[0] + for _ in range(num_reg_images): + binary_image_properties = read_next_bytes( + fid, num_bytes=64, format_char_sequence="idddddddi" + ) + image_id = binary_image_properties[0] + qvec = np.array(binary_image_properties[1:5]) + tvec = np.array(binary_image_properties[5:8]) + camera_id = binary_image_properties[8] + image_name = "" + current_char = read_next_bytes(fid, 1, "c")[0] + while current_char != b"\x00": # look for the ASCII 0 entry + image_name += current_char.decode("utf-8") + current_char = read_next_bytes(fid, 1, "c")[0] + num_points2D = read_next_bytes( + fid, num_bytes=8, format_char_sequence="Q" + )[0] + x_y_id_s = read_next_bytes( + fid, + num_bytes=24 * num_points2D, + format_char_sequence="ddq" * num_points2D, + ) + xys = np.column_stack( + [ + tuple(map(float, x_y_id_s[0::3])), + tuple(map(float, x_y_id_s[1::3])), + ] + ) + point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) + images[image_id] = Image( + id=image_id, + qvec=qvec, + tvec=tvec, + camera_id=camera_id, + name=image_name, + xys=xys, + point3D_ids=point3D_ids, + ) + return images + + +def write_images_text(images, path): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadImagesText(const std::string& path) + void Reconstruction::WriteImagesText(const std::string& path) + """ + if len(images) == 0: + mean_observations = 0 + else: + mean_observations = sum( + (len(img.point3D_ids) for _, img in images.items()) + ) / len(images) + HEADER = ( + "# Image list with two lines of data per image:\n" + + "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + + "# POINTS2D[] as (X, Y, POINT3D_ID)\n" + + "# Number of images: {}, mean observations per image: {}\n".format( + len(images), mean_observations + ) + ) + + with open(path, "w") as fid: + fid.write(HEADER) + for _, img in images.items(): + image_header = [ + img.id, + *img.qvec, + *img.tvec, + img.camera_id, + img.name, + ] + first_line = " ".join(map(str, image_header)) + fid.write(first_line + "\n") + + points_strings = [] + for xy, point3D_id in zip(img.xys, img.point3D_ids): + points_strings.append(" ".join(map(str, [*xy, point3D_id]))) + fid.write(" ".join(points_strings) + "\n") + + +def write_images_binary(images, path_to_model_file): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadImagesBinary(const std::string& path) + void Reconstruction::WriteImagesBinary(const std::string& path) + """ + with open(path_to_model_file, "wb") as fid: + write_next_bytes(fid, len(images), "Q") + for _, img in images.items(): + 
write_next_bytes(fid, img.id, "i") + write_next_bytes(fid, img.qvec.tolist(), "dddd") + write_next_bytes(fid, img.tvec.tolist(), "ddd") + write_next_bytes(fid, img.camera_id, "i") + for char in img.name: + write_next_bytes(fid, char.encode("utf-8"), "c") + write_next_bytes(fid, b"\x00", "c") + write_next_bytes(fid, len(img.point3D_ids), "Q") + for xy, p3d_id in zip(img.xys, img.point3D_ids): + write_next_bytes(fid, [*xy, p3d_id], "ddq") + + +def read_points3D_text(path): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadPoints3DText(const std::string& path) + void Reconstruction::WritePoints3DText(const std::string& path) + """ + points3D = {} + with open(path, "r") as fid: + while True: + line = fid.readline() + if not line: + break + line = line.strip() + if len(line) > 0 and line[0] != "#": + elems = line.split() + point3D_id = int(elems[0]) + xyz = np.array(tuple(map(float, elems[1:4]))) + rgb = np.array(tuple(map(int, elems[4:7]))) + error = float(elems[7]) + image_ids = np.array(tuple(map(int, elems[8::2]))) + point2D_idxs = np.array(tuple(map(int, elems[9::2]))) + points3D[point3D_id] = Point3D( + id=point3D_id, + xyz=xyz, + rgb=rgb, + error=error, + image_ids=image_ids, + point2D_idxs=point2D_idxs, + ) + return points3D + + +def read_points3D_binary(path_to_model_file): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadPoints3DBinary(const std::string& path) + void Reconstruction::WritePoints3DBinary(const std::string& path) + """ + points3D = {} + with open(path_to_model_file, "rb") as fid: + num_points = read_next_bytes(fid, 8, "Q")[0] + for _ in range(num_points): + binary_point_line_properties = read_next_bytes( + fid, num_bytes=43, format_char_sequence="QdddBBBd" + ) + point3D_id = binary_point_line_properties[0] + xyz = np.array(binary_point_line_properties[1:4]) + rgb = np.array(binary_point_line_properties[4:7]) + error = np.array(binary_point_line_properties[7]) + track_length = read_next_bytes( + fid, num_bytes=8, format_char_sequence="Q" + )[0] + track_elems = read_next_bytes( + fid, + num_bytes=8 * track_length, + format_char_sequence="ii" * track_length, + ) + image_ids = np.array(tuple(map(int, track_elems[0::2]))) + point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) + points3D[point3D_id] = Point3D( + id=point3D_id, + xyz=xyz, + rgb=rgb, + error=error, + image_ids=image_ids, + point2D_idxs=point2D_idxs, + ) + return points3D + + +def write_points3D_text(points3D, path): + """ + see: src/colmap/scene/reconstruction.cc + void Reconstruction::ReadPoints3DText(const std::string& path) + void Reconstruction::WritePoints3DText(const std::string& path) + """ + if len(points3D) == 0: + mean_track_length = 0 + else: + mean_track_length = sum( + (len(pt.image_ids) for _, pt in points3D.items()) + ) / len(points3D) + HEADER = ( + "# 3D point list with one line of data per point:\n" + + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + + "# Number of points: {}, mean track length: {}\n".format( + len(points3D), mean_track_length + ) + ) + + with open(path, "w") as fid: + fid.write(HEADER) + for _, pt in points3D.items(): + point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error] + fid.write(" ".join(map(str, point_header)) + " ") + track_strings = [] + for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs): + track_strings.append(" ".join(map(str, [image_id, point2D]))) + fid.write(" ".join(track_strings) + "\n") + + +def write_points3D_binary(points3D, path_to_model_file): + """ + see: 
src/colmap/scene/reconstruction.cc + void Reconstruction::ReadPoints3DBinary(const std::string& path) + void Reconstruction::WritePoints3DBinary(const std::string& path) + """ + with open(path_to_model_file, "wb") as fid: + write_next_bytes(fid, len(points3D), "Q") + for _, pt in points3D.items(): + write_next_bytes(fid, pt.id, "Q") + write_next_bytes(fid, pt.xyz.tolist(), "ddd") + write_next_bytes(fid, pt.rgb.tolist(), "BBB") + write_next_bytes(fid, pt.error, "d") + track_length = pt.image_ids.shape[0] + write_next_bytes(fid, track_length, "Q") + for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs): + write_next_bytes(fid, [image_id, point2D_id], "ii") + + +def detect_model_format(path, ext): + if ( + os.path.isfile(os.path.join(path, "cameras" + ext)) + and os.path.isfile(os.path.join(path, "images" + ext)) + and os.path.isfile(os.path.join(path, "points3D" + ext)) + ): + print("Detected model format: '" + ext + "'") + return True + + return False + + +def read_model(path, ext=""): + # try to detect the extension automatically + if ext == "": + if detect_model_format(path, ".bin"): + ext = ".bin" + elif detect_model_format(path, ".txt"): + ext = ".txt" + else: + print("Provide model format: '.bin' or '.txt'") + return + + if ext == ".txt": + cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) + images = read_images_text(os.path.join(path, "images" + ext)) + points3D = read_points3D_text(os.path.join(path, "points3D") + ext) + else: + cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) + images = read_images_binary(os.path.join(path, "images" + ext)) + points3D = read_points3D_binary(os.path.join(path, "points3D") + ext) + return cameras, images, points3D + + +def write_model(cameras, images, points3D, path, ext=".bin"): + if ext == ".txt": + write_cameras_text(cameras, os.path.join(path, "cameras" + ext)) + write_images_text(images, os.path.join(path, "images" + ext)) + write_points3D_text(points3D, os.path.join(path, "points3D") + ext) + else: + write_cameras_binary(cameras, os.path.join(path, "cameras" + ext)) + write_images_binary(images, os.path.join(path, "images" + ext)) + write_points3D_binary(points3D, os.path.join(path, "points3D") + ext) + return cameras, images, points3D + + +def qvec2rotmat(qvec): + return np.array( + [ + [ + 1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2, + 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], + 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2], + ], + [ + 2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], + 1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2, + 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1], + ], + [ + 2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], + 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], + 1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2, + ], + ] + ) + + +def rotmat2qvec(R): + Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat + K = ( + np.array( + [ + [Rxx - Ryy - Rzz, 0, 0, 0], + [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], + [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], + [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz], + ] + ) + / 3.0 + ) + eigvals, eigvecs = np.linalg.eigh(K) + qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] + if qvec[0] < 0: + qvec *= -1 + return qvec + + +def main(): + parser = argparse.ArgumentParser( + description="Read and write COLMAP binary and text models" + ) + parser.add_argument("--input_model", help="path to input model folder") + parser.add_argument( + "--input_format", + choices=[".bin", ".txt"], + help="input model format", + default="", + ) + parser.add_argument("--output_model", 
help="path to output model folder") + parser.add_argument( + "--output_format", + choices=[".bin", ".txt"], + help="outut model format", + default=".txt", + ) + args = parser.parse_args() + + cameras, images, points3D = read_model( + path=args.input_model, ext=args.input_format + ) + + print("num_cameras:", len(cameras)) + print("num_images:", len(images)) + print("num_points3D:", len(points3D)) + + if args.output_model is not None: + write_model( + cameras, + images, + points3D, + path=args.output_model, + ext=args.output_format, + ) + +def images_points_scale(root, scale_factor): + images_path = os.path.join(root, 'images.bin') + new_images_path = os.path.join(root, 'images_scaled.bin') + points3D_path = os.path.join(root, 'points3D.bin') + new_points3D_path = os.path.join(root, 'points3D_scaled.bin') + + images = read_images_binary(images_path) + points = read_points3D_binary(points3D_path) + for k, v in images.items(): + new_image =v._replace(tvec=v.tvec*scale_factor) + images[k] = new_image + write_images_binary(images, new_images_path) + + for k, v in points.items(): + new_point =v._replace(xyz=v.xyz*scale_factor) + points[k] = new_point + write_points3D_binary(points, new_points3D_path) + + +# if __name__ == "__main__": +# root = "/home/ubuntu/Downloads/outputs/dense/sparse" +# images_points_scale(root, 1000) diff --git a/render_reconeval.py b/render_reconeval.py new file mode 100644 index 0000000..2a913cf --- /dev/null +++ b/render_reconeval.py @@ -0,0 +1,101 @@ +# +# Copyright (C) 2023, Inria +# GRAPHDECO research group, https://team.inria.fr/graphdeco +# All rights reserved. +# +# This software is free for non-commercial, research and evaluation use +# under the terms of the LICENSE.md file. +# +# For inquiries contact george.drettakis@inria.fr +# +import numpy as np +import torch +from scene import Scene +import os +import shutil +from tqdm import tqdm +from os import makedirs +from gaussian_renderer import render +import torchvision +from utils.general_utils import safe_state +from argparse import ArgumentParser +from arguments import ModelParams, PipelineParams, get_combined_args +from gaussian_renderer import GaussianModel +import read_write_binary as im + +def render_set(model_path, name, iteration, views, gaussians, pipeline, background): + render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders") + gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt") + + makedirs(render_path, exist_ok=True) + makedirs(gts_path, exist_ok=True) + + for idx, view in enumerate(tqdm(views, desc="Rendering progress")): + rendering = render(view, gaussians, pipeline, background)["render"] + gt = view.original_image[0:3, :, :] + # image = view.image_name + torchvision.utils.save_image(rendering, os.path.join(render_path, '{}.png'.format(view.image_name))) + torchvision.utils.save_image(gt, os.path.join(gts_path, '{}.png'.format(view.image_name))) + +def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool): + with torch.no_grad(): + gaussians = GaussianModel(dataset.sh_degree) + scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False,override_quantization=True) + + bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] + background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") + + if not skip_train: + render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background) + + if not skip_test: + 
render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background) + +if __name__ == "__main__": + # Set up command line argument parser + parser = ArgumentParser(description="Testing script parameters") + model = ModelParams(parser, sentinel=True) + pipeline = PipelineParams(parser) + parser.add_argument("--iteration", default=-1, type=int) + parser.add_argument("--skip_train", action="store_true") + parser.add_argument("--skip_test", action="store_true") + parser.add_argument("--quiet", action="store_true") + args = get_combined_args(parser) + + # Initialize system state (RNG) + safe_state(args.quiet) + + if os.path.exists(args.source_path): + shutil.rmtree(args.source_path) + os.mkdir(args.source_path) + else: + os.mkdir(args.source_path) + + shutil.copytree(os.path.join(args.source_path, "../sparse"), os.path.join(args.source_path, "sparse")) + shutil.copytree(os.path.join(args.source_path, "../images"), os.path.join(args.source_path, "images")) + shutil.copy(os.path.join(args.source_path, "../test_aligned_pose.txt"), + os.path.join(args.source_path, "test_aligned_pose.txt")) + data = im.read_images_binary(os.path.join(args.source_path, "sparse", "images.bin")) + image = data[1] + new_data = {} + with open(os.path.join(args.source_path, "test_aligned_pose.txt"), "r") as f: + lines = f.readlines() + for line in lines: + line = line.strip() + n, tx, ty, tz, qx, qy, qz, qw = line.split(" ") + name = "{}.png".format(n) + if not os.path.exists(os.path.join(args.source_path, "images", name)): + images = [i for i in os.listdir(os.path.join(args.source_path, "images")) if ".png" in i] + shutil.copy(os.path.join(os.path.join(args.source_path, "images", images[0])), + os.path.join(os.path.join(args.source_path, "images", name))) + i = int(n) + qvec = [float(i) for i in [qw, qx, qy, qz]] + tvec = [float(i) for i in [tx, ty, tz]] + #image = data[1] + image = image._replace(id=i, qvec=np.array(qvec), tvec=np.array(tvec), name=name) + #data[1 + i] = image + new_data[i] = image + print(len(new_data)) + im.write_images_binary(new_data, os.path.join(args.source_path, "sparse/0", "images.bin")) + + render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test) \ No newline at end of file diff --git a/run_train_ours.py b/run_train_ours.py index 51762f7..cbca27e 100644 --- a/run_train_ours.py +++ b/run_train_ours.py @@ -1,11 +1,46 @@ # python train.py --source_path ../../Dataset/3DGS_Dataset/linggongtang --model_path output/linggongtang --data_device 'cpu' --eval --resolution 1 -# scene: {'kejiguan': 'cuda', 'wanfota': 'cuda', 'zhiwu': 'cuda', 'linggongtang': 'cpu', 'xiangjiadang': 'cpu', 'sipingguzhai': 'cpu'} +# scene: {'kejiguan': 'cuda', 'wanfota': 'cuda', 'zhiwu': 'cuda', 'linggongtang': 'cuda', 'xiangjiadang': 'cuda', 'town-train-cpy': 'cuda', 'town2-train-cpy': 'cuda', 'sipingguzhai': 'cpu'} # device = cuda: 科技馆、万佛塔、植物 # = cpu: 凌公塘、湘家荡、寺平古宅 import os -for cuda, scene in enumerate({'linggongtang': 'cpu', 'xiangjiadang': 'cpu', 'sipingguzhai': 'cpu'}.items()): +# for idx, scene in enumerate({'town-train': 'cuda', 'town2-train': 'cuda', 'building1-train': 'cuda'}.items()): +# print('---------------------------------------------------------------------------------') +# one_cmd = f'python train.py --source_path /data2/lpl/data/carla-dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --checkpoint_iterations 30000' +# print(one_cmd) +# os.system(one_cmd) +# +# # python 
render.py -m +# for idx, scene in enumerate(['town-train-cpy', 'town2-train-cpy', 'building1-train']): +# print('---------------------------------------------------------------------------------') +# one_cmd = f'python render.py -m output/{scene}' +# print(one_cmd) +# os.system(one_cmd) +# +# # python metrics.py -m +# for idx, scene in enumerate(['town-train-cpy', 'town2-train-cpy', 'building1-train']): +# print('---------------------------------------------------------------------------------') +# one_cmd = f'python metrics.py -m output/{scene}' +# print(one_cmd) +# os.system(one_cmd) + +for idx, scene in enumerate({'building2-train': 'cpu', 'building3-train': 'cuda'}.items()): print('---------------------------------------------------------------------------------') - one_cmd = f'python train.py --source_path ../../Dataset/3DGS_Dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --eval' + one_cmd = f'python train.py --source_path /data2/lpl/data/carla-dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --checkpoint_iterations 30000 --port 6009' + print(one_cmd) + os.system(one_cmd) + +# python render.py -m +for idx, scene in enumerate(['building2-train', 'building3-train']): + print('---------------------------------------------------------------------------------') + one_cmd = f'python render.py -m output/{scene}' + print(one_cmd) + os.system(one_cmd) + +# python metrics.py -m +for idx, scene in enumerate(['building2-train', 'building3-train']): + print('---------------------------------------------------------------------------------') + one_cmd = f'python metrics.py -m output/{scene}' + print(one_cmd) os.system(one_cmd) \ No newline at end of file diff --git a/scene/__init__.py b/scene/__init__.py index 8622629..4ab553c 100644 --- a/scene/__init__.py +++ b/scene/__init__.py @@ -26,32 +26,33 @@ class Scene: def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0]): """ - 初始化场景对象 - :param args: 包含模型路径和源路径等模型参数 - :param gaussians: 高斯模型对象,用于场景点的3D表示 - :param load_iteration: 指定加载模型的迭代次数,如果不为None且为-1,则在输出文件夹下的point_cloud/文件夹下搜索迭代次数最大的模型,且不为-1,则加载指定迭代次数的 - :param shuffle: 是否在训练前打乱相机列表 - :param resolution_scales: 分辨率比例列表,用于处理不同分辨率的相机 + 初始化3D场景对象 + + args: 存储着与 GaussianMoedl 相关参数 的args,即包含scene/__init__.py/ModelParams()中的参数 + gaussians: 3D高斯模型对象,用于场景点的3D表示 + + load_iteration: 指定加载模型的迭代次数,如果是-1,则在输出文件夹下的point_cloud/文件夹下搜索迭代次数最大的模型;如果不是None且不是-1,则加载指定迭代次数的 + shuffle: 是否在训练前打乱相机列表 + resolution_scales: 分辨率比例列表,用于处理不同分辨率的相机 """ self.model_path = args.model_path # 模型文件保存路径 self.loaded_iter = None # 已加载的迭代次数 self.gaussians = gaussians # 高斯模型对象 - # 检查并加载已有的训练模型 + # 如果已有训练模型,则加载 if load_iteration: - # 不为None if load_iteration == -1: - # 且为-1,则在输出文件夹下的point_cloud/文件夹下搜索迭代次数最大的模型,记录最大迭代次数 + # 是-1,则在输出文件夹下的point_cloud/文件夹下搜索迭代次数最大的模型,记录最大迭代次数 self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud")) else: - # 不为-1,则加载指定迭代次数的 + # 不是None且不是-1,则加载指定迭代次数的 self.loaded_iter = load_iteration print("Loading trained model at iteration {}".format(self.loaded_iter)) - self.train_cameras = {} # 用于训练的相机参数 - self.test_cameras = {} # 用于测试的相机参数 + self.train_cameras = {} # 用于训练的相机 + self.test_cameras = {} # 用于测试的相机 - # 判断数据集类型是COLMAP的输出,还是Blender得输出,并从中加载场景信息 + # 从COLMAP或Blender的输出结果中构建 场景信息(包括点云、训练用相机、测试用相机、场景归一化参数和点云文件路径) if os.path.exists(os.path.join(args.source_path, "sparse")): scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, 
args.images, args.eval) elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")): @@ -60,28 +61,33 @@ class Scene: else: assert False, "Could not recognize scene type!" - # loaded_iter = None,模型还未训练过, if not self.loaded_iter: + # 如果没有加载模型,则将点云文件point3D.ply文件复制到input.ply文件 with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file: dest_file.write(src_file.read()) + json_cams = [] camlist = [] if scene_info.test_cameras: + # 测试相机添加到 camlist 中 camlist.extend(scene_info.test_cameras) if scene_info.train_cameras: + # 训练相机添加到 camlist 中 camlist.extend(scene_info.train_cameras) + # 遍历 camlist 中的所有相机,使用 camera_to_JSON 函数将每个相机转换为 JSON 格式,并添加到 json_cams 列表中,并将 json_cams 写入 cameras.json 文件中 for id, cam in enumerate(camlist): json_cams.append(camera_to_JSON(id, cam)) with open(os.path.join(self.model_path, "cameras.json"), 'w') as file: json.dump(json_cams, file) if shuffle: + # 随机打乱训练和测试相机列表 random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling self.cameras_extent = scene_info.nerf_normalization["radius"] - # 根据resolution_scales加载不同分辨率的训练和测试位姿 + # 根据resolution_scales加载不同分辨率的训练和测试相机(包含R、T、视场角) for resolution_scale in resolution_scales: print("Loading Training Cameras") self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args) @@ -89,13 +95,13 @@ class Scene: self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args) if self.loaded_iter: - # 直接读取对应(已经迭代出来的)场景 + # 如果加载已训练模型,则直接读取对应(已经迭代出来的)场景 self.gaussians.load_ply(os.path.join(self.model_path, "point_cloud", "iteration_" + str(self.loaded_iter), "point_cloud.ply")) else: - # loaded_iter = None,模型还未训练过,调用GaussianModel.create_from_pcd从scene_info.point_cloud中建立模型 + # 不加载训练模型,则调用 GaussianModel.create_from_pcd 从稀疏点云 scene_info.point_cloud 中建立模型 self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent) def save(self, iteration): diff --git a/scene/colmap_loader.py b/scene/colmap_loader.py index 8f6fba6..5283615 100644 --- a/scene/colmap_loader.py +++ b/scene/colmap_loader.py @@ -41,6 +41,9 @@ CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) def qvec2rotmat(qvec): + ''' + 四元数qvec=[w, x, y, z] 转 旋转矩阵 + ''' return np.array([ [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], @@ -129,7 +132,6 @@ def read_points3D_binary(path_to_model_file): void Reconstruction::WritePoints3DBinary(const std::string& path) """ - with open(path_to_model_file, "rb") as fid: num_points = read_next_bytes(fid, 8, "Q")[0] diff --git a/scene/dataset_readers.py b/scene/dataset_readers.py index 789e4bf..56a688b 100644 --- a/scene/dataset_readers.py +++ b/scene/dataset_readers.py @@ -66,28 +66,34 @@ def getNerfppNorm(cam_info): return {"translate": translate, "radius": radius} def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder): - cam_infos = [] # 初始化用于存储相机信息的列表 + ''' + + cam_extrinsics: 存储每张图片相机的外参类Imgae 的字典 + cam_intrinsics: 存储每张图片相机的内参类Camera 的字典 + images_folder: 保存原图的文件夹路径 + ''' + # 初始化存储相机信息类CameraInfo对象的列表 + cam_infos = [] # 遍历所有相机的外参 for idx, key in enumerate(cam_extrinsics): # 动态显示读取相机信息的进度 - sys.stdout.write('\r') - # the exact output you're looking for: + sys.stdout.write('\r') # 光标回到当前行的最前面 sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics))) - sys.stdout.flush() + 
sys.stdout.flush() # 立即将缓冲区中的内容输出到控制台
 
         # 获取当前相机的外参和内参
-        extr = cam_extrinsics[key] # 当前相机的外参
-        intr = cam_intrinsics[extr.camera_id] # 根据外参中的camera_id找到对应的内参
-        height = intr.height # 相机图片的高度
-        width = intr.width # 相机图片的宽度
+        extr = cam_extrinsics[key] # 当前相机的外参类Image对象
+        intr = cam_intrinsics[extr.camera_id] # 根据外参中的camera_id找到对应的内参类对象
+        height = intr.height # 图片高度
+        width = intr.width # 图片宽度
 
         uid = intr.id # 相机的唯一标识符
-        R = np.transpose(qvec2rotmat(extr.qvec)) # 将四元数表示的旋转转换为旋转矩阵R
-        T = np.array(extr.tvec) # 外参中的平移向量
+        R = np.transpose(qvec2rotmat(extr.qvec)) # 将旋转四元数 转为 旋转矩阵 R,并转置
+        T = np.array(extr.tvec) # 平移向量
 
-        # 根据相机内参模型计算视场角(FoV)
+        # 根据相机内参模型计算 视场角(FoV)
         if intr.model=="SIMPLE_PINHOLE":
             # 如果是简单针孔模型,只有一个焦距参数
             focal_length_x = intr.params[0]
@@ -97,14 +103,13 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
             # 如果是针孔模型,有两个焦距参数
             focal_length_x = intr.params[0]
             focal_length_y = intr.params[1]
-            FovY = focal2fov(focal_length_y, height) # 使用y方向的焦距计算垂直视场角
-            FovX = focal2fov(focal_length_x, width) # 使用x方向的焦距计算水平视场角
+            FovY = focal2fov(focal_length_y, height) # 使用fy计算垂直视场角
+            FovX = focal2fov(focal_length_x, width) # 使用fx计算水平视场角
         elif intr.model=="SIMPLE_RADIAL":
             # 如果是针孔模型,有两个焦距参数
             focal_length_x = intr.params[0]
-            focal_length_y = intr.params[1]
-            FovY = focal2fov(focal_length_y, height) # 使用y方向的焦距计算垂直视场角
-            FovX = focal2fov(focal_length_x, width) # 使用x方向的焦距计算水平视场角
+            FovY = focal2fov(focal_length_x, height) # 使用fx计算垂直视场角(SIMPLE_RADIAL只有一个焦距参数)
+            FovX = focal2fov(focal_length_x, width) # 使用fx计算水平视场角
         else:
             # 如果不是以上两种模型,抛出错误
             assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
@@ -116,6 +121,7 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
             continue
 
         image = Image.open(image_path)
+        # 创建相机信息类CameraInfo对象 (包含旋转矩阵、平移向量、视场角、图像数据、图片路径、图片名、宽度、高度),并添加到列表cam_infos中
         cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
                               image_path=image_path, image_name=image_name, width=width, height=height)
         cam_infos.append(cam_info)
@@ -123,7 +129,6 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
     sys.stdout.write('\n')
     print("valid Colmap camera size: {}".format(len(cam_infos)))
 
-    # 返回整理好的相机信息列表
     return cam_infos
 
 def fetchPly(path):
@@ -153,30 +158,38 @@ def storePly(path, xyz, rgb):
     ply_data = PlyData([vertex_element])
     ply_data.write(path)
 
-# 尝试读取COLMAP处理结果中的二进制相机外参文件imags.bin 和 内参文件cameras.bin
 def readColmapSceneInfo(path, images, eval, llffhold=8):
+    '''
+    加载COLMAP的结果中的二进制相机外参文件images.bin 和 内参文件cameras.bin
+
+    path: 数据集源路径,即 args.source_path
+    images: 存放图片的文件夹名,默认为'images'
+    eval: 是否为eval模式
+    llffhold: 测试集采样间隔,默认为8
+    '''
+
     try:
         cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
         cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin")
         cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file)
         cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file)
     except:
-        # 如果二进制文件读取失败,尝试读取文本格式的相机外参和内参文件
+        # 如果bin文件读取失败,尝试读取txt格式的相机外参和内参文件
        cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt")
         cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.txt")
         cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file)
         cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file)
 
-    # 定义存放图片的目录,如果未指定则默认为"images"
+    # 存储原图片的文件夹名,默认为'images',要从中读取图片
     reading_dir = "images" if images == None else images
 
-    # 读取并处理相机参数,转换为内部使用的格式
+    # 根据每个相机的内、外参,构建CameraInfo类的对象 (包含旋转矩阵、平移向量、视场角、图像数据、图片路径、图片名、宽度、高度),存储在cam_infos_unsorted列表中
    cam_infos_unsorted = 
readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir)) - # 根据图片名称对相机信息进行排序,以保证顺序一致性 + # 根据图片名称排序,以保证顺序一致性 cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : (x.image_path.split('/')[-2], int(x.image_name))) # 根据是否为评估模式(eval),将相机分为训练集和测试集 - # 如果为评估模式,根据llffhold参数(通常用于LLFF数据集)间隔选择测试相机 + # 如果为评估模式,每llffhold张图片取一张作为测试集 if eval: train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0] test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold == 0] @@ -188,7 +201,7 @@ def readColmapSceneInfo(path, images, eval, llffhold=8): # 计算场景归一化参数,这是为了处理不同尺寸和位置的场景,使模型训练更稳定 nerf_normalization = getNerfppNorm(train_cam_infos) - # 尝试读取点云数据,优先从PLY文件读取,如果不存在,则尝试从BIN或TXT文件转换并保存为PLY格式 + # 尝试读取COLMAP生成的稀疏点云数据,优先从PLY文件读取,如果不存在,则尝试从BIN或TXT文件转换并保存为PLY格式 ply_path = os.path.join(path, "sparse/0/points3D.ply") bin_path = os.path.join(path, "sparse/0/points3D.bin") txt_path = os.path.join(path, "sparse/0/points3D.txt") @@ -198,9 +211,12 @@ def readColmapSceneInfo(path, images, eval, llffhold=8): xyz, rgb, _ = read_points3D_binary(bin_path) # 从points3D.bin读取COLMAP产生的稀疏点云 except: xyz, rgb, _ = read_points3D_text(txt_path) + storePly(ply_path, xyz, rgb) # 转换成ply文件 + try: - pcd = fetchPly(ply_path) + pcd = fetchPly(ply_path) # points3D.ply读取COLMAP产生的稀疏点云 + except: pcd = None @@ -293,4 +309,4 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"): sceneLoadTypeCallbacks = { "Colmap": readColmapSceneInfo, "Blender" : readNerfSyntheticInfo -} \ No newline at end of file +} diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py index 3c8b592..3e31289 100644 --- a/scene/gaussian_model.py +++ b/scene/gaussian_model.py @@ -25,10 +25,9 @@ class GaussianModel: def setup_functions(self): """ - 定义和初始化一些用于处理3D高斯模型参数的函数 + 定义和初始化处理高斯体模型参数的 激活函数 """ - - # 定义构建3D高斯协方差矩阵的函数 + # 定义 从尺度、旋转构建3D高斯的 协方差矩阵 的函数 def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation): L = build_scaling_rotation(scaling_modifier * scaling, rotation) # 从尺度、尺度的缩放、旋转得到L矩阵 actual_covariance = L @ L.transpose(1, 2) # 计算实际的协方差矩阵 @@ -49,29 +48,30 @@ class GaussianModel: def __init__(self, sh_degree : int): """ - 初始化3D高斯模型的参数 - sh_degree: 球谐函数的最大阶数,用于控制颜色表示的复杂度 + 初始化3D高斯模型的参数 + sh_degree: 设定的 球谐函数的最大阶数,默认为3,用于控制颜色表示的复杂度 """ - # 初始化球谐阶数和最大球谐阶数j self.active_sh_degree = 0 # 当前激活的球谐阶数,初始为0 - self.max_sh_degree = sh_degree # 允许的最大球谐阶数 + self.max_sh_degree = sh_degree # 允许的最大球谐阶数j # 初始化3D高斯模型的各项参数 - self._xyz = torch.empty(0) # 3D高斯的中心位置(均值) + self._xyz = torch.empty(0) # 3D高斯的 中心位置(均值) self._features_dc = torch.empty(0) # 第一个球谐系数,用于表示基础颜色 - self._features_rest = torch.empty(0) # 其余的球谐系数,用于表示颜色的细节和变化 - self._scaling = torch.empty(0) # 3D高斯的尺度参数,控制高斯的形状 - self._rotation = torch.empty(0) # 3D高斯的旋转参数(一系列四元数) + self._features_rest = torch.empty(0) # 其余球谐系数,用于表示颜色的细节和变化 + self._scaling = torch.empty(0) # 3D高斯的尺度,控制高斯的形状 + self._rotation = torch.empty(0) # 3D高斯的旋转(一系列四元数) self._opacity = torch.empty(0) # 3D高斯的不透明度(sigmoid前的),控制可见性 self.max_radii2D = torch.empty(0) # 在2D投影中,每个高斯的最大半径 + self.xyz_gradient_accum = torch.empty(0) # 累积3D高斯中心位置的梯度,当它太大的时候要对Gaussian进行分裂,小时代表under要复制 self.denom = torch.empty(0) # 与累积梯度配合使用,表示统计了多少次累积梯度,算平均梯度时除掉这个(denom = denominator,分母) + self.optimizer = None # 优化器,用于调整上述参数以改进模型(论文中采用Adam,见附录B Algorithm 1的伪代码) self.percent_dense = 0 # 控制Gaussian密集程度的超参数 self.spatial_lr_scale = 0 # 位置坐标的学习率要乘上这个,抵消在不同尺度下应用同一个学习率带来的问题 - # 调用setup_functions来初始化一些处理函数 + # 调用 setup_functions,初始化处理高斯体模型参数的 激活函数 
self.setup_functions() def capture(self): @@ -139,24 +139,23 @@ class GaussianModel: def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float): """ - 从点云数据初始化模型参数 - - :param pcd: 稀疏点云数据,包含点的位置和颜色 - :param spatial_lr_scale: 空间学习率缩放因子,影响 位置坐标参数的学习率 + 从稀疏点云数据 初始化模型参数 + pcd: 稀疏点云,包含点的位置和颜色 + spatial_lr_scale: 空间学习率缩放因子,影响 位置坐标参数的学习率 """ # 根据scene.Scene.__init__ 以及 scene.dataset_readers.SceneInfo.nerf_normalization,即scene.dataset_readers.getNerfppNorm的代码, # 这个值似乎是训练相机中离它们的坐标平均值(即中心)最远距离的1.1倍,根据命名推断应该与学习率有关,防止固定的学习率适配不同尺度的场景时出现问题。 self.spatial_lr_scale = spatial_lr_scale - # 将点云的位置和颜色数据从numpy数组转换为PyTorch张量,并传送到CUDA设备上 + # 将点云的 位置 和 颜色 数据从numpy数组转换为PyTorch张量,并传送到CUDA设备上 fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda() # 稀疏点云的3D坐标,大小为(P, 3) fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda()) # 球谐的直流分量,大小为(P, 3), # RGB2SH(x) = (x - 0.5) / 0.28209479177387814看样子pcd.colors的原始范围应该是0到1。0.28209479177387814是1 / (2*sqrt(pi)),是直流分量Y(l=0,m=0)的值 - # 初始化存储球谐系数的张量,RGB三通道球谐的所有系数,每个通道有(max_sh_degree + 1) ** 2个球谐系数 + # 初始化存储 球谐系数 的张量,RGB三通道球谐的所有系数,每个通道有(max_sh_degree + 1) ** 2个球谐系数 features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda() # (P, 3, 16) - features[:, :3, 0 ] = fused_color # 将RGB转换后的球谐系数C0项的系数(直流分量)存入 + features[:, :3, 0 ] = fused_color # 将RGB转换后的球谐系数C0项的系数(直流分量)存入每个3D点的直流分量球谐系数中 features[:, 3:, 1:] = 0.0 # 其余球谐系数初始化为0 # 打印初始点的数量 diff --git a/train.py b/train.py index 936559c..fdd10a7 100644 --- a/train.py +++ b/train.py @@ -34,18 +34,21 @@ except ImportError: def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from): ''' - dataset: 只存储与Moedl相关参数的args - opt: 优化相关参数 - pipe: 管道相关参数 + dataset: 存储着与 GaussianMoedl 相关参数 的args + opt: 存储着与 优化 相关参数 的args + pipe: 存储着与 管道相关参数 的args checkpoint: 已训练模型的路径 debug_from: 从哪一个迭代开始debug ''' first_iter = 0 - # 创建保存结果的文件夹,并保存模型相关的参数到cfg_args文件;尝试创建tensorboard_writer,记录训练过程 + # 创建保存结果的文件夹output/scene,并保存模型相关的参数到cfg_args文件;尝试创建tensorboard_writer,记录训练过程 tb_writer = prepare_output_and_logger(dataset) - gaussians = GaussianModel(dataset.sh_degree) # 创建初始化高斯模型,用于表示场景中的每个点的3D高斯分布 - scene = Scene(dataset, gaussians) # 创建初始3D场景对象,加载数据集和对应的相机参数 + # 创建高斯模型对象,用于表示场景中的每个点的3D高斯分布 + gaussians = GaussianModel(dataset.sh_degree) + # 创建初始3D场景对象,加载数据集和对应的相机参数 + scene = Scene(dataset, gaussians) + gaussians.training_setup(opt) # 为高斯模型参数设置优化器和学习率调度器 # 如果提供了checkpoint,则从checkpoint加载模型参数并恢复训练进度 @@ -131,7 +134,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi print("\n[ITER {}] Saving Gaussians".format(iteration)) scene.save(iteration) - # 在指定迭代区间内,对3D高斯模型进行增密和修剪,Densification + # Densification,在指定迭代区间内,对3D高斯模型进行增密和修剪 if iteration < opt.densify_until_iter: # Keep track of max radii in image-space for pruning gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter]) @@ -144,7 +147,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter): gaussians.reset_opacity() - # 执行优化器的一步,并准备下一次迭代,Optimizer step + # Optimizer step,执行优化器的一步,并准备下一次迭代 if iteration < opt.iterations: gaussians.optimizer.step() gaussians.optimizer.zero_grad(set_to_none = True) @@ -219,12 +222,13 @@ if __name__ == "__main__": # Set up command line argument parser parser = ArgumentParser(description="Training script parameters") - 
model_prams = ModelParams(parser) # 定义存储 模型 相关参数的arg对象 - optim_prams = OptimizationParams(parser) # 定义存储 优化 相关参数的arg对象 - pipeline_prams = PipelineParams(parser) # 定义存储 渲染 相关参数的arg对象 + # 创建 模型、优化、渲染 相关参数的对象 + lp = ModelParams(parser) + op = OptimizationParams(parser) + pp = PipelineParams(parser) parser.add_argument('--ip', type=str, default="127.0.0.1") - parser.add_argument('--port', type=int, default=6009) + parser.add_argument('--port', type=int, default=6007) parser.add_argument('--debug_from', type=int, default=-1) # 指定从哪一迭代(>= 0)开始debug parser.add_argument('--detect_anomaly', action='store_true', default=False) # action='store_true' 如果命令行中包含了这个参数,它的值将被设置为 True parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000]) @@ -233,7 +237,7 @@ if __name__ == "__main__": parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[30_000]) parser.add_argument("--start_checkpoint", type=str, default = None) - # 从命令行参数中解析出所有的参数值,并与上面设置的参数一起存储到 Namespace 对象中,即args + # 从命令行参数覆盖parser内的参数,并存储到args args = parser.parse_args(sys.argv[1:]) args.save_iterations.append(args.iterations) @@ -248,8 +252,12 @@ if __name__ == "__main__": torch.autograd.set_detect_anomaly(args.detect_anomaly) # 设置pytorch是否检测梯度异常 - # model_prams.extract(args):将args中的属性,即命令行和预设的参数中 与 ModelParams类中定义的参数相匹配的值,并将它们封装到一个新的 GroupParams 对象中 - training(model_prams.extract(args), optim_prams.extract(args), pipeline_prams.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from) + # lp.extract(args):args中参数 覆盖 模型、优化、渲染 的参数,并形成新的args + lp_args = lp.extract(args) + op_args = op.extract(args) + pp_args = pp.extract(args) + + training(lp_args, op_args, pp_args, args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from) # All done print("\nTraining complete.") diff --git a/utils/camera_utils.py b/utils/camera_utils.py index 1a54d0a..2451104 100644 --- a/utils/camera_utils.py +++ b/utils/camera_utils.py @@ -52,6 +52,11 @@ def loadCam(args, id, cam_info, resolution_scale): image_name=cam_info.image_name, uid=id, data_device=args.data_device) def cameraList_from_camInfos(cam_infos, resolution_scale, args): + ''' + cam_infos: 训练或测试相机对象列表 + resolution_scale: 不同分辨率列表 + args: 高斯模型参数 + ''' camera_list = [] for id, c in enumerate(cam_infos): diff --git a/utils/graphics_utils.py b/utils/graphics_utils.py index b4627d8..68a8e6f 100644 --- a/utils/graphics_utils.py +++ b/utils/graphics_utils.py @@ -74,4 +74,8 @@ def fov2focal(fov, pixels): return pixels / (2 * math.tan(fov / 2)) def focal2fov(focal, pixels): - return 2*math.atan(pixels/(2*focal)) \ No newline at end of file + ''' + focal: fx 或 fy + pixels: 宽度或高度,单位为像素 + ''' + return 2 * math.atan(pixels / (2 * focal)) \ No newline at end of file
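# ----------------------------------------------------------------------------
# Illustrative note (not part of the patch above): the fov2focal / focal2fov
# helpers documented in utils/graphics_utils.py implement the standard pinhole
# relation fov = 2 * atan(pixels / (2 * focal)) and its inverse. A minimal
# round-trip sketch, assuming a hypothetical image width of 1600 px and fx = 1000:
import math

def focal2fov(focal, pixels):
    return 2 * math.atan(pixels / (2 * focal))

def fov2focal(fov, pixels):
    return pixels / (2 * math.tan(fov / 2))

fx, width = 1000.0, 1600
fov_x = focal2fov(fx, width)                     # horizontal FoV in radians (~1.35 rad, ~77.3 deg)
assert abs(fov2focal(fov_x, width) - fx) < 1e-6  # the two conversions round-trip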