Mirror of https://github.com/graphdeco-inria/gaussian-splatting
Synced 2025-06-08 15:46:12 +00:00

Commit 1a35943d4c ("add comments"), parent 8b998dfba6.
@@ -63,7 +63,7 @@ class ModelParams(ParamGroup):
        Extract the values from args that match the parameters defined in ModelParams
        and wrap them into a new GroupParams object.

        args: holds the command-line arguments together with the defaults preset in main
        '''
-       g = super().extract(args)  # the returned GroupParams object
+       g = super().extract(args)  # overwrite the matching values and return a GroupParams object
        g.source_path = os.path.abspath(g.source_path)  # convert source_path to an absolute path
        return g

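For readers unfamiliar with the ParamGroup/GroupParams machinery, a minimal sketch of the behaviour the new docstring describes is shown below. It is an illustration under assumptions (the attribute-matching rule and the leading-underscore convention), not the repository's actual implementation.

# Hypothetical sketch: copy every attribute of args whose name matches a field
# declared on this parameter group into a fresh GroupParams container.
class GroupParams:
    pass

class ParamGroup:
    def extract(self, args):
        group = GroupParams()
        for name, value in vars(args).items():
            # fields may be stored with a leading underscore on the group (assumption)
            if name in vars(self) or ("_" + name) in vars(self):
                setattr(group, name, value)
        return group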
@@ -49,7 +49,7 @@ def evaluate(model_paths):
            full_dict_polytopeonly[scene_dir] = {}
            per_view_dict_polytopeonly[scene_dir] = {}

-           test_dir = Path(scene_dir) / "test"
+           test_dir = Path(scene_dir) / "train"

            for method in os.listdir(test_dir):
                print("Method:", method)

read_write_binary.py (new file, 593 lines)
@@ -0,0 +1,593 @@
import os
import collections
import numpy as np
import struct
import argparse


CameraModel = collections.namedtuple(
    "CameraModel", ["model_id", "model_name", "num_params"]
)
Camera = collections.namedtuple(
    "Camera", ["id", "model", "width", "height", "params"]
)
BaseImage = collections.namedtuple(
    "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]
)
Point3D = collections.namedtuple(
    "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]
)


class Image(BaseImage):
    def qvec2rotmat(self):
        return qvec2rotmat(self.qvec)


CAMERA_MODELS = {
    CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
    CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
    CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
    CameraModel(model_id=3, model_name="RADIAL", num_params=5),
    CameraModel(model_id=4, model_name="OPENCV", num_params=8),
    CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
    CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
    CameraModel(model_id=7, model_name="FOV", num_params=5),
    CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
    CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
    CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12),
}
CAMERA_MODEL_IDS = dict(
    [(camera_model.model_id, camera_model) for camera_model in CAMERA_MODELS]
)
CAMERA_MODEL_NAMES = dict(
    [(camera_model.model_name, camera_model) for camera_model in CAMERA_MODELS]
)

def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read and unpack the next bytes from a binary file.
    :param fid:
    :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: Any of {@, =, <, >, !}
    :return: Tuple of read and unpacked values.
    """
    data = fid.read(num_bytes)
    return struct.unpack(endian_character + format_char_sequence, data)


def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
    """pack and write to a binary file.
    :param fid:
    :param data: data to send, if multiple elements are sent at the same time,
        they should be encapsuled either in a list or a tuple
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
        should be the same length as the data list or tuple
    :param endian_character: Any of {@, =, <, >, !}
    """
    if isinstance(data, (list, tuple)):
        bytes = struct.pack(endian_character + format_char_sequence, *data)
    else:
        bytes = struct.pack(endian_character + format_char_sequence, data)
    fid.write(bytes)

def read_cameras_text(path):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                camera_id = int(elems[0])
                model = elems[1]
                width = int(elems[2])
                height = int(elems[3])
                params = np.array(tuple(map(float, elems[4:])))
                cameras[camera_id] = Camera(
                    id=camera_id,
                    model=model,
                    width=width,
                    height=height,
                    params=params,
                )
    return cameras


def read_cameras_binary(path_to_model_file):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    cameras = {}
    with open(path_to_model_file, "rb") as fid:
        num_cameras = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_cameras):
            camera_properties = read_next_bytes(
                fid, num_bytes=24, format_char_sequence="iiQQ"
            )
            camera_id = camera_properties[0]
            model_id = camera_properties[1]
            model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
            width = camera_properties[2]
            height = camera_properties[3]
            num_params = CAMERA_MODEL_IDS[model_id].num_params
            params = read_next_bytes(
                fid,
                num_bytes=8 * num_params,
                format_char_sequence="d" * num_params,
            )
            cameras[camera_id] = Camera(
                id=camera_id,
                model=model_name,
                width=width,
                height=height,
                params=np.array(params),
            )
        assert len(cameras) == num_cameras
    return cameras


def write_cameras_text(cameras, path):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    HEADER = (
        "# Camera list with one line of data per camera:\n"
        + "# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n"
        + "# Number of cameras: {}\n".format(len(cameras))
    )
    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, cam in cameras.items():
            to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
            line = " ".join([str(elem) for elem in to_write])
            fid.write(line + "\n")


def write_cameras_binary(cameras, path_to_model_file):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        write_next_bytes(fid, len(cameras), "Q")
        for _, cam in cameras.items():
            model_id = CAMERA_MODEL_NAMES[cam.model].model_id
            camera_properties = [cam.id, model_id, cam.width, cam.height]
            write_next_bytes(fid, camera_properties, "iiQQ")
            for p in cam.params:
                write_next_bytes(fid, float(p), "d")
    return cameras

def read_images_text(path):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                elems = fid.readline().split()
                xys = np.column_stack(
                    [
                        tuple(map(float, elems[0::3])),
                        tuple(map(float, elems[1::3])),
                    ]
                )
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id,
                    qvec=qvec,
                    tvec=tvec,
                    camera_id=camera_id,
                    name=image_name,
                    xys=xys,
                    point3D_ids=point3D_ids,
                )
    return images


def read_images_binary(path_to_model_file):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    images = {}
    with open(path_to_model_file, "rb") as fid:
        num_reg_images = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_reg_images):
            binary_image_properties = read_next_bytes(
                fid, num_bytes=64, format_char_sequence="idddddddi"
            )
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])
            camera_id = binary_image_properties[8]
            image_name = ""
            current_char = read_next_bytes(fid, 1, "c")[0]
            while current_char != b"\x00":  # look for the ASCII 0 entry
                image_name += current_char.decode("utf-8")
                current_char = read_next_bytes(fid, 1, "c")[0]
            num_points2D = read_next_bytes(
                fid, num_bytes=8, format_char_sequence="Q"
            )[0]
            x_y_id_s = read_next_bytes(
                fid,
                num_bytes=24 * num_points2D,
                format_char_sequence="ddq" * num_points2D,
            )
            xys = np.column_stack(
                [
                    tuple(map(float, x_y_id_s[0::3])),
                    tuple(map(float, x_y_id_s[1::3])),
                ]
            )
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id,
                qvec=qvec,
                tvec=tvec,
                camera_id=camera_id,
                name=image_name,
                xys=xys,
                point3D_ids=point3D_ids,
            )
    return images


def write_images_text(images, path):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    if len(images) == 0:
        mean_observations = 0
    else:
        mean_observations = sum(
            (len(img.point3D_ids) for _, img in images.items())
        ) / len(images)
    HEADER = (
        "# Image list with two lines of data per image:\n"
        + "# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n"
        + "# POINTS2D[] as (X, Y, POINT3D_ID)\n"
        + "# Number of images: {}, mean observations per image: {}\n".format(
            len(images), mean_observations
        )
    )

    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, img in images.items():
            image_header = [
                img.id,
                *img.qvec,
                *img.tvec,
                img.camera_id,
                img.name,
            ]
            first_line = " ".join(map(str, image_header))
            fid.write(first_line + "\n")

            points_strings = []
            for xy, point3D_id in zip(img.xys, img.point3D_ids):
                points_strings.append(" ".join(map(str, [*xy, point3D_id])))
            fid.write(" ".join(points_strings) + "\n")


def write_images_binary(images, path_to_model_file):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        write_next_bytes(fid, len(images), "Q")
        for _, img in images.items():
            write_next_bytes(fid, img.id, "i")
            write_next_bytes(fid, img.qvec.tolist(), "dddd")
            write_next_bytes(fid, img.tvec.tolist(), "ddd")
            write_next_bytes(fid, img.camera_id, "i")
            for char in img.name:
                write_next_bytes(fid, char.encode("utf-8"), "c")
            write_next_bytes(fid, b"\x00", "c")
            write_next_bytes(fid, len(img.point3D_ids), "Q")
            for xy, p3d_id in zip(img.xys, img.point3D_ids):
                write_next_bytes(fid, [*xy, p3d_id], "ddq")

def read_points3D_text(path):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    points3D = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                point3D_id = int(elems[0])
                xyz = np.array(tuple(map(float, elems[1:4])))
                rgb = np.array(tuple(map(int, elems[4:7])))
                error = float(elems[7])
                image_ids = np.array(tuple(map(int, elems[8::2])))
                point2D_idxs = np.array(tuple(map(int, elems[9::2])))
                points3D[point3D_id] = Point3D(
                    id=point3D_id,
                    xyz=xyz,
                    rgb=rgb,
                    error=error,
                    image_ids=image_ids,
                    point2D_idxs=point2D_idxs,
                )
    return points3D


def read_points3D_binary(path_to_model_file):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    points3D = {}
    with open(path_to_model_file, "rb") as fid:
        num_points = read_next_bytes(fid, 8, "Q")[0]
        for _ in range(num_points):
            binary_point_line_properties = read_next_bytes(
                fid, num_bytes=43, format_char_sequence="QdddBBBd"
            )
            point3D_id = binary_point_line_properties[0]
            xyz = np.array(binary_point_line_properties[1:4])
            rgb = np.array(binary_point_line_properties[4:7])
            error = np.array(binary_point_line_properties[7])
            track_length = read_next_bytes(
                fid, num_bytes=8, format_char_sequence="Q"
            )[0]
            track_elems = read_next_bytes(
                fid,
                num_bytes=8 * track_length,
                format_char_sequence="ii" * track_length,
            )
            image_ids = np.array(tuple(map(int, track_elems[0::2])))
            point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
            points3D[point3D_id] = Point3D(
                id=point3D_id,
                xyz=xyz,
                rgb=rgb,
                error=error,
                image_ids=image_ids,
                point2D_idxs=point2D_idxs,
            )
    return points3D


def write_points3D_text(points3D, path):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    if len(points3D) == 0:
        mean_track_length = 0
    else:
        mean_track_length = sum(
            (len(pt.image_ids) for _, pt in points3D.items())
        ) / len(points3D)
    HEADER = (
        "# 3D point list with one line of data per point:\n"
        + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n"
        + "# Number of points: {}, mean track length: {}\n".format(
            len(points3D), mean_track_length
        )
    )

    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, pt in points3D.items():
            point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
            fid.write(" ".join(map(str, point_header)) + " ")
            track_strings = []
            for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
                track_strings.append(" ".join(map(str, [image_id, point2D])))
            fid.write(" ".join(track_strings) + "\n")


def write_points3D_binary(points3D, path_to_model_file):
    """
    see: src/colmap/scene/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        write_next_bytes(fid, len(points3D), "Q")
        for _, pt in points3D.items():
            write_next_bytes(fid, pt.id, "Q")
            write_next_bytes(fid, pt.xyz.tolist(), "ddd")
            write_next_bytes(fid, pt.rgb.tolist(), "BBB")
            write_next_bytes(fid, pt.error, "d")
            track_length = pt.image_ids.shape[0]
            write_next_bytes(fid, track_length, "Q")
            for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
                write_next_bytes(fid, [image_id, point2D_id], "ii")

def detect_model_format(path, ext):
    if (
        os.path.isfile(os.path.join(path, "cameras" + ext))
        and os.path.isfile(os.path.join(path, "images" + ext))
        and os.path.isfile(os.path.join(path, "points3D" + ext))
    ):
        print("Detected model format: '" + ext + "'")
        return True

    return False


def read_model(path, ext=""):
    # try to detect the extension automatically
    if ext == "":
        if detect_model_format(path, ".bin"):
            ext = ".bin"
        elif detect_model_format(path, ".txt"):
            ext = ".txt"
        else:
            print("Provide model format: '.bin' or '.txt'")
            return

    if ext == ".txt":
        cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
        images = read_images_text(os.path.join(path, "images" + ext))
        points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
    else:
        cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
        images = read_images_binary(os.path.join(path, "images" + ext))
        points3D = read_points3D_binary(os.path.join(path, "points3D") + ext)
    return cameras, images, points3D


def write_model(cameras, images, points3D, path, ext=".bin"):
    if ext == ".txt":
        write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
        write_images_text(images, os.path.join(path, "images" + ext))
        write_points3D_text(points3D, os.path.join(path, "points3D") + ext)
    else:
        write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
        write_images_binary(images, os.path.join(path, "images" + ext))
        write_points3D_binary(points3D, os.path.join(path, "points3D") + ext)
    return cameras, images, points3D

def qvec2rotmat(qvec):
    return np.array(
        [
            [
                1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
                2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
                2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2],
            ],
            [
                2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
                1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
                2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1],
            ],
            [
                2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
                2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
                1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2,
            ],
        ]
    )


def rotmat2qvec(R):
    Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
    K = (
        np.array(
            [
                [Rxx - Ryy - Rzz, 0, 0, 0],
                [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
                [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
                [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz],
            ]
        )
        / 3.0
    )
    eigvals, eigvecs = np.linalg.eigh(K)
    qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
    if qvec[0] < 0:
        qvec *= -1
    return qvec

def main():
    parser = argparse.ArgumentParser(
        description="Read and write COLMAP binary and text models"
    )
    parser.add_argument("--input_model", help="path to input model folder")
    parser.add_argument(
        "--input_format",
        choices=[".bin", ".txt"],
        help="input model format",
        default="",
    )
    parser.add_argument("--output_model", help="path to output model folder")
    parser.add_argument(
        "--output_format",
        choices=[".bin", ".txt"],
        help="output model format",
        default=".txt",
    )
    args = parser.parse_args()

    cameras, images, points3D = read_model(
        path=args.input_model, ext=args.input_format
    )

    print("num_cameras:", len(cameras))
    print("num_images:", len(images))
    print("num_points3D:", len(points3D))

    if args.output_model is not None:
        write_model(
            cameras,
            images,
            points3D,
            path=args.output_model,
            ext=args.output_format,
        )


def images_points_scale(root, scale_factor):
    # Scale a COLMAP reconstruction: multiply every image translation and every
    # 3D point position by scale_factor and write the results to new .bin files.
    images_path = os.path.join(root, 'images.bin')
    new_images_path = os.path.join(root, 'images_scaled.bin')
    points3D_path = os.path.join(root, 'points3D.bin')
    new_points3D_path = os.path.join(root, 'points3D_scaled.bin')

    images = read_images_binary(images_path)
    points = read_points3D_binary(points3D_path)
    for k, v in images.items():
        new_image = v._replace(tvec=v.tvec * scale_factor)
        images[k] = new_image
    write_images_binary(images, new_images_path)

    for k, v in points.items():
        new_point = v._replace(xyz=v.xyz * scale_factor)
        points[k] = new_point
    write_points3D_binary(points, new_points3D_path)


# if __name__ == "__main__":
#     root = "/home/ubuntu/Downloads/outputs/dense/sparse"
#     images_points_scale(root, 1000)
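A minimal usage sketch for the file above (not part of the commit; the folder names and the scale factor are placeholders): load a model with read_model, rescale it the same way images_points_scale does, and write it back out.

import os
import read_write_binary as rwb

# Assumed input layout: a COLMAP model in sparse/0; adjust to your reconstruction.
cameras, images, points3D = rwb.read_model("sparse/0", ext=".bin")

scale = 1000.0  # e.g. metres -> millimetres (placeholder value)
images = {k: img._replace(tvec=img.tvec * scale) for k, img in images.items()}
points3D = {k: pt._replace(xyz=pt.xyz * scale) for k, pt in points3D.items()}

os.makedirs("sparse_scaled", exist_ok=True)
rwb.write_model(cameras, images, points3D, "sparse_scaled", ext=".bin")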
render_reconeval.py (new file, 101 lines)
@@ -0,0 +1,101 @@
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact  george.drettakis@inria.fr
#
import numpy as np
import torch
from scene import Scene
import os
import shutil
from tqdm import tqdm
from os import makedirs
from gaussian_renderer import render
import torchvision
from utils.general_utils import safe_state
from argparse import ArgumentParser
from arguments import ModelParams, PipelineParams, get_combined_args
from gaussian_renderer import GaussianModel
import read_write_binary as im


def render_set(model_path, name, iteration, views, gaussians, pipeline, background):
    render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders")
    gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt")

    makedirs(render_path, exist_ok=True)
    makedirs(gts_path, exist_ok=True)

    for idx, view in enumerate(tqdm(views, desc="Rendering progress")):
        rendering = render(view, gaussians, pipeline, background)["render"]
        gt = view.original_image[0:3, :, :]
        # image = view.image_name
        torchvision.utils.save_image(rendering, os.path.join(render_path, '{}.png'.format(view.image_name)))
        torchvision.utils.save_image(gt, os.path.join(gts_path, '{}.png'.format(view.image_name)))


def render_sets(dataset : ModelParams, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool):
    with torch.no_grad():
        gaussians = GaussianModel(dataset.sh_degree)
        scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False, override_quantization=True)

        bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
        background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")

        if not skip_train:
            render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background)

        if not skip_test:
            render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background)


if __name__ == "__main__":
    # Set up command line argument parser
    parser = ArgumentParser(description="Testing script parameters")
    model = ModelParams(parser, sentinel=True)
    pipeline = PipelineParams(parser)
    parser.add_argument("--iteration", default=-1, type=int)
    parser.add_argument("--skip_train", action="store_true")
    parser.add_argument("--skip_test", action="store_true")
    parser.add_argument("--quiet", action="store_true")
    args = get_combined_args(parser)

    # Initialize system state (RNG)
    safe_state(args.quiet)

    if os.path.exists(args.source_path):
        shutil.rmtree(args.source_path)
        os.mkdir(args.source_path)
    else:
        os.mkdir(args.source_path)

    shutil.copytree(os.path.join(args.source_path, "../sparse"), os.path.join(args.source_path, "sparse"))
    shutil.copytree(os.path.join(args.source_path, "../images"), os.path.join(args.source_path, "images"))
    shutil.copy(os.path.join(args.source_path, "../test_aligned_pose.txt"),
                os.path.join(args.source_path, "test_aligned_pose.txt"))
    data = im.read_images_binary(os.path.join(args.source_path, "sparse", "images.bin"))
    image = data[1]
    new_data = {}
    with open(os.path.join(args.source_path, "test_aligned_pose.txt"), "r") as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            n, tx, ty, tz, qx, qy, qz, qw = line.split(" ")
            name = "{}.png".format(n)
            if not os.path.exists(os.path.join(args.source_path, "images", name)):
                images = [i for i in os.listdir(os.path.join(args.source_path, "images")) if ".png" in i]
                shutil.copy(os.path.join(os.path.join(args.source_path, "images", images[0])),
                            os.path.join(os.path.join(args.source_path, "images", name)))
            i = int(n)
            qvec = [float(i) for i in [qw, qx, qy, qz]]
            tvec = [float(i) for i in [tx, ty, tz]]
            # image = data[1]
            image = image._replace(id=i, qvec=np.array(qvec), tvec=np.array(tvec), name=name)
            # data[1 + i] = image
            new_data[i] = image
    print(len(new_data))
    im.write_images_binary(new_data, os.path.join(args.source_path, "sparse/0", "images.bin"))

    render_sets(model.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test)
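From the parsing in the __main__ block, each line of test_aligned_pose.txt is expected to hold a frame id, a translation and a quaternion in x y z w order, separated by single spaces; the quaternion is re-packed as (w, x, y, z) before being written into images.bin. A hypothetical example (all numbers are placeholders):

# id  tx    ty    tz   qx  qy  qz  qw   -- assumed column order, values made up
line = "12 0.10 -0.30 2.50 0.0 0.0 0.0 1.0"
n, tx, ty, tz, qx, qy, qz, qw = line.split(" ")
qvec = [float(v) for v in [qw, qx, qy, qz]]  # COLMAP stores w first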
@@ -1,11 +1,46 @@
# python train.py --source_path ../../Dataset/3DGS_Dataset/linggongtang --model_path output/linggongtang --data_device 'cpu' --eval --resolution 1
# scene: {'kejiguan': 'cuda', 'wanfota': 'cuda', 'zhiwu': 'cuda', 'linggongtang': 'cpu', 'xiangjiadang': 'cpu', 'sipingguzhai': 'cpu'}
# scene: {'kejiguan': 'cuda', 'wanfota': 'cuda', 'zhiwu': 'cuda', 'linggongtang': 'cuda', 'xiangjiadang': 'cuda', 'town-train-cpy': 'cuda', 'town2-train-cpy': 'cuda', 'sipingguzhai': 'cpu'}
# device = cuda: kejiguan, wanfota, zhiwu
#        = cpu:  linggongtang, xiangjiadang, sipingguzhai

import os

for cuda, scene in enumerate({'linggongtang': 'cpu', 'xiangjiadang': 'cpu', 'sipingguzhai': 'cpu'}.items()):
# for idx, scene in enumerate({'town-train': 'cuda', 'town2-train': 'cuda', 'building1-train': 'cuda'}.items()):
#     print('---------------------------------------------------------------------------------')
#     one_cmd = f'python train.py --source_path /data2/lpl/data/carla-dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --checkpoint_iterations 30000'
#     print(one_cmd)
#     os.system(one_cmd)
#
# # python render.py -m <path to trained model>
# for idx, scene in enumerate(['town-train-cpy', 'town2-train-cpy', 'building1-train']):
#     print('---------------------------------------------------------------------------------')
#     one_cmd = f'python render.py -m output/{scene}'
#     print(one_cmd)
#     os.system(one_cmd)
#
# # python metrics.py -m <path to trained model>
# for idx, scene in enumerate(['town-train-cpy', 'town2-train-cpy', 'building1-train']):
#     print('---------------------------------------------------------------------------------')
#     one_cmd = f'python metrics.py -m output/{scene}'
#     print(one_cmd)
#     os.system(one_cmd)

for idx, scene in enumerate({'building2-train': 'cpu', 'building3-train': 'cuda'}.items()):
    print('---------------------------------------------------------------------------------')
    one_cmd = f'python train.py --source_path ../../Dataset/3DGS_Dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --eval'
    one_cmd = f'python train.py --source_path /data2/lpl/data/carla-dataset/{scene[0]} --model_path output/{scene[0]} --data_device "{scene[1]}" --resolution 1 --checkpoint_iterations 30000 --port 6009'
    print(one_cmd)
    os.system(one_cmd)

# python render.py -m <path to trained model>
for idx, scene in enumerate(['building2-train', 'building3-train']):
    print('---------------------------------------------------------------------------------')
    one_cmd = f'python render.py -m output/{scene}'
    print(one_cmd)
    os.system(one_cmd)

# python metrics.py -m <path to trained model>
for idx, scene in enumerate(['building2-train', 'building3-train']):
    print('---------------------------------------------------------------------------------')
    one_cmd = f'python metrics.py -m output/{scene}'
    print(one_cmd)
    os.system(one_cmd)
@@ -26,32 +26,33 @@ class Scene:

    def __init__(self, args : ModelParams, gaussians : GaussianModel, load_iteration=None, shuffle=True, resolution_scales=[1.0]):
        """
-       Initialize the scene object
-       :param args: model parameters such as the model path and the source path
-       :param gaussians: Gaussian model object used for the 3D representation of the scene points
-       :param load_iteration: iteration to load; if not None and equal to -1, search the point_cloud/ folder under the output folder for the model with the largest iteration count, otherwise load the given iteration
-       :param shuffle: whether to shuffle the camera lists before training
-       :param resolution_scales: list of resolution scales for handling cameras at different resolutions
+       Initialize the 3D scene object
+
+       args: the args holding the GaussianModel-related parameters, i.e. the parameters defined in ModelParams() of scene/__init__.py
+       gaussians: the 3D Gaussian model object used for the 3D representation of the scene points
+
+       load_iteration: iteration to load; if it is -1, search the point_cloud/ folder under the output folder for the model with the largest iteration count; if it is neither None nor -1, load that exact iteration
+       shuffle: whether to shuffle the camera lists before training
+       resolution_scales: list of resolution scales for handling cameras at different resolutions
        """
        self.model_path = args.model_path  # path where the model files are saved
        self.loaded_iter = None            # iteration that has been loaded
        self.gaussians = gaussians         # the Gaussian model object

-       # Check for and load an existing trained model
+       # If a trained model already exists, load it
        if load_iteration:
            # not None
            if load_iteration == -1:
-               # and equal to -1: search the point_cloud/ folder under the output folder for the model with the largest iteration count and record it
+               # equal to -1: search the point_cloud/ folder under the output folder for the model with the largest iteration count and record it
                self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud"))
            else:
-               # not -1: load the specified iteration
+               # neither None nor -1: load the specified iteration
                self.loaded_iter = load_iteration
            print("Loading trained model at iteration {}".format(self.loaded_iter))

-       self.train_cameras = {}  # camera parameters used for training
-       self.test_cameras = {}   # camera parameters used for testing
+       self.train_cameras = {}  # cameras used for training
+       self.test_cameras = {}   # cameras used for testing

-       # Decide whether the dataset is COLMAP output or Blender output and load the scene information from it
+       # Build the scene information (point cloud, training cameras, test cameras, scene normalization parameters and point-cloud file path) from the COLMAP or Blender output
        if os.path.exists(os.path.join(args.source_path, "sparse")):
            scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval)
        elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")):
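The load_iteration == -1 branch relies on searchForMaxIteration. A minimal sketch of what such a helper can look like, assuming checkpoints live in sub-folders named iteration_<N> (an illustration, not necessarily the repository's exact implementation):

import os

def search_for_max_iteration(folder):
    # Return the largest <N> among sub-folders named like "iteration_<N>".
    saved_iters = [int(name.split("_")[-1]) for name in os.listdir(folder)]
    return max(saved_iters)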
@@ -60,28 +61,33 @@ class Scene:
        else:
            assert False, "Could not recognize scene type!"

        # loaded_iter = None: the model has not been trained yet
        if not self.loaded_iter:
            # If no trained model is loaded, copy the point-cloud file points3D.ply to input.ply
            with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply"), 'wb') as dest_file:
                dest_file.write(src_file.read())

            json_cams = []
            camlist = []
            if scene_info.test_cameras:
                # add the test cameras to camlist
                camlist.extend(scene_info.test_cameras)
            if scene_info.train_cameras:
                # add the training cameras to camlist
                camlist.extend(scene_info.train_cameras)
            # Convert every camera in camlist to JSON with camera_to_JSON, collect the results in json_cams, and write json_cams to cameras.json
            for id, cam in enumerate(camlist):
                json_cams.append(camera_to_JSON(id, cam))
            with open(os.path.join(self.model_path, "cameras.json"), 'w') as file:
                json.dump(json_cams, file)

        if shuffle:
            # Randomly shuffle the training and test camera lists
            random.shuffle(scene_info.train_cameras)  # Multi-res consistent random shuffling
            random.shuffle(scene_info.test_cameras)   # Multi-res consistent random shuffling

        self.cameras_extent = scene_info.nerf_normalization["radius"]

-       # Load the training and test poses for each resolution in resolution_scales
+       # Load the training and test cameras (including R, T and the fields of view) for each resolution in resolution_scales
        for resolution_scale in resolution_scales:
            print("Loading Training Cameras")
            self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args)
@@ -89,13 +95,13 @@ class Scene:
            self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args)

        if self.loaded_iter:
-           # Directly read the corresponding (already optimized) scene
+           # If a trained model is loaded, directly read the corresponding (already optimized) scene
            self.gaussians.load_ply(os.path.join(self.model_path,
                                                 "point_cloud",
                                                 "iteration_" + str(self.loaded_iter),
                                                 "point_cloud.ply"))
        else:
-           # loaded_iter = None: the model has not been trained yet, call GaussianModel.create_from_pcd to build the model from scene_info.point_cloud
+           # No trained model is loaded: call GaussianModel.create_from_pcd to build the model from the sparse point cloud scene_info.point_cloud
            self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent)

    def save(self, iteration):

@@ -41,6 +41,9 @@ CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)


def qvec2rotmat(qvec):
    '''
    Convert a quaternion qvec = [w, x, y, z] to a rotation matrix
    '''
    return np.array([
        [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
         2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
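For reference, the matrix built here is the standard rotation of a unit quaternion q = (w, x, y, z):

R(q) =
\begin{pmatrix}
1 - 2(y^2 + z^2) & 2(xy - wz) & 2(xz + wy) \\
2(xy + wz) & 1 - 2(x^2 + z^2) & 2(yz - wx) \\
2(xz - wy) & 2(yz + wx) & 1 - 2(x^2 + y^2)
\end{pmatrix},
\qquad \lVert q \rVert = 1 .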
@@ -129,7 +132,6 @@ def read_points3D_binary(path_to_model_file):
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
-
    with open(path_to_model_file, "rb") as fid:
        num_points = read_next_bytes(fid, 8, "Q")[0]

@@ -66,28 +66,34 @@ def getNerfppNorm(cam_info):
    return {"translate": translate, "radius": radius}

def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
-   cam_infos = []  # initialize the list used to store camera information
+   '''
+   cam_extrinsics: dict of per-image camera extrinsics (Image objects)
+   cam_intrinsics: dict of per-image camera intrinsics (Camera objects)
+   images_folder: path of the folder holding the original images
+   '''
+   # Initialize the list of CameraInfo objects
+   cam_infos = []

+   # Iterate over the extrinsics of all cameras
    for idx, key in enumerate(cam_extrinsics):
-       sys.stdout.write('\r')
-       # the exact output you're looking for:
+       # Dynamically report the progress of reading camera information
+       sys.stdout.write('\r')  # move the cursor back to the start of the current line
        sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics)))
-       sys.stdout.flush()
+       sys.stdout.flush()  # flush the buffer to the console immediately

        # Fetch the extrinsics and intrinsics of the current camera
-       extr = cam_extrinsics[key]             # extrinsics of the current camera
-       intr = cam_intrinsics[extr.camera_id]  # intrinsics found via the camera_id stored in the extrinsics
-       height = intr.height                   # height of the camera image
-       width = intr.width                     # width of the camera image
+       extr = cam_extrinsics[key]             # Image object holding the extrinsics of the current camera
+       intr = cam_intrinsics[extr.camera_id]  # intrinsics object found via the camera_id stored in the extrinsics
+       height = intr.height                   # image height
+       width = intr.width                     # image width

        uid = intr.id  # unique identifier of the camera

-       R = np.transpose(qvec2rotmat(extr.qvec))  # convert the quaternion rotation into a rotation matrix R
-       T = np.array(extr.tvec)                   # translation vector from the extrinsics
+       R = np.transpose(qvec2rotmat(extr.qvec))  # convert the rotation quaternion into a rotation matrix R and transpose it
+       T = np.array(extr.tvec)                   # translation vector

-       # Compute the field of view (FoV) from the camera intrinsic model
+       # Compute the field of view (FoV) according to the camera intrinsic model
        if intr.model=="SIMPLE_PINHOLE":
            # Simple pinhole model: only one focal-length parameter
            focal_length_x = intr.params[0]
@@ -97,14 +103,13 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
            # Pinhole model: two focal-length parameters
            focal_length_x = intr.params[0]
            focal_length_y = intr.params[1]
-           FovY = focal2fov(focal_length_y, height)  # vertical FoV from the y-direction focal length
-           FovX = focal2fov(focal_length_x, width)   # horizontal FoV from the x-direction focal length
+           FovY = focal2fov(focal_length_y, height)  # vertical FoV from fy
+           FovX = focal2fov(focal_length_x, width)   # horizontal FoV from fx
        elif intr.model=="SIMPLE_RADIAL":
            # SIMPLE_RADIAL model: a single focal-length parameter
            focal_length_x = intr.params[0]
            focal_length_y = intr.params[1]
-           FovY = focal2fov(focal_length_y, height)  # vertical FoV from the y-direction focal length
-           FovX = focal2fov(focal_length_x, width)   # horizontal FoV from the x-direction focal length
+           FovY = focal2fov(focal_length_x, height)  # vertical FoV from the single focal length
+           FovX = focal2fov(focal_length_x, width)   # horizontal FoV from the single focal length
        else:
            # Any other model: raise an error
            assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
@@ -116,6 +121,7 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
            continue
        image = Image.open(image_path)

+       # Create a CameraInfo object (rotation matrix, translation vector, FoVs, image data, image path, image name, width, height) and append it to cam_infos
        cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
                              image_path=image_path, image_name=image_name, width=width, height=height)
        cam_infos.append(cam_info)
@@ -123,7 +129,6 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
    sys.stdout.write('\n')
    print("valid Colmap camera size: {}".format(len(cam_infos)))

-   # return the assembled list of camera information
    return cam_infos

def fetchPly(path):
@@ -153,30 +158,38 @@ def storePly(path, xyz, rgb):
    ply_data = PlyData([vertex_element])
    ply_data.write(path)

-# Try to read the binary camera extrinsics file images.bin and intrinsics file cameras.bin produced by COLMAP
def readColmapSceneInfo(path, images, eval, llffhold=8):
+   '''
+   Load the binary camera extrinsics file images.bin and intrinsics file cameras.bin from the COLMAP output
+
+   path: source-file path from the GaussianModel parameters
+   images: 'images'
+   eval: whether running in eval mode
+   llffhold: defaults to 8
+   '''

    try:
        cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
        cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin")
        cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file)
        cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file)
    except:
-       # If reading the binary files fails, try the text-format extrinsics and intrinsics files
+       # If reading the .bin files fails, try the .txt-format extrinsics and intrinsics files
        cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt")
        cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.txt")
        cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file)
        cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file)

-   # Directory holding the images; defaults to "images" if not specified
+   # Name of the folder holding the original images, "images" by default; the images are read from it
    reading_dir = "images" if images == None else images

-   # Read and process the camera parameters, converting them into the internal format
+   # From each camera's intrinsics and extrinsics, build CameraInfo objects (rotation matrix, translation vector, FoVs, image data, image path, image name, width, height) and store them in the list cam_infos_unsorted
    cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir))
-   # Sort the camera information by image name to keep the order consistent
+   # Sort by image name to keep the order consistent
    cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : (x.image_path.split('/')[-2], int(x.image_name)))

    # Depending on eval mode, split the cameras into a training set and a test set
-   # In eval mode, pick test cameras at intervals given by llffhold (commonly used for LLFF datasets)
+   # In eval mode, take one of every llffhold images as the test set
    if eval:
        train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0]
        test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold == 0]
@@ -188,7 +201,7 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
    # Compute the scene normalization parameters; this handles scenes of different sizes and positions and makes training more stable
    nerf_normalization = getNerfppNorm(train_cam_infos)

-   # Try to read the point cloud, preferring the PLY file; if it does not exist, convert from the BIN or TXT file and save it as PLY
+   # Try to read the sparse point cloud produced by COLMAP, preferring the PLY file; if it does not exist, convert from the BIN or TXT file and save it as PLY
    ply_path = os.path.join(path, "sparse/0/points3D.ply")
    bin_path = os.path.join(path, "sparse/0/points3D.bin")
    txt_path = os.path.join(path, "sparse/0/points3D.txt")
@@ -198,9 +211,12 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
            xyz, rgb, _ = read_points3D_binary(bin_path)  # read the COLMAP sparse point cloud from points3D.bin
        except:
            xyz, rgb, _ = read_points3D_text(txt_path)

        storePly(ply_path, xyz, rgb)  # convert it into a ply file

    try:
-       pcd = fetchPly(ply_path)
+       pcd = fetchPly(ply_path)  # read the COLMAP sparse point cloud from points3D.ply
+
    except:
        pcd = None

@@ -293,4 +309,4 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
sceneLoadTypeCallbacks = {
    "Colmap": readColmapSceneInfo,
    "Blender" : readNerfSyntheticInfo
-}
+}

@@ -25,10 +25,9 @@ class GaussianModel:

    def setup_functions(self):
        """
-       Define and initialize some functions used to process the 3D Gaussian model parameters
+       Define and initialize the activation functions that process the Gaussian model parameters
        """
-
-       # Define the function that builds the 3D Gaussian covariance matrix
+       # Define the function that builds the 3D Gaussian covariance matrix from scale and rotation
        def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation):
            L = build_scaling_rotation(scaling_modifier * scaling, rotation)  # get the L matrix from the scale, the scale modifier and the rotation
            actual_covariance = L @ L.transpose(1, 2)  # compute the actual covariance matrix
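In formula form, the covariance assembled by build_covariance_from_scaling_rotation is the one used throughout the paper: with S = diag(s) (after applying scaling_modifier) and R the rotation obtained from the quaternion,

L = R\,S, \qquad \Sigma = L\,L^{\top} = R\,S\,S^{\top}R^{\top},

which is symmetric positive semi-definite by construction.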
@@ -49,29 +48,30 @@ class GaussianModel:

    def __init__(self, sh_degree : int):
        """
        Initialize the parameters of the 3D Gaussian model
-       sh_degree: maximum degree of the spherical harmonics, controls the complexity of the colour representation
+       sh_degree: the configured maximum degree of the spherical harmonics (3 by default), controls the complexity of the colour representation
        """
        # Initialize the active and maximum spherical-harmonic degrees
        self.active_sh_degree = 0       # currently active SH degree, starts at 0
        self.max_sh_degree = sh_degree  # maximum allowed SH degree

        # Initialize the individual parameters of the 3D Gaussian model
        self._xyz = torch.empty(0)            # centre positions (means) of the 3D Gaussians
        self._features_dc = torch.empty(0)    # first SH coefficient, representing the base colour
        self._features_rest = torch.empty(0)  # remaining SH coefficients, representing colour detail and variation
        self._scaling = torch.empty(0)        # scales of the 3D Gaussians, controlling their shape
        self._rotation = torch.empty(0)       # rotations of the 3D Gaussians (a set of quaternions)
        self._opacity = torch.empty(0)        # opacities of the 3D Gaussians (before the sigmoid), controlling visibility
        self.max_radii2D = torch.empty(0)     # maximum 2D projected radius of each Gaussian

        self.xyz_gradient_accum = torch.empty(0)  # accumulated gradients of the Gaussian centres; when too large the Gaussian is split, when small (under-reconstruction) it is cloned
        self.denom = torch.empty(0)               # used together with the accumulator: counts how many times gradients were accumulated, and divides them when computing the average (denom = denominator)

        self.optimizer = None  # optimizer that adjusts the parameters above to improve the model (Adam in the paper, see the pseudo-code of Algorithm 1 in Appendix B)

        self.percent_dense = 0     # hyper-parameter controlling how dense the Gaussians become
        self.spatial_lr_scale = 0  # the learning rate of the position coordinates is multiplied by this, compensating for applying one learning rate to scenes of different scales

-       # Call setup_functions to initialize some processing functions
+       # Call setup_functions to initialize the activation functions that process the Gaussian model parameters
        self.setup_functions()

    def capture(self):
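The two accumulators map directly onto the adaptive-density rule described in the paper: the "average gradient" the comments refer to is

\bar{g}_i \;=\; \frac{\texttt{xyz\_gradient\_accum}_i}{\texttt{denom}_i},

and Gaussians whose \bar{g}_i exceeds a threshold are cloned when their scale is small (under-reconstruction) and split when it is large (over-reconstruction).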
@@ -139,24 +139,23 @@ class GaussianModel:

    def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float):
        """
-       Initialize the model parameters from point-cloud data
-
-       :param pcd: sparse point-cloud data containing point positions and colours
-       :param spatial_lr_scale: spatial learning-rate scale factor, affects the learning rate of the position parameters
+       Initialize the model parameters from the sparse point cloud
+       pcd: sparse point cloud containing point positions and colours
+       spatial_lr_scale: spatial learning-rate scale factor, affects the learning rate of the position parameters
        """
        # Following scene.Scene.__init__ and scene.dataset_readers.SceneInfo.nerf_normalization, i.e. the code of scene.dataset_readers.getNerfppNorm,
        # this value appears to be 1.1 times the largest distance of any training camera from their mean position (the centre); judging by the name it relates to the learning rate, preventing problems when a fixed learning rate is applied to scenes of different scales.
        self.spatial_lr_scale = spatial_lr_scale

        # Convert the point positions and colours from numpy arrays to PyTorch tensors and move them to the CUDA device
        fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()    # 3D coordinates of the sparse point cloud, shape (P, 3)
        fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())  # DC component of the spherical harmonics, shape (P, 3)
        # RGB2SH(x) = (x - 0.5) / 0.28209479177387814; pcd.colors apparently lies in [0, 1]. 0.28209479177387814 is 1 / (2*sqrt(pi)), the value of the DC basis function Y(l=0, m=0)

        # Initialize the tensor storing the SH coefficients: all coefficients of the three RGB channels, (max_sh_degree + 1) ** 2 per channel
        features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda()  # (P, 3, 16)
-       features[:, :3, 0 ] = fused_color  # store the converted RGB values as the C0 (DC) coefficient
+       features[:, :3, 0 ] = fused_color  # store the converted RGB values as the C0 (DC) coefficient of every 3D point
        features[:, 3:, 1:] = 0.0          # the remaining SH coefficients are initialized to 0

        # Print the number of initial points
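In equation form, the DC conversion mentioned in the comment uses the value of the zeroth spherical-harmonic basis function:

Y_0^0 = \frac{1}{2\sqrt{\pi}} \approx 0.2820948, \qquad
\mathrm{RGB2SH}(c) = \frac{c - 0.5}{Y_0^0}, \qquad
\mathrm{SH2RGB}(f) = 0.5 + Y_0^0 \, f,

so the stored DC coefficient reproduces the input colour when evaluated.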
train.py (38 changed lines)
@@ -34,18 +34,21 @@ except ImportError:

def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from):
    '''
-   dataset: args holding only the model-related parameters
-   opt: optimization-related parameters
-   pipe: pipeline-related parameters
+   dataset: the args holding the GaussianModel-related parameters
+   opt: the args holding the optimization-related parameters
+   pipe: the args holding the pipeline-related parameters
    checkpoint: path of an already-trained model
    debug_from: iteration from which to start debugging
    '''
    first_iter = 0
-   # Create the folder for the results and save the model parameters to the cfg_args file; try to create a tensorboard_writer to log training
+   # Create the results folder output/scene and save the model parameters to the cfg_args file; try to create a tensorboard_writer to log training
    tb_writer = prepare_output_and_logger(dataset)

-   gaussians = GaussianModel(dataset.sh_degree)  # create and initialize the Gaussian model that represents each scene point as a 3D Gaussian
-   scene = Scene(dataset, gaussians)             # create the initial 3D scene object and load the dataset with its camera parameters
+   # Create the Gaussian model object that represents each scene point as a 3D Gaussian
+   gaussians = GaussianModel(dataset.sh_degree)
+   # Create the initial 3D scene object and load the dataset with its camera parameters
+   scene = Scene(dataset, gaussians)

    gaussians.training_setup(opt)  # set up the optimizer and learning-rate schedulers for the Gaussian model parameters

    # If a checkpoint is provided, load the model parameters from it and resume training
@@ -131,7 +134,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from):
                print("\n[ITER {}] Saving Gaussians".format(iteration))
                scene.save(iteration)

-           # Within the specified iteration range, densify and prune the 3D Gaussian model (Densification)
+           # Densification: within the specified iteration range, densify and prune the 3D Gaussian model
            if iteration < opt.densify_until_iter:
                # Keep track of max radii in image-space for pruning
                gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter])
@@ -144,7 +147,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from):
                if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter):
                    gaussians.reset_opacity()

-           # Take one optimizer step and prepare the next iteration (Optimizer step)
+           # Optimizer step: take one optimizer step and prepare the next iteration
            if iteration < opt.iterations:
                gaussians.optimizer.step()
                gaussians.optimizer.zero_grad(set_to_none = True)
@@ -219,12 +222,13 @@ if __name__ == "__main__":
    # Set up command line argument parser
    parser = ArgumentParser(description="Training script parameters")

-   model_prams = ModelParams(parser)         # arg object storing the model-related parameters
-   optim_prams = OptimizationParams(parser)  # arg object storing the optimization-related parameters
-   pipeline_prams = PipelineParams(parser)   # arg object storing the rendering-related parameters
+   # Create the objects holding the model, optimization and rendering parameters
+   lp = ModelParams(parser)
+   op = OptimizationParams(parser)
+   pp = PipelineParams(parser)

    parser.add_argument('--ip', type=str, default="127.0.0.1")
-   parser.add_argument('--port', type=int, default=6009)
+   parser.add_argument('--port', type=int, default=6007)
    parser.add_argument('--debug_from', type=int, default=-1)  # iteration (>= 0) from which to start debugging
    parser.add_argument('--detect_anomaly', action='store_true', default=False)  # with action='store_true' the value becomes True if the flag is present on the command line
    parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000])
@@ -233,7 +237,7 @@ if __name__ == "__main__":
    parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[30_000])
    parser.add_argument("--start_checkpoint", type=str, default = None)

-   # Parse all parameter values from the command line and store them, together with the parameters defined above, in a Namespace object: args
+   # Override the parser's parameters with the command-line arguments and store the result in args
    args = parser.parse_args(sys.argv[1:])

    args.save_iterations.append(args.iterations)
@@ -248,8 +252,12 @@ if __name__ == "__main__":

    torch.autograd.set_detect_anomaly(args.detect_anomaly)  # tell PyTorch whether to detect gradient anomalies

-   # model_prams.extract(args): take the attributes of args (command-line plus preset parameters) that match the parameters defined in ModelParams and wrap them into a new GroupParams object
-   training(model_prams.extract(args), optim_prams.extract(args), pipeline_prams.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from)
+   # lp.extract(args): the values in args override the model, optimization and rendering parameters and form new parameter objects
+   lp_args = lp.extract(args)
+   op_args = op.extract(args)
+   pp_args = pp.extract(args)
+
+   training(lp_args, op_args, pp_args, args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from)

    # All done
    print("\nTraining complete.")

@@ -52,6 +52,11 @@ def loadCam(args, id, cam_info, resolution_scale):
                  image_name=cam_info.image_name, uid=id, data_device=args.data_device)

def cameraList_from_camInfos(cam_infos, resolution_scale, args):
+   '''
+   cam_infos: list of training or test camera objects
+   resolution_scale: one of the resolution scales
+   args: Gaussian model parameters
+   '''
    camera_list = []

    for id, c in enumerate(cam_infos):
@@ -74,4 +74,8 @@ def fov2focal(fov, pixels):
    return pixels / (2 * math.tan(fov / 2))

def focal2fov(focal, pixels):
-   return 2*math.atan(pixels/(2*focal))
+   '''
+   focal: fx or fy
+   pixels: width or height, in pixels
+   '''
+   return 2 * math.atan(pixels / (2 * focal))
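The two helpers are each other's inverse; written out:

\mathrm{fov} = 2\arctan\!\left(\frac{\text{pixels}}{2f}\right),
\qquad
f = \frac{\text{pixels}}{2\tan(\mathrm{fov}/2)} .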