gaussian-splatting/arkit_utils/pose2tum_evo.py
2024-03-25 09:24:47 +00:00

183 lines
7.0 KiB
Python

import numpy as np
import struct
def convert_pose(C2W):
    """Negate the second and third columns of a 4x4 pose matrix.

    The pose is right-multiplied by ``diag(1, -1, -1, 1)``. For a
    camera-to-world matrix (as the parameter name suggests) this flips the
    camera's local Y and Z axes while leaving the X axis and the translation
    column untouched.

    :param C2W: 4x4 pose matrix.
    :return: A new 4x4 ``numpy.ndarray``; the argument is not modified.
    """
    axis_flip = np.diag([1.0, -1.0, -1.0, 1.0])
    return np.matmul(C2W, axis_flip)
def qvec2rotmat(qvec):
    """Build the 3x3 rotation matrix of a scalar-first quaternion.

    :param qvec: Indexable with at least four entries, ordered
        ``(w, x, y, z)``. The quaternion is used as given (it is not
        normalised here), so it is expected to be of unit length.
    :return: 3x3 ``numpy.ndarray``.
    """
    w, x, y, z = qvec[0], qvec[1], qvec[2], qvec[3]

    row0 = [1 - 2 * y**2 - 2 * z**2,
            2 * x * y - 2 * w * z,
            2 * z * x + 2 * w * y]
    row1 = [2 * x * y + 2 * w * z,
            1 - 2 * x**2 - 2 * z**2,
            2 * y * z - 2 * w * x]
    row2 = [2 * z * x - 2 * w * y,
            2 * y * z + 2 * w * x,
            1 - 2 * x**2 - 2 * y**2]
    return np.array([row0, row1, row2])
def rotmat2qvec(R):
    """Recover a scalar-first quaternion from a 3x3 rotation matrix.

    A symmetric 4x4 matrix is assembled from the entries of ``R`` and the
    eigenvector belonging to its largest eigenvalue is taken as the
    quaternion. Only the lower triangle is filled in, which is the triangle
    ``np.linalg.eigh`` reads by default.

    :param R: 3x3 ``numpy.ndarray`` (must expose ``.flat``).
    :return: Length-4 array ordered ``(w, x, y, z)`` with ``w >= 0``.
    """
    # Row-major entries of R: r<row><col>.
    r00, r01, r02, r10, r11, r12, r20, r21, r22 = R.flat

    sym = np.array([
        [r00 - r11 - r22, 0, 0, 0],
        [r01 + r10, r11 - r00 - r22, 0, 0],
        [r02 + r20, r12 + r21, r22 - r00 - r11, 0],
        [r21 - r12, r02 - r20, r10 - r01, r00 + r11 + r22],
    ]) / 3.0

    eigenvalues, eigenvectors = np.linalg.eigh(sym)
    dominant = eigenvectors[:, np.argmax(eigenvalues)]

    # The eigenvector is laid out (x, y, z, w); move the scalar part first.
    quat = dominant[[3, 0, 1, 2]]

    # q and -q describe the same rotation; pick the one with w >= 0.
    return -quat if quat[0] < 0 else quat
def read_pose_txt(path):
    """Export the poses listed in ``<path>images.txt`` to ``<path>est_tum.txt``.

    Every non-empty line that does not start with ``#`` is treated as one
    frame. Of its whitespace-separated fields, field 0 is skipped, fields
    1-4 are read as a quaternion and fields 5-7 as a position. Both are
    passed to ``write_2_TUM_format`` exactly as stored in the file; no
    coordinate-frame conversion is applied.

    NOTE(review): this assumes images.txt already holds camera-to-world
    poses with a scalar-first quaternion, and that it contains no other
    non-empty data lines (e.g. 2D point lists) -- confirm against whatever
    produces the file.

    :param path: Directory prefix *including* the trailing separator; it is
        concatenated directly with the file names.
    :raises ValueError: If a data line has fewer than eight fields or a
        field cannot be parsed as a float.
    """
    txt_path = path + "images.txt"

    # Read the file once and keep only the data lines.
    with open(txt_path, "r") as fid:
        stripped_lines = (raw.strip() for raw in fid)
        pose_lines = [ln for ln in stripped_lines
                      if ln and not ln.startswith("#")]

    num_frames = len(pose_lines)
    print(f"num of frames : {num_frames}")

    xyzs = np.empty((num_frames, 3))
    qxyzs = np.empty((num_frames, 4))
    for row, line in enumerate(pose_lines):
        elems = line.split()
        if len(elems) < 8:
            raise ValueError(
                f"{txt_path}: expected at least 8 fields per pose line, "
                f"got {len(elems)}: {line!r}")
        qxyzs[row] = [float(v) for v in elems[1:5]]
        xyzs[row] = [float(v) for v in elems[5:8]]

    write_2_TUM_format(num_frames, xyzs, qxyzs, path + "est_tum.txt")
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Consume ``num_bytes`` from a binary stream and decode them with ``struct``.

    :param fid: Binary file-like object providing ``read``.
    :param num_bytes: Number of bytes to consume; a sum of {1, 2, 4, 8}
        matching the size implied by ``format_char_sequence``.
    :param format_char_sequence: ``struct`` format characters, e.g. any of
        {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: ``struct`` byte-order prefix, any of
        {@, =, <, >, !}.
    :return: Tuple of the unpacked values.
    :raises struct.error: If the bytes obtained do not have the size the
        format requires (for instance at end of file).
    """
    raw = fid.read(num_bytes)
    layout = struct.Struct(endian_character + format_char_sequence)
    return layout.unpack(raw)
def _skip_image_name(fid):
    """Advance ``fid`` past a NUL-terminated string, consuming the NUL."""
    while read_next_bytes(fid, 1, "c")[0] != b"\x00":
        pass


def read_pose_bin(path):
    """Export the poses stored in ``<path>images.bin`` to ``<path>gt_tum.txt``.

    The file is read as: a uint64 image count, then per image an int32
    image id, four doubles (quaternion), three doubles (translation), an
    int32 camera id, a NUL-terminated image name, a uint64 number of 2D
    points and that many 24-byte point records. Only the image id and the
    pose are used.

    The stored pose is treated as world-to-camera (Tcw) and inverted to
    camera-to-world (Twc) before export. Records are not necessarily stored
    in increasing id order, so each pose is written to row ``image_id - 1``.

    :param path: Directory prefix *including* the trailing separator; it is
        concatenated directly with the file names.
    :raises IndexError: If an image id lies outside ``1..num_images``.
    :raises struct.error: If the file ends inside a record header.
    """
    bin_path = path + "images.bin"

    with open(bin_path, "rb") as fid:
        # The header already holds the image count; no counting pass needed.
        num_frames = read_next_bytes(fid, 8, "Q")[0]
        print(f"num of frames : {num_frames}")

        xyzs = np.empty((num_frames, 3))
        qxyzs = np.empty((num_frames, 4))

        for _ in range(num_frames):
            binary_image_properties = read_next_bytes(
                fid, num_bytes=64, format_char_sequence="idddddddi")
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])

            _skip_image_name(fid)

            # The 2D observations are not needed: jump over them instead of
            # unpacking (whence=1 means "relative to the current position").
            num_points2D = read_next_bytes(fid, num_bytes=8,
                                           format_char_sequence="Q")[0]
            fid.seek(24 * num_points2D, 1)

            row = image_id - 1
            if not 0 <= row < num_frames:
                # A negative row would silently wrap around to the end.
                raise IndexError(
                    f"image_id {image_id} is outside 1..{num_frames}; "
                    "image ids must be contiguous to be used as row indices")

            # COLMAP pose is in Tcw, we need Twc
            Tcw = np.eye(4)
            Tcw[:3, :3] = qvec2rotmat(qvec)
            Tcw[:3, 3] = tvec
            Twc = np.linalg.inv(Tcw)

            qxyzs[row] = rotmat2qvec(Twc[:3, :3])
            xyzs[row] = Twc[:3, -1]

    write_2_TUM_format(num_frames, xyzs, qxyzs, path + "gt_tum.txt")
def write_2_TUM_format(n, xyzs, qxyzs, path):
    """Write the first ``n`` poses to ``path``, one pose per line.

    Each line reads ``index tx ty tz qx qy qz qw`` with single-space
    separators, where ``index`` is the row number starting at 0. The
    quaternions in ``qxyzs`` are taken scalar-first ``(w, x, y, z)`` and
    reordered so that the scalar part is written last.

    TUM expects poses in Twc (camera to world); the values are written as
    given, so the caller must supply them in that convention.

    :param n: Number of rows to write.
    :param xyzs: Row-indexable positions, three components per row.
    :param qxyzs: Row-indexable quaternions, four components per row.
    :param path: Output file path; an existing file is overwritten.
    """
    def render(idx):
        pos = xyzs[idx]
        quat = qxyzs[idx]
        fields = (idx, pos[0], pos[1], pos[2],
                  quat[1], quat[2], quat[3], quat[0])
        return " ".join(map(str, fields)) + "\n"

    with open(path, "w") as out:
        out.writelines(render(idx) for idx in range(n))
if __name__ == "__main__":
    # Imported here because argparse is only needed when run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert COLMAP images.bin poses to a TUM trajectory file")
    # The default is the previously hard-coded location, so running the
    # script without arguments behaves as before.
    parser.add_argument(
        "--input_cameras_path",
        type=str,
        default="data/arkit_pose/meeting_room_loop_closure/arkit_colmap/colmap_arkit/raw/colmap_ba/",
        help="directory prefix (with trailing slash) that contains images.bin")
    args = parser.parse_args()

    # To export a text pose file instead, call for example:
    # read_pose_txt("data/arkit_pose/meeting_room_loop_closure/arkit_colmap2/colmap_arkit/raw/")
    read_pose_bin(args.input_cameras_path)