mirror of
https://github.com/graphdeco-inria/gaussian-splatting
synced 2025-06-26 18:18:11 +00:00
Merge 54f1ca8815
into 8a70a8cd6f
This commit is contained in:
commit
7920c1f88d
@ -29,19 +29,13 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Set up rasterization configuration
|
|
||||||
tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
|
|
||||||
tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
|
|
||||||
|
|
||||||
raster_settings = GaussianRasterizationSettings(
|
raster_settings = GaussianRasterizationSettings(
|
||||||
image_height=int(viewpoint_camera.image_height),
|
image_height=int(viewpoint_camera.image_height),
|
||||||
image_width=int(viewpoint_camera.image_width),
|
image_width=int(viewpoint_camera.image_width),
|
||||||
tanfovx=tanfovx,
|
|
||||||
tanfovy=tanfovy,
|
|
||||||
bg=bg_color,
|
bg=bg_color,
|
||||||
scale_modifier=scaling_modifier,
|
scale_modifier=scaling_modifier,
|
||||||
viewmatrix=viewpoint_camera.world_view_transform,
|
viewmatrix=viewpoint_camera.world_view_transform,
|
||||||
projmatrix=viewpoint_camera.full_proj_transform,
|
projmatrix=viewpoint_camera.projection_matrix,
|
||||||
sh_degree=pc.active_sh_degree,
|
sh_degree=pc.active_sh_degree,
|
||||||
campos=viewpoint_camera.camera_center,
|
campos=viewpoint_camera.camera_center,
|
||||||
prefiltered=False,
|
prefiltered=False,
|
||||||
|
@ -15,7 +15,7 @@ import numpy as np
|
|||||||
from utils.graphics_utils import getWorld2View2, getProjectionMatrix
|
from utils.graphics_utils import getWorld2View2, getProjectionMatrix
|
||||||
|
|
||||||
class Camera(nn.Module):
|
class Camera(nn.Module):
|
||||||
def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask,
|
def __init__(self, colmap_id, R, T, FovXleft, FovXright, FovYtop, FovYbottom, image, gt_alpha_mask,
|
||||||
image_name, uid,
|
image_name, uid,
|
||||||
trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda"
|
trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda"
|
||||||
):
|
):
|
||||||
@ -25,8 +25,10 @@ class Camera(nn.Module):
|
|||||||
self.colmap_id = colmap_id
|
self.colmap_id = colmap_id
|
||||||
self.R = R
|
self.R = R
|
||||||
self.T = T
|
self.T = T
|
||||||
self.FoVx = FoVx
|
self.FovXleft = FovXleft
|
||||||
self.FoVy = FoVy
|
self.FovXright = FovXright
|
||||||
|
self.FovYtop = FovYtop
|
||||||
|
self.FovYbottom = FovYbottom
|
||||||
self.image_name = image_name
|
self.image_name = image_name
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -52,7 +54,7 @@ class Camera(nn.Module):
|
|||||||
self.scale = scale
|
self.scale = scale
|
||||||
|
|
||||||
self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda()
|
self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda()
|
||||||
self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda()
|
self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovXleft=FovXleft, fovXright=FovXright, fovYtop=FovYtop, fovYbottom=FovYbottom).transpose(0,1).cuda()
|
||||||
self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0)
|
self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0)
|
||||||
self.camera_center = self.world_view_transform.inverse()[3, :3]
|
self.camera_center = self.world_view_transform.inverse()[3, :3]
|
||||||
|
|
||||||
@ -60,8 +62,8 @@ class MiniCam:
|
|||||||
def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform):
|
def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform):
|
||||||
self.image_width = width
|
self.image_width = width
|
||||||
self.image_height = height
|
self.image_height = height
|
||||||
self.FoVy = fovy
|
self.FovYtop = fovy
|
||||||
self.FoVx = fovx
|
self.FovXleft = fovx
|
||||||
self.znear = znear
|
self.znear = znear
|
||||||
self.zfar = zfar
|
self.zfar = zfar
|
||||||
self.world_view_transform = world_view_transform
|
self.world_view_transform = world_view_transform
|
||||||
|
@ -15,7 +15,7 @@ from PIL import Image
|
|||||||
from typing import NamedTuple
|
from typing import NamedTuple
|
||||||
from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \
|
from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \
|
||||||
read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text
|
read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text
|
||||||
from utils.graphics_utils import getWorld2View2, focal2fov, fov2focal
|
from utils.graphics_utils import getWorld2View2, focal2sidefov, focal2fov, fov2focal
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -27,8 +27,10 @@ class CameraInfo(NamedTuple):
|
|||||||
uid: int
|
uid: int
|
||||||
R: np.array
|
R: np.array
|
||||||
T: np.array
|
T: np.array
|
||||||
FovY: np.array
|
FovYtop: np.array
|
||||||
FovX: np.array
|
FovYbottom: np.array
|
||||||
|
FovXleft: np.array
|
||||||
|
FovXright: np.array
|
||||||
image: np.array
|
image: np.array
|
||||||
image_path: str
|
image_path: str
|
||||||
image_name: str
|
image_name: str
|
||||||
@ -82,15 +84,23 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
|
|||||||
R = np.transpose(qvec2rotmat(extr.qvec))
|
R = np.transpose(qvec2rotmat(extr.qvec))
|
||||||
T = np.array(extr.tvec)
|
T = np.array(extr.tvec)
|
||||||
|
|
||||||
|
cx = intr.params[-2]
|
||||||
|
cy = intr.params[-1]
|
||||||
|
|
||||||
if intr.model=="SIMPLE_PINHOLE":
|
if intr.model=="SIMPLE_PINHOLE":
|
||||||
focal_length_x = intr.params[0]
|
focal_length_x = intr.params[0]
|
||||||
FovY = focal2fov(focal_length_x, height)
|
focal_length_y = intr.params[0]
|
||||||
FovX = focal2fov(focal_length_x, width)
|
FovYtop = focal2sidefov(focal_length_y, -cy) # usually negative
|
||||||
|
FovYbottom = focal2sidefov(focal_length_y, height - cy) # usually positive
|
||||||
|
FovXleft = focal2sidefov(focal_length_x, -cx) # usually negative
|
||||||
|
FovXright = focal2sidefov(focal_length_x, width - cx) # usually positive
|
||||||
elif intr.model=="PINHOLE":
|
elif intr.model=="PINHOLE":
|
||||||
focal_length_x = intr.params[0]
|
focal_length_x = intr.params[0]
|
||||||
focal_length_y = intr.params[1]
|
focal_length_y = intr.params[1]
|
||||||
FovY = focal2fov(focal_length_y, height)
|
FovYtop = focal2sidefov(focal_length_y, -cy) # usually negative
|
||||||
FovX = focal2fov(focal_length_x, width)
|
FovYbottom = focal2sidefov(focal_length_y, height - cy) # usually positive
|
||||||
|
FovXleft = focal2sidefov(focal_length_x, -cx) # usually negative
|
||||||
|
FovXright = focal2sidefov(focal_length_x, width - cx) # usually positive
|
||||||
else:
|
else:
|
||||||
assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
|
assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
|
||||||
|
|
||||||
@ -98,7 +108,7 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
|
|||||||
image_name = os.path.basename(image_path).split(".")[0]
|
image_name = os.path.basename(image_path).split(".")[0]
|
||||||
image = Image.open(image_path)
|
image = Image.open(image_path)
|
||||||
|
|
||||||
cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
|
cam_info = CameraInfo(uid=uid, R=R, T=T, FovYtop=FovYtop, FovYbottom=FovYbottom, FovXleft=FovXleft, FovXright=FovXright, image=image,
|
||||||
image_path=image_path, image_name=image_name, width=width, height=height)
|
image_path=image_path, image_name=image_name, width=width, height=height)
|
||||||
cam_infos.append(cam_info)
|
cam_infos.append(cam_info)
|
||||||
sys.stdout.write('\n')
|
sys.stdout.write('\n')
|
||||||
@ -181,7 +191,6 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension=
|
|||||||
|
|
||||||
with open(os.path.join(path, transformsfile)) as json_file:
|
with open(os.path.join(path, transformsfile)) as json_file:
|
||||||
contents = json.load(json_file)
|
contents = json.load(json_file)
|
||||||
fovx = contents["camera_angle_x"]
|
|
||||||
|
|
||||||
frames = contents["frames"]
|
frames = contents["frames"]
|
||||||
for idx, frame in enumerate(frames):
|
for idx, frame in enumerate(frames):
|
||||||
@ -209,13 +218,21 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension=
|
|||||||
arr = norm_data[:,:,:3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4])
|
arr = norm_data[:,:,:3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4])
|
||||||
image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB")
|
image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB")
|
||||||
|
|
||||||
fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
|
cx = frame["cx"] if "cx" in frame else contents["cx"] if "cx" in contents else image.size[0] / 2
|
||||||
FovY = fovy
|
cy = frame["cy"] if "cy" in frame else contents["cy"] if "cy" in contents else image.size[1] / 2
|
||||||
FovX = fovx
|
fl_y = frame["fl_y"] if "fl_y" in frame else contents["fl_y"] if "fl_y" in contents else None
|
||||||
|
fl_x = frame["fl_x"] if "fl_x" in frame else contents["fl_x"] if "fl_x" in contents else fl_y
|
||||||
|
fovx = frame["camera_angle_x"] if "camera_angle_x" in frame else contents["camera_angle_x"] if "camera_angle_x" in contents else None
|
||||||
|
fovy = frame["camera_angle_y"] if "camera_angle_y" in frame else contents["camera_angle_y"] if "camera_angle_y" in contents else focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
|
||||||
|
# priority is given to ("fl_x", "cx") over "camera_angle_x" because it can be frame specific:
|
||||||
|
fovYtop = focal2sidefov(fl_y, -cy) if fl_y else focal2sidefov(fov2focal(fovy, image.size[1]), -cy) # usually negative
|
||||||
|
fovYbottom = focal2sidefov(fl_y, image.size[1] - cy) if fl_y else focal2sidefov(fov2focal(fovy, image.size[1]), image.size[1] - cy) # usually positive
|
||||||
|
fovXleft = focal2sidefov(fl_x, -cx) if fl_x else focal2sidefov(fov2focal(fovx, image.size[0]), -cx) # usually negative
|
||||||
|
fovXright = focal2sidefov(fl_x, image.size[0] - cx) if fl_x else focal2sidefov(fov2focal(fovx, image.size[0]), image.size[0] - cx) # usually positive
|
||||||
|
|
||||||
cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
|
cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovYtop=fovYtop, FovYbottom=fovYbottom, FovXleft=fovXleft, FovXright=fovXright, image=image,
|
||||||
image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1]))
|
image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1]))
|
||||||
|
|
||||||
return cam_infos
|
return cam_infos
|
||||||
|
|
||||||
def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
|
def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
|
||||||
|
@ -1 +1 @@
|
|||||||
Subproject commit 59f5f77e3ddbac3ed9db93ec2cfe99ed6c5d121d
|
Subproject commit e8b24763806263493f74deadf5f18b68f7cab0e1
|
@ -13,6 +13,7 @@ from scene.cameras import Camera
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from utils.general_utils import PILtoTorch
|
from utils.general_utils import PILtoTorch
|
||||||
from utils.graphics_utils import fov2focal
|
from utils.graphics_utils import fov2focal
|
||||||
|
import math
|
||||||
|
|
||||||
WARNED = False
|
WARNED = False
|
||||||
|
|
||||||
@ -47,7 +48,7 @@ def loadCam(args, id, cam_info, resolution_scale):
|
|||||||
loaded_mask = resized_image_rgb[3:4, ...]
|
loaded_mask = resized_image_rgb[3:4, ...]
|
||||||
|
|
||||||
return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T,
|
return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T,
|
||||||
FoVx=cam_info.FovX, FoVy=cam_info.FovY,
|
FovXleft=cam_info.FovXleft, FovXright=cam_info.FovXright, FovYtop=cam_info.FovYtop, FovYbottom=cam_info.FovYbottom,
|
||||||
image=gt_image, gt_alpha_mask=loaded_mask,
|
image=gt_image, gt_alpha_mask=loaded_mask,
|
||||||
image_name=cam_info.image_name, uid=id, data_device=args.data_device)
|
image_name=cam_info.image_name, uid=id, data_device=args.data_device)
|
||||||
|
|
||||||
@ -76,7 +77,7 @@ def camera_to_JSON(id, camera : Camera):
|
|||||||
'height' : camera.height,
|
'height' : camera.height,
|
||||||
'position': pos.tolist(),
|
'position': pos.tolist(),
|
||||||
'rotation': serializable_array_2d,
|
'rotation': serializable_array_2d,
|
||||||
'fy' : fov2focal(camera.FovY, camera.height),
|
'fy' : camera.height / (2 * math.tan((camera.FovYbottom - camera.FovYtop) / 2)),
|
||||||
'fx' : fov2focal(camera.FovX, camera.width)
|
'fx' : camera.width / (2 * math.tan((camera.FovXright - camera.FovXleft) / 2))
|
||||||
}
|
}
|
||||||
return camera_entry
|
return camera_entry
|
||||||
|
@ -48,30 +48,39 @@ def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
|
|||||||
Rt = np.linalg.inv(C2W)
|
Rt = np.linalg.inv(C2W)
|
||||||
return np.float32(Rt)
|
return np.float32(Rt)
|
||||||
|
|
||||||
def getProjectionMatrix(znear, zfar, fovX, fovY):
|
def getProjectionMatrix(znear, zfar, fovXleft, fovXright, fovYtop, fovYbottom):
|
||||||
tanHalfFovY = math.tan((fovY / 2))
|
tanHalfFovYtop = math.tan(fovYtop)
|
||||||
tanHalfFovX = math.tan((fovX / 2))
|
tanHalfFovYbottom = math.tan(fovYbottom)
|
||||||
|
tanHalfFovXleft = math.tan(fovXleft)
|
||||||
|
tanHalfFovXright = math.tan(fovXright)
|
||||||
|
|
||||||
top = tanHalfFovY * znear
|
top = tanHalfFovYtop * znear
|
||||||
bottom = -top
|
bottom = tanHalfFovYbottom * znear
|
||||||
right = tanHalfFovX * znear
|
left = tanHalfFovXleft * znear
|
||||||
left = -right
|
right = tanHalfFovXright * znear
|
||||||
|
|
||||||
P = torch.zeros(4, 4)
|
P = torch.zeros(4, 4)
|
||||||
|
|
||||||
z_sign = 1.0
|
z_sign = 1.0
|
||||||
|
|
||||||
|
# note that my conventions are (fovXleft,fovYtop) negative and (fovXright,fovYbottom) positive
|
||||||
P[0, 0] = 2.0 * znear / (right - left)
|
P[0, 0] = 2.0 * znear / (right - left)
|
||||||
P[1, 1] = 2.0 * znear / (top - bottom)
|
P[1, 1] = 2.0 * znear / (bottom - top)
|
||||||
P[0, 2] = (right + left) / (right - left)
|
P[0, 2] = -(right + left) / (right - left)
|
||||||
P[1, 2] = (top + bottom) / (top - bottom)
|
P[1, 2] = -(top + bottom) / (bottom - top)
|
||||||
P[3, 2] = z_sign
|
P[3, 2] = z_sign
|
||||||
P[2, 2] = z_sign * zfar / (zfar - znear)
|
P[2, 2] = z_sign * zfar / (zfar - znear)
|
||||||
P[2, 3] = -(zfar * znear) / (zfar - znear)
|
P[2, 3] = -(zfar * znear) / (zfar - znear)
|
||||||
return P
|
return P
|
||||||
|
|
||||||
def fov2focal(fov, pixels):
|
def fov2focal(fov, pixels):
|
||||||
return pixels / (2 * math.tan(fov / 2))
|
return sidefov2focal(fov / 2, pixels / 2)
|
||||||
|
|
||||||
def focal2fov(focal, pixels):
|
def focal2fov(focal, pixels):
|
||||||
return 2*math.atan(pixels/(2*focal))
|
return 2 * focal2sidefov(focal, pixels / 2)
|
||||||
|
|
||||||
|
def sidefov2focal(sidefov, sidepixels):
|
||||||
|
return sidepixels / math.tan(sidefov)
|
||||||
|
|
||||||
|
def focal2sidefov(focal, sidepixels):
|
||||||
|
return math.atan(sidepixels / focal)
|
||||||
|
Loading…
Reference in New Issue
Block a user