diff --git a/.gitmodules b/.gitmodules index cafb9ac..8bb4a4d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,10 +4,10 @@ [submodule "submodules/diff-gaussian-rasterization"] path = submodules/diff-gaussian-rasterization url = https://github.com/graphdeco-inria/diff-gaussian-rasterization.git - branch = dr_aa + branch = 3dgs_accel # branch = dr_aa [submodule "SIBR_viewers"] path = SIBR_viewers url = https://gitlab.inria.fr/sibr/sibr_core.git [submodule "submodules/fused-ssim"] path = submodules/fused-ssim - url = https://github.com/rahul-goel/fused-ssim.git + url = https://github.com/rahul-goel/fused-ssim.git \ No newline at end of file diff --git a/arguments/__init__.py b/arguments/__init__.py index 98a177c..a49900b 100644 --- a/arguments/__init__.py +++ b/arguments/__init__.py @@ -56,10 +56,13 @@ class ModelParams(ParamGroup): self.train_test_exp = False self.data_device = "cuda" self.eval = False + # xy-new self.skybox_locked = False self.skybox_num = 0 self.scaffold_file = "" self.bounds_file = "" + self.is_lidar_depth = False + self.lidar_depth_max = 30.0 super().__init__(parser, "Loading Parameters", sentinel) # add parameters into parser def extract(self, args): diff --git a/scene/cameras.py b/scene/cameras.py index 63161b9..30a4979 100644 --- a/scene/cameras.py +++ b/scene/cameras.py @@ -20,7 +20,8 @@ class Camera(nn.Module): def __init__(self, resolution, colmap_id, R, T, FoVx, FoVy, depth_params, image, invdepthmap, image_name, uid, trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda", - train_test_exp = False, is_test_dataset = False, is_test_view = False + train_test_exp = False, is_test_dataset = False, is_test_view = False, + is_lidar_depth = False, max_depth = 30.0 ): super(Camera, self).__init__() @@ -39,13 +40,13 @@ class Camera(nn.Module): print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) self.data_device = torch.device("cuda") - resized_image_rgb = PILtoTorch(image, resolution) + 
resized_image_rgb = PILtoTorch(image, resolution)# [3,h,w] gt_image = resized_image_rgb[:3, ...] self.alpha_mask = None if resized_image_rgb.shape[0] == 4: self.alpha_mask = resized_image_rgb[3:4, ...].to(self.data_device) else: - self.alpha_mask = torch.ones_like(resized_image_rgb[0:1, ...].to(self.data_device)) + self.alpha_mask = torch.ones_like(resized_image_rgb[0:1, ...].to(self.data_device))# [1,h,w] all 1 if train_test_exp and is_test_view: if is_test_dataset: @@ -61,8 +62,19 @@ class Camera(nn.Module): self.depth_reliable = False if invdepthmap is not None: self.depth_mask = torch.ones_like(self.alpha_mask) - self.invdepthmap = cv2.resize(invdepthmap, resolution) - self.invdepthmap[self.invdepthmap < 0] = 0 + + # xy-new lidar depth to invdepth + if is_lidar_depth: + depth_raw = invdepthmap + depth_lin = depth_raw / 255.0 * max_depth + mask_np = depth_lin > 0 + invdepth_lidar = np.zeros_like(depth_lin) + invdepth_lidar[mask_np] = 1.0 / (depth_lin[mask_np] + 1e-6) + + self.depth_mask = torch.from_numpy(mask_np).to(self.data_device) + + self.invdepthmap = cv2.resize(invdepth_lidar, resolution) + self.invdepthmap[self.invdepthmap < 0] = 0 self.depth_reliable = True if depth_params is not None: @@ -73,9 +85,9 @@ class Camera(nn.Module): if depth_params["scale"] > 0: self.invdepthmap = self.invdepthmap * depth_params["scale"] + depth_params["offset"] - if self.invdepthmap.ndim != 2: - self.invdepthmap = self.invdepthmap[..., 0] - self.invdepthmap = torch.from_numpy(self.invdepthmap[None]).to(self.data_device) + if self.invdepthmap.ndim != 2: # [h,w,3] + self.invdepthmap = self.invdepthmap[..., 0] # [h,w] + self.invdepthmap = torch.from_numpy(self.invdepthmap[None]).to(self.data_device) # [1,h,w] self.zfar = 100.0 self.znear = 0.01 diff --git a/train-sky.py b/train-sky.py index e8cd446..fde162f 100644 --- a/train-sky.py +++ b/train-sky.py @@ -198,7 +198,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi relevant = 
(gaussians._opacity.grad != 0).squeeze() # points - opacity gradients != 0 gaussians.optimizer.step(relevant, gaussians.get_xyz.shape[0]) # no densification so need step relevant points gaussians.optimizer.zero_grad(set_to_none = True) - + if (iteration in checkpoint_iterations): print("\n[ITER {}] Saving Checkpoint".format(iteration)) torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth") diff --git a/utils/camera_utils.py b/utils/camera_utils.py index 590b1e6..318833a 100644 --- a/utils/camera_utils.py +++ b/utils/camera_utils.py @@ -24,6 +24,9 @@ def loadCam(args, id, cam_info, resolution_scale, is_nerf_synthetic, is_test_dat try: if is_nerf_synthetic: invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / 512 + elif args.is_lidar_depth: + raw_uint8 = cv2.imread(cam_info.depth_path, 0).astype(np.float32) + invdepthmap = raw_uint8 # to filter mask in Camera class else: invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / float(2**16) @@ -64,7 +67,8 @@ def loadCam(args, id, cam_info, resolution_scale, is_nerf_synthetic, is_test_dat FoVx=cam_info.FovX, FoVy=cam_info.FovY, depth_params=cam_info.depth_params, image=image, invdepthmap=invdepthmap, image_name=cam_info.image_name, uid=id, data_device=args.data_device, - train_test_exp=args.train_test_exp, is_test_dataset=is_test_dataset, is_test_view=cam_info.is_test) + train_test_exp=args.train_test_exp, is_test_dataset=is_test_dataset, is_test_view=cam_info.is_test, + is_lidar_depth=args.is_lidar_depth,max_depth=args.lidar_depth_max) def cameraList_from_camInfos(cam_infos, resolution_scale, args, is_nerf_synthetic, is_test_dataset): camera_list = [] diff --git a/xy_utils/experiment.ipynb b/xy_utils/experiment.ipynb new file mode 100644 index 0000000..1fc257d --- /dev/null +++ b/xy_utils/experiment.ipynb @@ -0,0 +1,733 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b66179c8", + "metadata": {}, + "source": [ + "# 1.预处理数据" + ] + 
}, + { + "cell_type": "markdown", + "id": "533db061", + "metadata": {}, + "source": [ + "## 1.1 对点云数据进行处理 - 滤除环视范围外过远的点" + ] + }, + { + "cell_type": "markdown", + "id": "864843b2", + "metadata": {}, + "source": [ + "1. 解析 images.txt,计算所有相机在世界坐标系下的光心(位姿)。 \n", + "2. 使用 Open3D 加载 .pcd 点云,并用相机光心范围外扩固定边距得到的轴对齐包围盒(AABB)进行裁剪滤除。 \n", + "3. 输出相机光心的范围(坐标轴最小/最大值)以及被滤除的点云数量。 \n", + "4. 将保留的点云按 COLMAP points3D.txt 格式保存,并额外生成一个 PLY 文件,其中红色标记被删除的点,原色保留未删除点。" + ] + }, + { + "cell_type": "markdown", + "id": "5dc3ac56", + "metadata": {}, + "source": [ + "#### ipynb内部函数 -- 未进行地面与boundingbox对齐" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc2e8c18", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import open3d as o3d\n", + "from scipy.spatial.transform import Rotation as R\n", + "\n", + "def filter_point_cloud(images_txt, pcd_path, output_points_txt, output_points_ply, output_ply_path):\n", + " \"\"\"\n", + " 根据相机位姿过滤点云,并保存结果。\n", + "\n", + " Args:\n", + " images_txt (str): 包含相机位姿的 images.txt 文件路径。\n", + " pcd_path (str): 点云文件路径。\n", + " output_points_txt (str): 保存过滤后点云的 points3D.txt 文件路径。\n", + " output_ply_path (str): 保存带颜色标记的 PLY 文件路径。\n", + " \"\"\"\n", + " def load_camera_centers(images_txt):\n", + " centers = []\n", + " with open(images_txt, 'r') as f:\n", + " for line in f:\n", + " if line.startswith('#') or len(line.strip()) == 0:\n", + " continue\n", + " elems = line.strip().split()\n", + " if len(elems) < 10:\n", + " continue\n", + " qw, qx, qy, qz = map(float, elems[1:5])\n", + " tx, ty, tz = map(float, elems[5:8])\n", + " rot = R.from_quat([qx, qy, qz, qw]).as_matrix()\n", + " center = -rot.T @ np.array([tx, ty, tz])\n", + " centers.append(center)\n", + " return np.array(centers)\n", + "\n", + " # 1. 加载相机中心\n", + " camera_centers = load_camera_centers(images_txt)\n", + "\n", + " # 2. 
计算平均中心和最大偏移\n", + " center_mean = camera_centers.mean(axis=0)\n", + " offsets = np.abs(camera_centers - center_mean)\n", + " max_offset = offsets.max(axis=0)\n", + "\n", + " # 3. 加载点云\n", + " pcd = o3d.io.read_point_cloud(pcd_path)\n", + " points = np.asarray(pcd.points)\n", + " colors = np.asarray(pcd.colors) if pcd.has_colors() else np.ones_like(points)\n", + "\n", + " # 4. 构建包围盒并裁剪点云\n", + " min_bound = center_mean - max_offset - 15\n", + " max_bound = center_mean + max_offset + 15\n", + " aabb = o3d.geometry.AxisAlignedBoundingBox(min_bound, max_bound)\n", + " pcd_cropped = pcd.crop(aabb)\n", + "\n", + " # 5. 统计信息\n", + " total_points = len(points)\n", + " kept_points = np.asarray(pcd_cropped.points)\n", + " kept_colors = np.asarray(pcd_cropped.colors) if pcd_cropped.has_colors() else np.ones_like(kept_points)\n", + " removed_mask = np.ones(total_points, dtype=bool)\n", + " kept_indices = aabb.get_point_indices_within_bounding_box(pcd.points)\n", + " removed_mask[kept_indices] = False\n", + " removed_points = points[removed_mask]\n", + " removed_colors = colors[removed_mask]\n", + "\n", + " print(\"相机中心范围:\")\n", + " print(f\" X: [{min_bound[0]:.3f}, {max_bound[0]:.3f}]\")\n", + " print(f\" Y: [{min_bound[1]:.3f}, {max_bound[1]:.3f}]\")\n", + " print(f\" Z: [{min_bound[2]:.3f}, {max_bound[2]:.3f}]\")\n", + " print(f\"总点数:{total_points}\")\n", + " print(f\"保留点数:{len(kept_points)}\")\n", + " print(f\"删除点数:{len(removed_points)}\")\n", + "\n", + " # 6. 
保存 points3D.txt (速度太慢,直接使用 ply )\n", + " with open(output_points_txt, \"w\") as f:\n", + " f.write(\"# 3D point list with one line of data per point:\\n\")\n", + " f.write(\"# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[]\\n\")\n", + " for idx, (pt, col) in enumerate(zip(kept_points, kept_colors), start=1):\n", + " r, g, b = (col * 255).astype(int)\n", + " f.write(f\"{idx} {pt[0]:.6f} {pt[1]:.6f} {pt[2]:.6f} {r} {g} {b} 0\\n\")\n", + " # 保存为 ply 文件\n", + " pcd_kept = o3d.geometry.PointCloud()\n", + " pcd_kept.points = o3d.utility.Vector3dVector(kept_points)\n", + " pcd_kept.colors = o3d.utility.Vector3dVector(kept_colors)\n", + " o3d.io.write_point_cloud(output_points_ply, pcd_kept)\n", + " print(\"保留的点云已保存为 {output_points_ply}\")\n", + " \n", + " # 7. 保存带颜色的 PLY 文件\n", + " all_points = np.vstack((kept_points, removed_points))\n", + " removed_colors_red = np.tile([1.0, 0.0, 0.0], (len(removed_points), 1))\n", + " all_colors = np.vstack((kept_colors, removed_colors_red))\n", + " pcd_all = o3d.geometry.PointCloud()\n", + " pcd_all.points = o3d.utility.Vector3dVector(all_points)\n", + " pcd_all.colors = o3d.utility.Vector3dVector(all_colors)\n", + " o3d.io.write_point_cloud(output_ply_path, pcd_all)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "80b924be", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "相机中心范围:\n", + " X: [-0.200, 28.270]\n", + " Y: [-16.082, 1.699]\n", + " Z: [-9.392, 0.714]\n", + "总点数:16404321\n", + "保留点数:10684495\n", + "删除点数:5719826\n", + "保留的点云已保存为 {output_points_ply}\n" + ] + } + ], + "source": [ + "import os\n", + "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n", + "\n", + "images_txt = os.path.join(folder_path, 'sparse/0/images.txt')\n", + "pcd_path = os.path.join(folder_path, 'pcd/all_raw_points.pcd')\n", + "output_points_ply = os.path.join(folder_path, 'pcd/points3D.ply')\n", + "vis_ply_path = 
os.path.join(folder_path, 'pcd/vis_filter.ply')\n", + "output_points_txt = os.path.join(folder_path, 'pcd/points3D.txt')\n", + "filter_point_cloud(images_txt,pcd_path,output_points_txt, output_points_ply,vis_ply_path)" + ] + }, + { + "cell_type": "markdown", + "id": "1a4206e8", + "metadata": {}, + "source": [ + "#### 调用 crop_points.py 文件" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d8c55ed", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "总点数:16404321,保留:15254842,删除:1149479\n" + ] + } + ], + "source": [ + "!python crop_points.py \\\n", + " --images_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/sparse/0/images.txt \\\n", + " --pcd_path /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/all_raw_points.pcd \\\n", + " --output_points_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D.txt \\\n", + " --output_ply /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/filtered_colored.ply \\\n", + " --margin 10.0" + ] + }, + { + "cell_type": "markdown", + "id": "651ed72f", + "metadata": {}, + "source": [ + "## 1.2 利用体素化降采样" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "56f5cba7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "raw points len : 16404321\n", + "downsample points len : 4671298\n", + "降采样后的点云已保存到 /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_0.05.ply,体素大小:0.05\n" + ] + } + ], + "source": [ + "import os\n", + "from tools.points_utils import voxel_downsample_and_save\n", + "\n", + "voxel_size = 0.05\n", + "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n", + 
"input_ply_path = os.path.join(folder_path,'pcd/points3D.ply')\n", + "output_ply_path = os.path.join(folder_path,f'pcd/points3D_{voxel_size}.ply')\n", + "input_ply_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n", + "output_ply_path = f'/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_{voxel_size}.ply'\n", + "\n", + "voxel_downsample_and_save(voxel_size, input_ply_path, output_ply_path) # ply downsample to ply\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1df326d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8544bb3e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jupyter environment detected. Enabling Open3D WebVisualizer.\n", + "[Open3D INFO] WebRTC GUI backend enabled.\n", + "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n", + "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd to /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply\n" + ] + } + ], + "source": [ + "# pcd 2 ply\n", + "# 将原pcd点云,转换为ply点云\n", + "from tools.points_utils import pcd_2_ply\n", + "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd'\n", + "ply_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n", + "\n", + "pcd_2_ply(pcd_file,ply_file)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2bf16bb1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jupyter environment detected. 
Enabling Open3D WebVisualizer.\n", + "[Open3D INFO] WebRTC GUI backend enabled.\n", + "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n", + "正在写入COLMAP TXT文件: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n", + "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply to Colmap TXT in: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n" + ] + } + ], + "source": [ + "# pcd 2 txt\n", + "from tools.points_utils import pcd_2_colmap_txt\n", + "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply'\n", + "txt_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt'\n", + "\n", + "pcd_2_colmap_txt(pcd_file, txt_file, is_white=True)" + ] + }, + { + "cell_type": "markdown", + "id": "6fd0b306", + "metadata": {}, + "source": [ + "选择一个合适的 points3D.ply 文件复制到 sparse/0 下" + ] + }, + { + "cell_type": "markdown", + "id": "22e87bd3", + "metadata": {}, + "source": [ + "## 1.2 Resize for more images" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91ce4cb8", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import cv2\n", + "from tqdm import tqdm\n", + "\n", + "def resize_images(input_dir, output_dir, extensions):\n", + " \"\"\"\n", + " 读取输入文件夹中的所有图片,调整为1/2大小后保存到输出文件夹\n", + " \n", + " Args:\n", + " input_dir: 输入图片文件夹路径\n", + " output_dir: 输出图片文件夹路径\n", + " extensions: 支持的图片扩展名列表\n", + " \"\"\"\n", + " # 确保输出目录存在\n", + " os.makedirs(output_dir, exist_ok=True)\n", + " \n", + " # 获取所有图片文件\n", + " image_files = []\n", + " for file in os.listdir(input_dir):\n", + " if any(file.lower().endswith(ext) for ext in extensions):\n", + " image_files.append(file)\n", + " \n", + " 
if not image_files:\n", + " print(f\"在 {input_dir} 中未找到支持的图片文件\")\n", + " return\n", + " \n", + " print(f\"找到 {len(image_files)} 张图片\")\n", + " \n", + " # 处理每张图片\n", + " count = 0\n", + " for file in tqdm(image_files, desc=\"处理中\"):\n", + " input_path = os.path.join(input_dir, file)\n", + " output_path = os.path.join(output_dir, file)\n", + " \n", + " try:\n", + " # 读取图片\n", + " img = cv2.imread(input_path)\n", + " if img is None:\n", + " print(f\"警告: 无法读取图片 {input_path},跳过\")\n", + " continue\n", + " \n", + " # 获取原始尺寸\n", + " height, width = img.shape[:2]\n", + " \n", + " # 计算新尺寸\n", + " new_width = width // 2\n", + " new_height = height // 2\n", + " \n", + " # 调整尺寸\n", + " resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)\n", + " \n", + " # 保存图片\n", + " cv2.imwrite(output_path, resized_img)\n", + " \n", + " # 输出尺寸信息\n", + " if count == 0:\n", + " print(f\"{file}: {width}x{height} -> {new_width}x{new_height}\")\n", + " count += 1\n", + " except Exception as e:\n", + " print(f\"错误: 处理图片 {input_path} 时出错: {str(e)}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b4da36", + "metadata": {}, + "outputs": [], + "source": [ + "input_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps'\n", + "output_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps_2'\n", + "exts = ['jpg','jpeg','png']\n", + "\n", + "resize_images(input_path, output_path, exts)\n", + "print(\"所有图片处理完成!\") " + ] + }, + { + "cell_type": "markdown", + "id": "3830cb91", + "metadata": {}, + "source": [ + "# 2. 
LIVO2和Colmap的重建对比实验" + ] + }, + { + "cell_type": "markdown", + "id": "7dd6ff70", + "metadata": {}, + "source": [ + "colmap无法恢复相机位姿;所以这里我们使用livo2恢复位姿后,用colmap 进行三角测量获取关键点\n", + "https://www.cnblogs.com/Todd-Qi/p/15080968.html" + ] + }, + { + "cell_type": "markdown", + "id": "4ad9f127", + "metadata": {}, + "source": [ + "## 2.1 基于Livo2位姿进行稀疏重建" + ] + }, + { + "cell_type": "markdown", + "id": "40e8c7f1", + "metadata": {}, + "source": [ + "colmap无法恢复相机位姿;所以这里我们使用livo2恢复位姿后,用colmap 进行三角测量获取关键点\n", + "https://www.cnblogs.com/Todd-Qi/p/15080968.html" + ] + }, + { + "cell_type": "markdown", + "id": "497d715a", + "metadata": {}, + "source": [ + "1. 准备来自Livo2的位姿和相机数据 cameras.txt, images.txt\n", + " 将内参(camera intrinsics) 放入cameras.txt, 外参(camera extrinsics)放入 images.txt , points3D.txt 为空 \n", + " - images.txt 中全部 0.0 0.0 -1 删除; \n", + " - points3D.txt 内容清空;\n", + " - cameras.txt 中的内参进行修改 (对输入图像全部进行了 resize 操作,因此需要修改相机内参,将fx, fy, cx, cy 都除以2)" + ] + }, + { + "cell_type": "markdown", + "id": "497596e0", + "metadata": {}, + "source": [ + "2. 特征提取与特征匹配 \n", + "``` bash\n", + " colmap feature_extractor \\\n", + " --database_path /path/to/project/database.db \\\n", + " --image_path /path/to/project/images\n", + "```\n", + "``` bash\n", + " colmap exhaustive_matcher \\\n", + " --database_path /path/to/project/database.db\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "590c5ba8", + "metadata": {}, + "source": [ + "3. 
三角化重建 (保存的点云和其他文件均为bin格式)\n", + "``` bash\n", + " colmap point_triangulator \\\n", + " --database_path /path/to/project/database.db \\\n", + " --image_path /path/to/project/images \\\n", + " --input_path /path/to/sparse_model \\\n", + " --output_path /path/to/triangulated_model\n", + "\n", + "```\n", + "\n", + "查看txt结果\n", + "``` bash\n", + " colmap model_converter \\\n", + " --input_path 0 \\\n", + " --output_path 0_txt_from_livo2 \\\n", + " --output_type TXT\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "6c2b853f", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "id": "d359ac68", + "metadata": {}, + "source": [ + "4. 稠密重建(optional)" + ] + }, + { + "cell_type": "markdown", + "id": "77250b4b", + "metadata": {}, + "source": [ + "# 3.训练" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d44f71f7", + "metadata": {}, + "outputs": [], + "source": [ + "# baseline raw gs for training\n", + "!CUDA_VISIBLE_DEVICES=1 python train.py \\\n", + " -s data/tree_01_livo2 \\\n", + " -m data/tree_01_livo2/outputs/3dgs_baseline\n", + " \n", + "# render\n", + "!CUDA_VISIBLE_DEVICES=1 python render.py \\\n", + " -s data/tree_01_colmap \\\n", + " -m data/tree_01_colmap/outputs/3dgs_baseline " + ] + }, + { + "cell_type": "markdown", + "id": "a363d15d", + "metadata": {}, + "source": [ + "# 4.深度实验" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1de51e3b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- depth_maps_2文件夹 ---\n", + "找到图片:00144.png\n", + "分辨率:1024 x 768 像素\n", + "通道数:3\n", + "通道说明:三通道(RGB)\n", + "--- images我文件夹 ---\n", + "找到图片:00144.png\n", + "分辨率:1024 x 768 像素\n", + "通道数:3\n", + "通道说明:三通道(RGB)\n" + ] + } + ], + "source": [ + "# 一、确认图像与深度图像尺寸是否一致\n", + "from tools.images_utils import get_first_image_info\n", + "\n", + "folder_path = \"/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2\"\n", + 
"print(\"--- depth_maps_2文件夹 ---\")\n", + "get_first_image_info(folder_path)\n", + "folder_path = \"/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/images\"\n", + "print(\"--- images我文件夹 ---\")\n", + "get_first_image_info(folder_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6ee3b672", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "成功生成 JSON 文件:/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/sparse/0/depth_params.json\n", + "共处理 539 个 PNG 文件\n" + ] + } + ], + "source": [ + "# 2.在 sparse/0 下生成一个 depth_params.json\n", + "\n", + "# 示例数据:\n", + "# {\n", + "# \"DSC_3893\": {\n", + "# \"scale\": 50.55648822378238,\n", + "# \"offset\": 0.01793079636288747\n", + "# },\n", + "# \"DSC_3902\": {\n", + "# \"scale\": 56.80526691363168,\n", + "# \"offset\": 0.027163276135205097\n", + "# }\n", + "# }\n", + "\n", + "from tools.images_utils import generate_depth_params_json\n", + "import os\n", + "\n", + "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/'\n", + "PNG_FOLDER = os.path.join(folder_path, 'depth_maps')\n", + "OUTPUT_PATH = os.path.join(folder_path,\"sparse/0/depth_params.json\")\n", + "# 生成 JSON 文件\n", + "generate_depth_params_json(PNG_FOLDER, OUTPUT_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b337a10c", + "metadata": {}, + "outputs": [], + "source": [ + "# 重命名 删去 -final的后缀\n", + "from tools.images_utils import batch_rename_files\n", + "\n", + "\n", + "FOLDER_PATH = \"/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/depth_maps\" # 请替换为实际的文件夹路径\n", + "batch_rename_files(FOLDER_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f60df4e", + "metadata": {}, + "outputs": [], + "source": [ + "# 如果有深度mesh缺失\n", + "# 
~/git_project/livo2-data-utils/10-Mesh-acc$ \n", + "# python demo-acc.py -i \\\n", + "# /home/qinllgroup/hongxiangyu/git_project/livo2-data-utils/10-Mesh-acc/data/delete \\\n", + "# -o /home/qinllgroup/hongxiangyu/git_project/livo2-data-utils/10-Mesh-acc/data/delete \\\n", + "# --max_edge 15\n", + "\n", + "from tools.images_utils import resize_image\n", + "\n", + "input_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps/00466-final.png'\n", + "output_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2/00466.png'\n", + "resize_image(input_path, output_path, scale=2, quality=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "024d1b19", + "metadata": {}, + "outputs": [], + "source": [ + "# 3. 训练深度mask\n", + "!CUDA_VISIBLE_DEVICES=1 python train.py \\\n", + " -s data/tree_01_livo2 \\\n", + " -d data/tree_01_livo2/depth_map \\\n", + " -m data/tree_01_livo2/outputs/3dgs_baseline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "254f6278", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "成功将图片从 2048x1536 缩小到 1024x768\n", + "保存路径: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2/00466.png\n" + ] + }, + { + "data": { + "text/plain": [ + "'/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2/00466.png'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "markdown", + "id": "66d0f159", + "metadata": {}, + "source": [ + "## 4.1 深度读取" + ] + }, + { + "cell_type": "markdown", + "id": "7406df01", + "metadata": {}, + "source": [ + "# 5.可视化TensorBoard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b0acade", + "metadata": {}, + "outputs": [], + "source": [] + } + ], 
+ "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/xy_utils/learning.ipynb b/xy_utils/learning.ipynb index 434e30e..06fd218 100644 --- a/xy_utils/learning.ipynb +++ b/xy_utils/learning.ipynb @@ -2,578 +2,19 @@ "cells": [ { "cell_type": "markdown", - "id": "b66179c8", + "id": "bf253aab", "metadata": {}, "source": [ - "# 1.预处理数据" - ] - }, - { - "cell_type": "markdown", - "id": "533db061", - "metadata": {}, - "source": [ - "## 1.1 对点云数据进行处理 - 滤除环视范围外过远的点" - ] - }, - { - "cell_type": "markdown", - "id": "864843b2", - "metadata": {}, - "source": [ - "1. 解析 images.txt,计算所有相机在世界坐标系下的光心(位姿)。 \n", - "2. 使用 Open3D 加载 .pcd 点云,并基于与最近相机光心的距离进行滤除。 \n", - "3. 输出相机光心的范围(坐标轴最小/最大值)以及被滤除的点云数量。 \n", - "4. 
将保留的点云按 COLMAP points3D.txt 格式保存,并额外生成一个 PLY 文件,其中红色标记被删除的点,原色保留未删除点。" - ] - }, - { - "cell_type": "markdown", - "id": "5dc3ac56", - "metadata": {}, - "source": [ - "#### ipynb内部函数 -- 未进行地面与boundingbox对齐" + "# 基于CUDA的KNN计算" ] }, { "cell_type": "code", "execution_count": null, - "id": "cc2e8c18", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import open3d as o3d\n", - "from scipy.spatial.transform import Rotation as R\n", - "\n", - "def filter_point_cloud(images_txt, pcd_path, output_points_txt, output_points_ply, output_ply_path):\n", - " \"\"\"\n", - " 根据相机位姿过滤点云,并保存结果。\n", - "\n", - " Args:\n", - " images_txt (str): 包含相机位姿的 images.txt 文件路径。\n", - " pcd_path (str): 点云文件路径。\n", - " output_points_txt (str): 保存过滤后点云的 points3D.txt 文件路径。\n", - " output_ply_path (str): 保存带颜色标记的 PLY 文件路径。\n", - " \"\"\"\n", - " def load_camera_centers(images_txt):\n", - " centers = []\n", - " with open(images_txt, 'r') as f:\n", - " for line in f:\n", - " if line.startswith('#') or len(line.strip()) == 0:\n", - " continue\n", - " elems = line.strip().split()\n", - " if len(elems) < 10:\n", - " continue\n", - " qw, qx, qy, qz = map(float, elems[1:5])\n", - " tx, ty, tz = map(float, elems[5:8])\n", - " rot = R.from_quat([qx, qy, qz, qw]).as_matrix()\n", - " center = -rot.T @ np.array([tx, ty, tz])\n", - " centers.append(center)\n", - " return np.array(centers)\n", - "\n", - " # 1. 加载相机中心\n", - " camera_centers = load_camera_centers(images_txt)\n", - "\n", - " # 2. 计算平均中心和最大偏移\n", - " center_mean = camera_centers.mean(axis=0)\n", - " offsets = np.abs(camera_centers - center_mean)\n", - " max_offset = offsets.max(axis=0)\n", - "\n", - " # 3. 加载点云\n", - " pcd = o3d.io.read_point_cloud(pcd_path)\n", - " points = np.asarray(pcd.points)\n", - " colors = np.asarray(pcd.colors) if pcd.has_colors() else np.ones_like(points)\n", - "\n", - " # 4. 
构建包围盒并裁剪点云\n", - " min_bound = center_mean - max_offset - 15\n", - " max_bound = center_mean + max_offset + 15\n", - " aabb = o3d.geometry.AxisAlignedBoundingBox(min_bound, max_bound)\n", - " pcd_cropped = pcd.crop(aabb)\n", - "\n", - " # 5. 统计信息\n", - " total_points = len(points)\n", - " kept_points = np.asarray(pcd_cropped.points)\n", - " kept_colors = np.asarray(pcd_cropped.colors) if pcd_cropped.has_colors() else np.ones_like(kept_points)\n", - " removed_mask = np.ones(total_points, dtype=bool)\n", - " kept_indices = aabb.get_point_indices_within_bounding_box(pcd.points)\n", - " removed_mask[kept_indices] = False\n", - " removed_points = points[removed_mask]\n", - " removed_colors = colors[removed_mask]\n", - "\n", - " print(\"相机中心范围:\")\n", - " print(f\" X: [{min_bound[0]:.3f}, {max_bound[0]:.3f}]\")\n", - " print(f\" Y: [{min_bound[1]:.3f}, {max_bound[1]:.3f}]\")\n", - " print(f\" Z: [{min_bound[2]:.3f}, {max_bound[2]:.3f}]\")\n", - " print(f\"总点数:{total_points}\")\n", - " print(f\"保留点数:{len(kept_points)}\")\n", - " print(f\"删除点数:{len(removed_points)}\")\n", - "\n", - " # 6. 保存 points3D.txt (速度太慢,直接使用 ply )\n", - " with open(output_points_txt, \"w\") as f:\n", - " f.write(\"# 3D point list with one line of data per point:\\n\")\n", - " f.write(\"# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[]\\n\")\n", - " for idx, (pt, col) in enumerate(zip(kept_points, kept_colors), start=1):\n", - " r, g, b = (col * 255).astype(int)\n", - " f.write(f\"{idx} {pt[0]:.6f} {pt[1]:.6f} {pt[2]:.6f} {r} {g} {b} 0\\n\")\n", - " # 保存为 ply 文件\n", - " pcd_kept = o3d.geometry.PointCloud()\n", - " pcd_kept.points = o3d.utility.Vector3dVector(kept_points)\n", - " pcd_kept.colors = o3d.utility.Vector3dVector(kept_colors)\n", - " o3d.io.write_point_cloud(output_points_ply, pcd_kept)\n", - " print(\"保留的点云已保存为 {output_points_ply}\")\n", - " \n", - " # 7. 
保存带颜色的 PLY 文件\n", - " all_points = np.vstack((kept_points, removed_points))\n", - " removed_colors_red = np.tile([1.0, 0.0, 0.0], (len(removed_points), 1))\n", - " all_colors = np.vstack((kept_colors, removed_colors_red))\n", - " pcd_all = o3d.geometry.PointCloud()\n", - " pcd_all.points = o3d.utility.Vector3dVector(all_points)\n", - " pcd_all.colors = o3d.utility.Vector3dVector(all_colors)\n", - " o3d.io.write_point_cloud(output_ply_path, pcd_all)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "80b924be", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "相机中心范围:\n", - " X: [-0.200, 28.270]\n", - " Y: [-16.082, 1.699]\n", - " Z: [-9.392, 0.714]\n", - "总点数:16404321\n", - "保留点数:10684495\n", - "删除点数:5719826\n", - "保留的点云已保存为 {output_points_ply}\n" - ] - } - ], - "source": [ - "import os\n", - "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n", - "\n", - "images_txt = os.path.join(folder_path, 'sparse/0/images.txt')\n", - "pcd_path = os.path.join(folder_path, 'pcd/all_raw_points.pcd')\n", - "output_points_ply = os.path.join(folder_path, 'pcd/points3D.ply')\n", - "vis_ply_path = os.path.join(folder_path, 'pcd/vis_filter.ply')\n", - "output_points_txt = os.path.join(folder_path, 'pcd/points3D.txt')\n", - "filter_point_cloud(images_txt,pcd_path,output_points_txt, output_points_ply,vis_ply_path)" - ] - }, - { - "cell_type": "markdown", - "id": "1a4206e8", - "metadata": {}, - "source": [ - "#### 调用 crop_points.py 文件" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d8c55ed", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "总点数:16404321,保留:15254842,删除:1149479\n" - ] - } - ], - "source": [ - "!python crop_points.py \\\n", - " --images_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/sparse/0/images.txt \\\n", - " 
--pcd_path /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/all_raw_points.pcd \\\n", - " --output_points_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D.txt \\\n", - " --output_ply /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/filtered_colored.ply \\\n", - " --margin 10.0" - ] - }, - { - "cell_type": "markdown", - "id": "651ed72f", - "metadata": {}, - "source": [ - "## 1.2 利用体素化降采样" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "56f5cba7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "raw points len : 16404321\n", - "downsample points len : 4671298\n", - "降采样后的点云已保存到 /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_0.05.ply,体素大小:0.05\n" - ] - } - ], - "source": [ - "import os\n", - "from tools.points_utils import voxel_downsample_and_save\n", - "\n", - "voxel_size = 0.05\n", - "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n", - "input_ply_path = os.path.join(folder_path,'pcd/points3D.ply')\n", - "output_ply_path = os.path.join(folder_path,f'pcd/points3D_{voxel_size}.ply')\n", - "input_ply_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n", - "output_ply_path = f'/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_{voxel_size}.ply'\n", - "\n", - "voxel_downsample_and_save(voxel_size, input_ply_path, output_ply_path) # ply downsample to ply\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1df326d", + "id": "1ad23d69", "metadata": {}, "outputs": [], "source": [] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": 
"8544bb3e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Jupyter environment detected. Enabling Open3D WebVisualizer.\n", - "[Open3D INFO] WebRTC GUI backend enabled.\n", - "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n", - "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd to /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply\n" - ] - } - ], - "source": [ - "# pcd 2 ply\n", - "# 将原pcd点云,转换为ply点云\n", - "from tools.points_utils import pcd_2_ply\n", - "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd'\n", - "ply_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n", - "\n", - "pcd_2_ply(pcd_file,ply_file)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "2bf16bb1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Jupyter environment detected. 
Enabling Open3D WebVisualizer.\n", - "[Open3D INFO] WebRTC GUI backend enabled.\n", - "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n", - "正在写入COLMAP TXT文件: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n", - "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply to Colmap TXT in: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n" - ] - } - ], - "source": [ - "# pcd 2 txt\n", - "from tools.points_utils import pcd_2_colmap_txt\n", - "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply'\n", - "txt_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt'\n", - "\n", - "pcd_2_colmap_txt(pcd_file, txt_file, is_white=True)" - ] - }, - { - "cell_type": "markdown", - "id": "6fd0b306", - "metadata": {}, - "source": [ - "选择一个合适的 points3D.ply 文件复制到 sparse/0 下" - ] - }, - { - "cell_type": "markdown", - "id": "22e87bd3", - "metadata": {}, - "source": [ - "## 1.2 Resize for more images" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "91ce4cb8", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import cv2\n", - "from tqdm import tqdm\n", - "\n", - "def resize_images(input_dir, output_dir, extensions):\n", - " \"\"\"\n", - " 读取输入文件夹中的所有图片,调整为1/2大小后保存到输出文件夹\n", - " \n", - " Args:\n", - " input_dir: 输入图片文件夹路径\n", - " output_dir: 输出图片文件夹路径\n", - " extensions: 支持的图片扩展名列表\n", - " \"\"\"\n", - " # 确保输出目录存在\n", - " os.makedirs(output_dir, exist_ok=True)\n", - " \n", - " # 获取所有图片文件\n", - " image_files = []\n", - " for file in os.listdir(input_dir):\n", - " if any(file.lower().endswith(ext) for ext in extensions):\n", - " image_files.append(file)\n", - " \n", - " 
if not image_files:\n", - " print(f\"在 {input_dir} 中未找到支持的图片文件\")\n", - " return\n", - " \n", - " print(f\"找到 {len(image_files)} 张图片\")\n", - " \n", - " # 处理每张图片\n", - " count = 0\n", - " for file in tqdm(image_files, desc=\"处理中\"):\n", - " input_path = os.path.join(input_dir, file)\n", - " output_path = os.path.join(output_dir, file)\n", - " \n", - " try:\n", - " # 读取图片\n", - " img = cv2.imread(input_path)\n", - " if img is None:\n", - " print(f\"警告: 无法读取图片 {input_path},跳过\")\n", - " continue\n", - " \n", - " # 获取原始尺寸\n", - " height, width = img.shape[:2]\n", - " \n", - " # 计算新尺寸\n", - " new_width = width // 2\n", - " new_height = height // 2\n", - " \n", - " # 调整尺寸\n", - " resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)\n", - " \n", - " # 保存图片\n", - " cv2.imwrite(output_path, resized_img)\n", - " \n", - " # 输出尺寸信息\n", - " if count == 0:\n", - " print(f\"{file}: {width}x{height} -> {new_width}x{new_height}\")\n", - " count += 1\n", - " except Exception as e:\n", - " print(f\"错误: 处理图片 {input_path} 时出错: {str(e)}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e9b4da36", - "metadata": {}, - "outputs": [], - "source": [ - "input_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps'\n", - "output_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps_2'\n", - "exts = ['jpg','jpeg','png']\n", - "\n", - "resize_images(input_path, output_path, exts)\n", - "print(\"所有图片处理完成!\") " - ] - }, - { - "cell_type": "markdown", - "id": "3830cb91", - "metadata": {}, - "source": [ - "# 2. 
LIVO2和Colmap的重建对比实验" - ] - }, - { - "cell_type": "markdown", - "id": "7dd6ff70", - "metadata": {}, - "source": [ - "colmap无法恢复相机位姿;所以这里我们使用livo2恢复位姿后,用colmap 进行三角测量获取关键点\n", - "https://www.cnblogs.com/Todd-Qi/p/15080968.html" - ] - }, - { - "cell_type": "markdown", - "id": "4ad9f127", - "metadata": {}, - "source": [ - "## 2.1 基于Livo2位姿进行稀疏重建" - ] - }, - { - "cell_type": "markdown", - "id": "40e8c7f1", - "metadata": {}, - "source": [ - "colmap无法恢复相机位姿;所以这里我们使用livo2恢复位姿后,用colmap 进行三角测量获取关键点\n", - "https://www.cnblogs.com/Todd-Qi/p/15080968.html" - ] - }, - { - "cell_type": "markdown", - "id": "497d715a", - "metadata": {}, - "source": [ - "1. 准备来自Livo2的位姿和相机数据 cameras.txt, images.txt\n", - " 将内参(camera intrinsics) 放入cameras.txt, 外参(camera extrinsics)放入 images.txt , points3D.txt 为空 \n", - " - images.txt 中全部 0.0 0.0 -1 删除; \n", - " - points3D.txt 内容清空;\n", - " - cameras.txt 中的内参进行修改 (对输入图像全部进行了 resize 操作,因此需要修改相机内参,将fx, fy, cx, cy 都除以2)" - ] - }, - { - "cell_type": "markdown", - "id": "497596e0", - "metadata": {}, - "source": [ - "2. 特征匹配与特征提取 \n", - "``` bash\n", - " colmap feature_extractor \\\n", - " --database_path /path/to/project/database.db \\ \n", - " --image_path /path/to/project/images\n", - "```\n", - "``` bash\n", - " colmap exhaustive_matcher \\\n", - " --database_path /path/to/project/database.db\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "590c5ba8", - "metadata": {}, - "source": [ - "3. 
三角化重建 (保存的点云和其他文件均为bin格式)\n", - "``` bash\n", - " colmap point_triangulator \\\n", - " --database_path /path/to/project/database.db \\\n", - " --image_path /path/to/project/images \\\n", - " --input_path /path/to/sparse_model \\\n", - " --output_path /path/to/triangulated_model\n", - "\n", - "```\n", - "\n", - "查看txt结果\n", - "``` bash\n", - " colmap model_converter \\\n", - " --input_path 0 \\\n", - " --output_path 0_txt_from_livo2 \\\n", - " --output_type TXT\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "6c2b853f", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "id": "d359ac68", - "metadata": {}, - "source": [ - "4. 稠密重建(optional)" - ] - }, - { - "cell_type": "markdown", - "id": "77250b4b", - "metadata": {}, - "source": [ - "# 3.训练" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d44f71f7", - "metadata": {}, - "outputs": [], - "source": [ - "# baseline raw gs for training\n", - "!CUDA_VISIBLE_DEVICES=1 python train.py \\\n", - " -s data/tree_01_livo2 \\\n", - " -m data/tree_01_livo2/outputs/3dgs_baseline\n", - " \n", - "# render\n", - "!CUDA_VISIBLE_DEVICES=1 python render.py \\\n", - " -s data/tree_01_colmap \\\n", - " -m data/tree_01_colmap/outputs/3dgs_baseline " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0156e92d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[2, 4, 5], [1, 5, 4]]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "iss = [[1,5,4],[2,4,5]]\n", - "iss.sort(key=lambda x: [x[1],x[0]], reverse=True)\n", - "iss" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf857aee", - "metadata": {}, - "outputs": [], - "source": [ - "class Solution(object):\n", - " def merge(self, intervals):\n", - " \"\"\"\n", - " :type intervals: List[List[int]]\n", - " :rtype: List[List[int]]\n", - " \"\"\"\n", - " if not intervals:\n", - " return []\n", - " # 
"""Helpers for inspecting, resizing and renaming image files in a dataset folder.

Pillow is only required by the helpers that decode images
(``get_first_image_info`` and ``resize_image``); the filesystem-only helpers
(``generate_depth_params_json`` and ``batch_rename_files``) work without it.
"""
import json
import os
import re

try:
    from PIL import Image
except ImportError:  # keep the filesystem-only helpers importable without Pillow
    Image = None

# Extensions (lower-case, with dot) treated as images by get_first_image_info.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# "<stem>-final<.ext>" -> groups: (stem, optional ".ext").
_FINAL_SUFFIX_PATTERN = re.compile(r'^(.*?)-final(\.[^.]+)?$')

# Human-readable note printed for the common channel counts.
_CHANNEL_NOTES = {
    1: "通道说明:单通道(可能是灰度图)",
    3: "通道说明:三通道(RGB)",
    4: "通道说明:四通道(RGBA,包含Alpha通道)",
}


def _open_image(path):
    """Open *path* with Pillow, raising a clear ImportError if Pillow is missing."""
    if Image is None:
        raise ImportError("Pillow is required to read images (pip install Pillow)")
    return Image.open(path)


def get_first_image_info(folder_path):
    """Print the resolution and channel count of the first image in a folder.

    "First" means first in sorted filename order, so repeated runs on the same
    folder always report the same file.

    Args:
        folder_path: Directory to scan (non-recursive).

    Returns:
        None. Results and errors are reported on stdout.
    """
    if not os.path.isdir(folder_path):
        print(f"错误:文件夹 '{folder_path}' 不存在")
        return

    image_path = next(
        (
            os.path.join(folder_path, name)
            for name in sorted(os.listdir(folder_path))
            if os.path.splitext(name)[1].lower() in _IMAGE_EXTENSIONS
        ),
        None,
    )
    if image_path is None:
        print(f"错误:在文件夹 '{folder_path}' 中未找到图片文件")
        return

    try:
        with _open_image(image_path) as img:
            width, height = img.size
            channels = len(img.getbands())
    except Exception as e:  # best-effort inspection: report and give up
        print(f"错误:无法处理图片 '{image_path}' - {str(e)}")
        return

    print(f"找到图片:{os.path.basename(image_path)}")
    print(f"分辨率:{width} x {height} 像素")
    print(f"通道数:{channels}")
    print(_CHANNEL_NOTES.get(channels, f"通道说明:非常规通道数({channels}通道)"))


def generate_depth_params_json(png_folder, output_path):
    """Write a depth_params-style JSON with one entry per PNG in a folder.

    Every PNG file name (without extension) becomes a key mapped to
    ``{"scale": 0.0, "offset": 0.0}``. Keys are written in sorted order so the
    output is reproducible.
    NOTE(review): the zeros look like "no rescale" placeholders — confirm
    against whatever loader consumes this file.

    Args:
        png_folder: Directory containing the PNG images (non-recursive).
        output_path: Destination JSON path; missing parent directories are
            created.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    if not os.path.isdir(png_folder):
        print(f"错误:PNG 图像文件夹 '{png_folder}' 不存在")
        return

    base_names = sorted(
        os.path.splitext(name)[0]
        for name in os.listdir(png_folder)
        if name.lower().endswith('.png')
        and os.path.isfile(os.path.join(png_folder, name))
    )
    if not base_names:
        print(f"错误:在文件夹 '{png_folder}' 中未找到 PNG 文件")
        return

    json_data = {name: {"scale": 0.0, "offset": 0.0} for name in base_names}

    try:
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2)
    except OSError as e:
        print(f"错误:无法写入 JSON 文件 '{output_path}' - {str(e)}")
        return

    print(f"成功生成 JSON 文件:{output_path}")
    print(f"共处理 {len(base_names)} 个 PNG 文件")


def resize_image(input_path, output_path=None,scale=2, quality=100):
    """Downscale a single image by ``scale`` and save the result.

    Example:
        resize_image('depth/00142-final.png', 'depth_maps_2/00142.png')

    Args:
        input_path: Path of the image to read.
        output_path: Where to save the result. Defaults to the input path with
            ``_resized`` inserted before the extension.
        scale: Positive divisor applied to width and height (default 2, i.e.
            half resolution). Each resulting side is at least 1 pixel.
        quality: Forwarded to Pillow's ``save`` as ``quality`` (default 100).

    Returns:
        The path the image was written to, or None if anything failed (the
        error is printed rather than raised).
    """
    try:
        if scale <= 0:
            raise ValueError(f"scale must be positive, got {scale!r}")

        with _open_image(input_path) as img:
            width, height = img.size

            # Integer pixel sizes, never collapsing a side to 0 for tiny inputs.
            new_width = max(1, int(width // scale))
            new_height = max(1, int(height // scale))

            resized_img = img.resize((new_width, new_height), Image.LANCZOS)

            if output_path is None:
                base, ext = os.path.splitext(input_path)
                output_path = f"{base}_resized{ext}"

            # The format is inferred from the output extension.
            resized_img.save(output_path, quality=quality)
    except Exception as e:  # deliberate best-effort boundary: report, return None
        print(f"错误: 无法处理图片 {input_path} - {str(e)}")
        return None

    print(f"成功将图片从 {width}x{height} 缩小到 {new_width}x{new_height}")
    print(f"保存路径: {output_path}")
    return output_path


def batch_rename_files(folder_path):
    """Strip a trailing ``-final`` from the stem of every file in a folder.

    ``00142-final.png`` becomes ``00142.png``. Sub-directories are ignored.
    A file is skipped (never overwritten) when the target name already exists
    or would be empty.

    Args:
        folder_path: Directory whose files are renamed (non-recursive).

    Returns:
        None. Each rename, skip and error is reported on stdout.
    """
    if not os.path.isdir(folder_path):
        print(f"错误:文件夹 '{folder_path}' 不存在")
        return

    renamed_count = 0

    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            continue

        match = _FINAL_SUFFIX_PATTERN.match(filename)
        if not match:
            continue

        new_name = match.group(1) + (match.group(2) or '')
        new_path = os.path.join(folder_path, new_name)

        # os.rename silently replaces an existing target on POSIX; refuse
        # instead of destroying the file that already carries the clean name.
        if not new_name or os.path.exists(new_path):
            print(f"跳过:目标 '{new_name}' 已存在或无效,未重命名 '{filename}'")
            continue

        try:
            os.rename(file_path, new_path)
        except OSError as e:
            print(f"错误:无法重命名文件 '{filename}' - {str(e)}")
        else:
            print(f"已重命名: {filename} -> {new_name}")
            renamed_count += 1

    print(f"重命名完成!共处理 {renamed_count} 个文件")