add lidar depth with mesh

2025-06-26 18:18:11 +00:00 · 2025-05-26 14:54:16 +08:00 · 2025-05-26 14:54:16 +08:00 · b787991bb2
commit b787991bb2
parent 81590b1f57
8 changed files with 950 additions and 582 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -4,10 +4,10 @@
 [submodule "submodules/diff-gaussian-rasterization"]
 	path = submodules/diff-gaussian-rasterization
 	url = https://github.com/graphdeco-inria/diff-gaussian-rasterization.git
-	branch = dr_aa
+	branch = 3dgs_accel 	# branch = dr_aa
 [submodule "SIBR_viewers"]
 	path = SIBR_viewers
 	url = https://gitlab.inria.fr/sibr/sibr_core.git
 [submodule "submodules/fused-ssim"]
 	path = submodules/fused-ssim
-	url = https://github.com/rahul-goel/fused-ssim.git
+	url = https://github.com/rahul-goel/fused-ssim.git
--- a/arguments/init.py
+++ b/arguments/init.py
@ -56,10 +56,13 @@ class ModelParams(ParamGroup):
        self.train_test_exp = False
        self.data_device = "cuda"
        self.eval = False
+        # xy-new
        self.skybox_locked = False
        self.skybox_num = 0
        self.scaffold_file = ""
        self.bounds_file = ""
+        self.is_lidar_depth = False
+        self.lidar_depth_max = 30.0
        super().__init__(parser, "Loading Parameters", sentinel) # add parameters into parser

    def extract(self, args):
--- a/scene/cameras.py
+++ b/scene/cameras.py
@ -20,7 +20,8 @@ class Camera(nn.Module):
    def __init__(self, resolution, colmap_id, R, T, FoVx, FoVy, depth_params, image, invdepthmap,
                 image_name, uid,
                 trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda",
-                 train_test_exp = False, is_test_dataset = False, is_test_view = False
+                 train_test_exp = False, is_test_dataset = False, is_test_view = False,
+                 is_lidar_depth  = False, max_depth = 30.0
                 ):
        super(Camera, self).__init__()

@ -39,13 +40,13 @@ class Camera(nn.Module):
            print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" )
            self.data_device = torch.device("cuda")

-        resized_image_rgb = PILtoTorch(image, resolution)
+        resized_image_rgb = PILtoTorch(image, resolution)# [3,h,w]
        gt_image = resized_image_rgb[:3, ...]
        self.alpha_mask = None
        if resized_image_rgb.shape[0] == 4:
            self.alpha_mask = resized_image_rgb[3:4, ...].to(self.data_device)
        else: 
-            self.alpha_mask = torch.ones_like(resized_image_rgb[0:1, ...].to(self.data_device))
+            self.alpha_mask = torch.ones_like(resized_image_rgb[0:1, ...].to(self.data_device))# [1,h,w] all 1

        if train_test_exp and is_test_view:
            if is_test_dataset:
@ -61,8 +62,19 @@ class Camera(nn.Module):
        self.depth_reliable = False
        if invdepthmap is not None:
            self.depth_mask = torch.ones_like(self.alpha_mask)
-            self.invdepthmap = cv2.resize(invdepthmap, resolution)
-            self.invdepthmap[self.invdepthmap < 0] = 0
+            
+            # xy-new lidar depth to invdepth
+            if is_lidar_depth:
+                depth_raw = invdepthmap
+                depth_lin = depth_raw / 255.0 * max_depth
+                mask_np = depth_lin > 0
+                invdepth_lidar = np.zeros_like(depth_lin)
+                invdepth_lidar[mask_np] = 1.0 / (depth_lin[mask_np] + 1e-6)
+                
+                self.depth_mask = torch.from_numpy(mask_np).to(self.data_device)
+
+            self.invdepthmap = cv2.resize(invdepth_lidar, resolution)
+            self.invdepthmap[self.invdepthmap < 0] = 0            
            self.depth_reliable = True

            if depth_params is not None:
@ -73,9 +85,9 @@ class Camera(nn.Module):
                if depth_params["scale"] > 0:
                    self.invdepthmap = self.invdepthmap * depth_params["scale"] + depth_params["offset"]

-            if self.invdepthmap.ndim != 2:
-                self.invdepthmap = self.invdepthmap[..., 0]
-            self.invdepthmap = torch.from_numpy(self.invdepthmap[None]).to(self.data_device)
+            if self.invdepthmap.ndim != 2: # [h,w,3]
+                self.invdepthmap = self.invdepthmap[..., 0] # [h,w]
+            self.invdepthmap = torch.from_numpy(self.invdepthmap[None]).to(self.data_device) # [1,h,w]

        self.zfar = 100.0
        self.znear = 0.01
--- a/train-sky.py
+++ b/train-sky.py
@ -198,7 +198,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
                    relevant = (gaussians._opacity.grad != 0).squeeze() # points - opacity gradients != 0
                    gaussians.optimizer.step(relevant, gaussians.get_xyz.shape[0]) # no densification so need step relevant points
                    gaussians.optimizer.zero_grad(set_to_none = True)
-                                
+
            if (iteration in checkpoint_iterations):
                print("\n[ITER {}] Saving Checkpoint".format(iteration))
                torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth")
--- a/utils/camera_utils.py
+++ b/utils/camera_utils.py
@ -24,6 +24,9 @@ def loadCam(args, id, cam_info, resolution_scale, is_nerf_synthetic, is_test_dat
        try:
            if is_nerf_synthetic:
                invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / 512
+            elif args.is_lidar_depth:
+                raw_uint8 = cv2.imread(cam_info.depth_path, 0).astype(np.float32)
+                invdepthmap = raw_uint8 # to filter mask in Camera class
            else:
                invdepthmap = cv2.imread(cam_info.depth_path, -1).astype(np.float32) / float(2**16)

@ -64,7 +67,8 @@ def loadCam(args, id, cam_info, resolution_scale, is_nerf_synthetic, is_test_dat
                  FoVx=cam_info.FovX, FoVy=cam_info.FovY, depth_params=cam_info.depth_params,
                  image=image, invdepthmap=invdepthmap,
                  image_name=cam_info.image_name, uid=id, data_device=args.data_device,
-                  train_test_exp=args.train_test_exp, is_test_dataset=is_test_dataset, is_test_view=cam_info.is_test)
+                  train_test_exp=args.train_test_exp, is_test_dataset=is_test_dataset, is_test_view=cam_info.is_test,
+                  is_lidar_depth=args.is_lidar_depth,max_depth=args.lidar_depth_max)

 def cameraList_from_camInfos(cam_infos, resolution_scale, args, is_nerf_synthetic, is_test_dataset):
    camera_list = []
--- a/xy_utils/experiment.ipynb
+++ b/xy_utils/experiment.ipynb
@ -0,0 +1,733 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b66179c8",
+   "metadata": {},
+   "source": [
+    "# 1.预处理数据"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "533db061",
+   "metadata": {},
+   "source": [
+    "## 1.1 对点云数据进行处理 - 滤除环视范围外过远的点"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "864843b2",
+   "metadata": {},
+   "source": [
+    "1. 解析 images.txt，计算所有相机在世界坐标系下的光心（位姿）。  \n",
+    "2.  使用 Open3D 加载 .pcd 点云，并以相机光心的分布范围向外扩展固定边距构建轴对齐包围盒（AABB），滤除包围盒之外的点。  \n",
+    "3.  输出相机光心的范围（坐标轴最小/最大值）以及被滤除的点云数量。  \n",
+    "4.  将保留的点云按 COLMAP points3D.txt 格式保存，并额外生成一个 PLY 文件，其中红色标记被删除的点，原色保留未删除点。"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5dc3ac56",
+   "metadata": {},
+   "source": [
+    "#### ipynb内部函数 -- 未进行地面与boundingbox对齐"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc2e8c18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import open3d as o3d\n",
+    "from scipy.spatial.transform import Rotation as R\n",
+    "\n",
+    "def filter_point_cloud(images_txt, pcd_path, output_points_txt, output_points_ply, output_ply_path):\n",
+    "    \"\"\"\n",
+    "    根据相机位姿过滤点云，并保存结果。\n",
+    "\n",
+    "    Args:\n",
+    "        images_txt (str): 包含相机位姿的 images.txt 文件路径。\n",
+    "        pcd_path (str): 点云文件路径。\n",
+    "        output_points_txt (str): 保存过滤后点云的 points3D.txt 文件路径。\n",
+    "        output_ply_path (str): 保存带颜色标记的 PLY 文件路径。\n",
+    "    \"\"\"\n",
+    "    def load_camera_centers(images_txt):\n",
+    "        centers = []\n",
+    "        with open(images_txt, 'r') as f:\n",
+    "            for line in f:\n",
+    "                if line.startswith('#') or len(line.strip()) == 0:\n",
+    "                    continue\n",
+    "                elems = line.strip().split()\n",
+    "                if len(elems) < 10:\n",
+    "                    continue\n",
+    "                qw, qx, qy, qz = map(float, elems[1:5])\n",
+    "                tx, ty, tz = map(float, elems[5:8])\n",
+    "                rot = R.from_quat([qx, qy, qz, qw]).as_matrix()\n",
+    "                center = -rot.T @ np.array([tx, ty, tz])\n",
+    "                centers.append(center)\n",
+    "        return np.array(centers)\n",
+    "\n",
+    "    # 1. 加载相机中心\n",
+    "    camera_centers = load_camera_centers(images_txt)\n",
+    "\n",
+    "    # 2. 计算平均中心和最大偏移\n",
+    "    center_mean = camera_centers.mean(axis=0)\n",
+    "    offsets = np.abs(camera_centers - center_mean)\n",
+    "    max_offset = offsets.max(axis=0)\n",
+    "\n",
+    "    # 3. 加载点云\n",
+    "    pcd = o3d.io.read_point_cloud(pcd_path)\n",
+    "    points = np.asarray(pcd.points)\n",
+    "    colors = np.asarray(pcd.colors) if pcd.has_colors() else np.ones_like(points)\n",
+    "\n",
+    "    # 4. 构建包围盒并裁剪点云\n",
+    "    min_bound = center_mean - max_offset - 15\n",
+    "    max_bound = center_mean + max_offset + 15\n",
+    "    aabb = o3d.geometry.AxisAlignedBoundingBox(min_bound, max_bound)\n",
+    "    pcd_cropped = pcd.crop(aabb)\n",
+    "\n",
+    "    # 5. 统计信息\n",
+    "    total_points = len(points)\n",
+    "    kept_points = np.asarray(pcd_cropped.points)\n",
+    "    kept_colors = np.asarray(pcd_cropped.colors) if pcd_cropped.has_colors() else np.ones_like(kept_points)\n",
+    "    removed_mask = np.ones(total_points, dtype=bool)\n",
+    "    kept_indices = aabb.get_point_indices_within_bounding_box(pcd.points)\n",
+    "    removed_mask[kept_indices] = False\n",
+    "    removed_points = points[removed_mask]\n",
+    "    removed_colors = colors[removed_mask]\n",
+    "\n",
+    "    print(\"相机中心范围：\")\n",
+    "    print(f\"  X: [{min_bound[0]:.3f}, {max_bound[0]:.3f}]\")\n",
+    "    print(f\"  Y: [{min_bound[1]:.3f}, {max_bound[1]:.3f}]\")\n",
+    "    print(f\"  Z: [{min_bound[2]:.3f}, {max_bound[2]:.3f}]\")\n",
+    "    print(f\"总点数：{total_points}\")\n",
+    "    print(f\"保留点数：{len(kept_points)}\")\n",
+    "    print(f\"删除点数：{len(removed_points)}\")\n",
+    "\n",
+    "    # 6. 保存 points3D.txt (速度太慢，直接使用 ply )\n",
+    "    with open(output_points_txt, \"w\") as f:\n",
+    "        f.write(\"# 3D point list with one line of data per point:\\n\")\n",
+    "        f.write(\"# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[]\\n\")\n",
+    "        for idx, (pt, col) in enumerate(zip(kept_points, kept_colors), start=1):\n",
+    "            r, g, b = (col * 255).astype(int)\n",
+    "            f.write(f\"{idx} {pt[0]:.6f} {pt[1]:.6f} {pt[2]:.6f} {r} {g} {b} 0\\n\")\n",
+    "    # 保存为 ply 文件\n",
+    "    pcd_kept = o3d.geometry.PointCloud()\n",
+    "    pcd_kept.points = o3d.utility.Vector3dVector(kept_points)\n",
+    "    pcd_kept.colors = o3d.utility.Vector3dVector(kept_colors)\n",
+    "    o3d.io.write_point_cloud(output_points_ply, pcd_kept)\n",
+    "    print(\"保留的点云已保存为 {output_points_ply}\")\n",
+    "    \n",
+    "    # 7. 保存带颜色的 PLY 文件\n",
+    "    all_points = np.vstack((kept_points, removed_points))\n",
+    "    removed_colors_red = np.tile([1.0, 0.0, 0.0], (len(removed_points), 1))\n",
+    "    all_colors = np.vstack((kept_colors, removed_colors_red))\n",
+    "    pcd_all = o3d.geometry.PointCloud()\n",
+    "    pcd_all.points = o3d.utility.Vector3dVector(all_points)\n",
+    "    pcd_all.colors = o3d.utility.Vector3dVector(all_colors)\n",
+    "    o3d.io.write_point_cloud(output_ply_path, pcd_all)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "80b924be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "相机中心范围：\n",
+      "  X: [-0.200, 28.270]\n",
+      "  Y: [-16.082, 1.699]\n",
+      "  Z: [-9.392, 0.714]\n",
+      "总点数：16404321\n",
+      "保留点数：10684495\n",
+      "删除点数：5719826\n",
+      "保留的点云已保存为 {output_points_ply}\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n",
+    "\n",
+    "images_txt = os.path.join(folder_path, 'sparse/0/images.txt')\n",
+    "pcd_path = os.path.join(folder_path, 'pcd/all_raw_points.pcd')\n",
+    "output_points_ply = os.path.join(folder_path, 'pcd/points3D.ply')\n",
+    "vis_ply_path = os.path.join(folder_path, 'pcd/vis_filter.ply')\n",
+    "output_points_txt = os.path.join(folder_path, 'pcd/points3D.txt')\n",
+    "filter_point_cloud(images_txt,pcd_path,output_points_txt, output_points_ply,vis_ply_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a4206e8",
+   "metadata": {},
+   "source": [
+    "#### 调用 crop_points.py 文件"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d8c55ed",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "总点数：16404321，保留：15254842，删除：1149479\n"
+     ]
+    }
+   ],
+   "source": [
+    "!python crop_points.py \\\n",
+    "    --images_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/sparse/0/images.txt \\\n",
+    "    --pcd_path /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/all_raw_points.pcd \\\n",
+    "    --output_points_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D.txt \\\n",
+    "    --output_ply /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/filtered_colored.ply \\\n",
+    "    --margin 10.0"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "651ed72f",
+   "metadata": {},
+   "source": [
+    "## 1.2 利用体素化降采样"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "56f5cba7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "raw points len : 16404321\n",
+      "downsample points len : 4671298\n",
+      "降采样后的点云已保存到 /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_0.05.ply，体素大小：0.05\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from tools.points_utils import voxel_downsample_and_save\n",
+    "\n",
+    "voxel_size = 0.05\n",
+    "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n",
+    "input_ply_path = os.path.join(folder_path,'pcd/points3D.ply')\n",
+    "output_ply_path = os.path.join(folder_path,f'pcd/points3D_{voxel_size}.ply')\n",
+    "input_ply_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n",
+    "output_ply_path = f'/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_{voxel_size}.ply'\n",
+    "\n",
+    "voxel_downsample_and_save(voxel_size, input_ply_path, output_ply_path) # ply downsample to ply\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1df326d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8544bb3e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jupyter environment detected. Enabling Open3D WebVisualizer.\n",
+      "[Open3D INFO] WebRTC GUI backend enabled.\n",
+      "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n",
+      "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd to /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply\n"
+     ]
+    }
+   ],
+   "source": [
+    "# pcd 2 ply\n",
+    "# 将原pcd点云，转换为ply点云\n",
+    "from tools.points_utils import pcd_2_ply\n",
+    "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd'\n",
+    "ply_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n",
+    "\n",
+    "pcd_2_ply(pcd_file,ply_file)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "2bf16bb1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Jupyter environment detected. Enabling Open3D WebVisualizer.\n",
+      "[Open3D INFO] WebRTC GUI backend enabled.\n",
+      "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n",
+      "正在写入COLMAP TXT文件: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n",
+      "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply to Colmap TXT in: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n"
+     ]
+    }
+   ],
+   "source": [
+    "# pcd 2 txt\n",
+    "from tools.points_utils import pcd_2_colmap_txt\n",
+    "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply'\n",
+    "txt_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt'\n",
+    "\n",
+    "pcd_2_colmap_txt(pcd_file, txt_file, is_white=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6fd0b306",
+   "metadata": {},
+   "source": [
+    "选择一个合适的 points3D.ply 文件复制到 sparse/0 下"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "22e87bd3",
+   "metadata": {},
+   "source": [
+    "## 1.3 Resize for more images"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "91ce4cb8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import cv2\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "def resize_images(input_dir, output_dir, extensions):\n",
+    "    \"\"\"\n",
+    "    读取输入文件夹中的所有图片，调整为1/2大小后保存到输出文件夹\n",
+    "    \n",
+    "    Args:\n",
+    "        input_dir: 输入图片文件夹路径\n",
+    "        output_dir: 输出图片文件夹路径\n",
+    "        extensions: 支持的图片扩展名列表\n",
+    "    \"\"\"\n",
+    "    # 确保输出目录存在\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "    \n",
+    "    # 获取所有图片文件\n",
+    "    image_files = []\n",
+    "    for file in os.listdir(input_dir):\n",
+    "        if any(file.lower().endswith(ext) for ext in extensions):\n",
+    "            image_files.append(file)\n",
+    "    \n",
+    "    if not image_files:\n",
+    "        print(f\"在 {input_dir} 中未找到支持的图片文件\")\n",
+    "        return\n",
+    "    \n",
+    "    print(f\"找到 {len(image_files)} 张图片\")\n",
+    "    \n",
+    "    # 处理每张图片\n",
+    "    count = 0\n",
+    "    for file in tqdm(image_files, desc=\"处理中\"):\n",
+    "        input_path = os.path.join(input_dir, file)\n",
+    "        output_path = os.path.join(output_dir, file)\n",
+    "        \n",
+    "        try:\n",
+    "            # 读取图片\n",
+    "            img = cv2.imread(input_path)\n",
+    "            if img is None:\n",
+    "                print(f\"警告: 无法读取图片 {input_path}，跳过\")\n",
+    "                continue\n",
+    "                \n",
+    "            # 获取原始尺寸\n",
+    "            height, width = img.shape[:2]\n",
+    "            \n",
+    "            # 计算新尺寸\n",
+    "            new_width = width // 2\n",
+    "            new_height = height // 2\n",
+    "            \n",
+    "            # 调整尺寸\n",
+    "            resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)\n",
+    "            \n",
+    "            # 保存图片\n",
+    "            cv2.imwrite(output_path, resized_img)\n",
+    "            \n",
+    "            # 输出尺寸信息\n",
+    "            if count == 0:\n",
+    "                print(f\"{file}: {width}x{height} -> {new_width}x{new_height}\")\n",
+    "            count += 1\n",
+    "        except Exception as e:\n",
+    "            print(f\"错误: 处理图片 {input_path} 时出错: {str(e)}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9b4da36",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps'\n",
+    "output_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps_2'\n",
+    "exts = ['jpg','jpeg','png']\n",
+    "\n",
+    "resize_images(input_path, output_path, exts)\n",
+    "print(\"所有图片处理完成！\")    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3830cb91",
+   "metadata": {},
+   "source": [
+    "# 2. LIVO2和Colmap的重建对比实验"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7dd6ff70",
+   "metadata": {},
+   "source": [
+    "colmap无法恢复相机位姿；所以这里我们使用livo2恢复位姿后，用colmap 进行三角测量获取关键点\n",
+    "https://www.cnblogs.com/Todd-Qi/p/15080968.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4ad9f127",
+   "metadata": {},
+   "source": [
+    "## 2.1 基于Livo2位姿进行稀疏重建"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40e8c7f1",
+   "metadata": {},
+   "source": [
+    "colmap无法恢复相机位姿；所以这里我们使用livo2恢复位姿后，用colmap 进行三角测量获取关键点\n",
+    "https://www.cnblogs.com/Todd-Qi/p/15080968.html"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "497d715a",
+   "metadata": {},
+   "source": [
+    "1. 准备来自Livo2的位姿和相机数据 cameras.txt, images.txt\n",
+    "    将内参(camera intrinsics) 放入cameras.txt， 外参(camera extrinsics)放入 images.txt , points3D.txt 为空  \n",
+    "    - images.txt 中每张图像第二行的 2D 特征点数据（形如 0.0 0.0 -1）全部清空，仅保留空行;  \n",
+    "    - points3D.txt 内容清空;\n",
+    "    - cameras.txt 中的内参进行修改 (对输入图像全部进行了 resize 操作，因此需要修改相机内参，将fx, fy, cx, cy 都除以2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "497596e0",
+   "metadata": {},
+   "source": [
+    "2. 特征提取与特征匹配  \n",
+    "``` bash\n",
+    "        colmap feature_extractor \\\n",
+    "            --database_path /path/to/project/database.db \\\n",
+    "            --image_path    /path/to/project/images\n",
+    "```\n",
+    "``` bash\n",
+    "        colmap exhaustive_matcher \\\n",
+    "            --database_path /path/to/project/database.db\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "590c5ba8",
+   "metadata": {},
+   "source": [
+    "3. 三角化重建 (保存的点云和其他文件均为bin格式)\n",
+    "``` bash\n",
+    "        colmap point_triangulator \\\n",
+    "            --database_path /path/to/project/database.db \\\n",
+    "            --image_path    /path/to/project/images \\\n",
+    "            --input_path    /path/to/sparse_model \\\n",
+    "            --output_path   /path/to/triangulated_model\n",
+    "\n",
+    "```\n",
+    "\n",
+    "查看txt结果\n",
+    "``` bash\n",
+    "        colmap model_converter \\\n",
+    "            --input_path 0 \\\n",
+    "            --output_path 0_txt_from_livo2 \\\n",
+    "            --output_type TXT\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6c2b853f",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d359ac68",
+   "metadata": {},
+   "source": [
+    "4. 稠密重建(optional)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "77250b4b",
+   "metadata": {},
+   "source": [
+    "# 3.训练"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d44f71f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# baseline raw gs for training\n",
+    "!CUDA_VISIBLE_DEVICES=1 python train.py \\\n",
+    "    -s data/tree_01_livo2 \\\n",
+    "    -m data/tree_01_livo2/outputs/3dgs_baseline\n",
+    "    \n",
+    "# render\n",
+    "!CUDA_VISIBLE_DEVICES=1 python render.py \\\n",
+    "    -s data/tree_01_colmap \\\n",
+    "    -m data/tree_01_colmap/outputs/3dgs_baseline    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a363d15d",
+   "metadata": {},
+   "source": [
+    "# 4.深度实验"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1de51e3b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--- depth_maps_2文件夹 ---\n",
+      "找到图片：00144.png\n",
+      "分辨率：1024 x 768 像素\n",
+      "通道数：3\n",
+      "通道说明：三通道（RGB）\n",
+      "--- images我文件夹 ---\n",
+      "找到图片：00144.png\n",
+      "分辨率：1024 x 768 像素\n",
+      "通道数：3\n",
+      "通道说明：三通道（RGB）\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 一、确认图像与深度图像尺寸是否一致\n",
+    "from tools.images_utils import get_first_image_info\n",
+    "\n",
+    "folder_path = \"/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2\"\n",
+    "print(\"--- depth_maps_2文件夹 ---\")\n",
+    "get_first_image_info(folder_path)\n",
+    "folder_path = \"/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/images\"\n",
+    "print(\"--- images我文件夹 ---\")\n",
+    "get_first_image_info(folder_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "6ee3b672",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "成功生成 JSON 文件：/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/sparse/0/depth_params.json\n",
+      "共处理 539 个 PNG 文件\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 2.在 sparse/0 下生成一个 depth_params.json\n",
+    "\n",
+    "# 示例数据:\n",
+    "# {\n",
+    "# \"DSC_3893\": {\n",
+    "#     \"scale\": 50.55648822378238,\n",
+    "#     \"offset\": 0.01793079636288747\n",
+    "# },\n",
+    "# \"DSC_3902\": {\n",
+    "#     \"scale\": 56.80526691363168,\n",
+    "#     \"offset\": 0.027163276135205097\n",
+    "# }\n",
+    "# }\n",
+    "\n",
+    "from tools.images_utils import generate_depth_params_json\n",
+    "import os\n",
+    "\n",
+    "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/'\n",
+    "PNG_FOLDER = os.path.join(folder_path, 'depth_maps')\n",
+    "OUTPUT_PATH = os.path.join(folder_path,\"sparse/0/depth_params.json\")\n",
+    "# 生成 JSON 文件\n",
+    "generate_depth_params_json(PNG_FOLDER, OUTPUT_PATH)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b337a10c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 重命名 删去 -final的后缀\n",
+    "from tools.images_utils import batch_rename_files\n",
+    "\n",
+    "\n",
+    "FOLDER_PATH = \"/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3_depth/depth_maps\"  # 请替换为实际的文件夹路径\n",
+    "batch_rename_files(FOLDER_PATH)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5f60df4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 如果有深度mesh缺失\n",
+    "# ~/git_project/livo2-data-utils/10-Mesh-acc$ \n",
+    "#     python demo-acc.py -i \\\n",
+    "#         /home/qinllgroup/hongxiangyu/git_project/livo2-data-utils/10-Mesh-acc/data/delete \\\n",
+    "#         -o /home/qinllgroup/hongxiangyu/git_project/livo2-data-utils/10-Mesh-acc/data/delete \\\n",
+    "#         --max_edge 15\n",
+    "\n",
+    "from tools.images_utils import resize_image\n",
+    "\n",
+    "input_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps/00466-final.png'\n",
+    "output_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2/00466.png'\n",
+    "resize_image(input_path, output_path, scale=2, quality=100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "024d1b19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3. 训练深度mask\n",
+    "!CUDA_VISIBLE_DEVICES=1 python train.py \\\n",
+    "    -s data/tree_01_livo2 \\\n",
+    "    -d data/tree_01_livo2/depth_map \\\n",
+    "    -m data/tree_01_livo2/outputs/3dgs_baseline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "254f6278",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "成功将图片从 2048x1536 缩小到 1024x768\n",
+      "保存路径: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2/00466.png\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_save_w_depth/depth_maps_2/00466.png'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "66d0f159",
+   "metadata": {},
+   "source": [
+    "## 4.1 深度读取"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7406df01",
+   "metadata": {},
+   "source": [
+    "# 5.可视化TensorBoard"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b0acade",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/xy_utils/learning.ipynb
+++ b/xy_utils/learning.ipynb
@ -2,578 +2,19 @@
 "cells": [
  {
   "cell_type": "markdown",
-   "id": "b66179c8",
+   "id": "bf253aab",
   "metadata": {},
   "source": [
-    "# 1.预处理数据"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "533db061",
-   "metadata": {},
-   "source": [
-    "## 1.1 对点云数据进行处理 - 滤除环视范围外过远的点"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "864843b2",
-   "metadata": {},
-   "source": [
-    "1. 解析 images.txt，计算所有相机在世界坐标系下的光心（位姿）。  \n",
-    "2.  使用 Open3D 加载 .pcd 点云，并基于与最近相机光心的距离进行滤除。  \n",
-    "3.  输出相机光心的范围（坐标轴最小/最大值）以及被滤除的点云数量。  \n",
-    "4.  将保留的点云按 COLMAP points3D.txt 格式保存，并额外生成一个 PLY 文件，其中红色标记被删除的点，原色保留未删除点。"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5dc3ac56",
-   "metadata": {},
-   "source": [
-    "#### ipynb内部函数 -- 未进行地面与boundingbox对齐"
+    "# 基于CUDA的KNN计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "cc2e8c18",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import open3d as o3d\n",
-    "from scipy.spatial.transform import Rotation as R\n",
-    "\n",
-    "def filter_point_cloud(images_txt, pcd_path, output_points_txt, output_points_ply, output_ply_path):\n",
-    "    \"\"\"\n",
-    "    根据相机位姿过滤点云，并保存结果。\n",
-    "\n",
-    "    Args:\n",
-    "        images_txt (str): 包含相机位姿的 images.txt 文件路径。\n",
-    "        pcd_path (str): 点云文件路径。\n",
-    "        output_points_txt (str): 保存过滤后点云的 points3D.txt 文件路径。\n",
-    "        output_ply_path (str): 保存带颜色标记的 PLY 文件路径。\n",
-    "    \"\"\"\n",
-    "    def load_camera_centers(images_txt):\n",
-    "        centers = []\n",
-    "        with open(images_txt, 'r') as f:\n",
-    "            for line in f:\n",
-    "                if line.startswith('#') or len(line.strip()) == 0:\n",
-    "                    continue\n",
-    "                elems = line.strip().split()\n",
-    "                if len(elems) < 10:\n",
-    "                    continue\n",
-    "                qw, qx, qy, qz = map(float, elems[1:5])\n",
-    "                tx, ty, tz = map(float, elems[5:8])\n",
-    "                rot = R.from_quat([qx, qy, qz, qw]).as_matrix()\n",
-    "                center = -rot.T @ np.array([tx, ty, tz])\n",
-    "                centers.append(center)\n",
-    "        return np.array(centers)\n",
-    "\n",
-    "    # 1. 加载相机中心\n",
-    "    camera_centers = load_camera_centers(images_txt)\n",
-    "\n",
-    "    # 2. 计算平均中心和最大偏移\n",
-    "    center_mean = camera_centers.mean(axis=0)\n",
-    "    offsets = np.abs(camera_centers - center_mean)\n",
-    "    max_offset = offsets.max(axis=0)\n",
-    "\n",
-    "    # 3. 加载点云\n",
-    "    pcd = o3d.io.read_point_cloud(pcd_path)\n",
-    "    points = np.asarray(pcd.points)\n",
-    "    colors = np.asarray(pcd.colors) if pcd.has_colors() else np.ones_like(points)\n",
-    "\n",
-    "    # 4. 构建包围盒并裁剪点云\n",
-    "    min_bound = center_mean - max_offset - 15\n",
-    "    max_bound = center_mean + max_offset + 15\n",
-    "    aabb = o3d.geometry.AxisAlignedBoundingBox(min_bound, max_bound)\n",
-    "    pcd_cropped = pcd.crop(aabb)\n",
-    "\n",
-    "    # 5. 统计信息\n",
-    "    total_points = len(points)\n",
-    "    kept_points = np.asarray(pcd_cropped.points)\n",
-    "    kept_colors = np.asarray(pcd_cropped.colors) if pcd_cropped.has_colors() else np.ones_like(kept_points)\n",
-    "    removed_mask = np.ones(total_points, dtype=bool)\n",
-    "    kept_indices = aabb.get_point_indices_within_bounding_box(pcd.points)\n",
-    "    removed_mask[kept_indices] = False\n",
-    "    removed_points = points[removed_mask]\n",
-    "    removed_colors = colors[removed_mask]\n",
-    "\n",
-    "    print(\"相机中心范围：\")\n",
-    "    print(f\"  X: [{min_bound[0]:.3f}, {max_bound[0]:.3f}]\")\n",
-    "    print(f\"  Y: [{min_bound[1]:.3f}, {max_bound[1]:.3f}]\")\n",
-    "    print(f\"  Z: [{min_bound[2]:.3f}, {max_bound[2]:.3f}]\")\n",
-    "    print(f\"总点数：{total_points}\")\n",
-    "    print(f\"保留点数：{len(kept_points)}\")\n",
-    "    print(f\"删除点数：{len(removed_points)}\")\n",
-    "\n",
-    "    # 6. 保存 points3D.txt (速度太慢，直接使用 ply )\n",
-    "    with open(output_points_txt, \"w\") as f:\n",
-    "        f.write(\"# 3D point list with one line of data per point:\\n\")\n",
-    "        f.write(\"# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[]\\n\")\n",
-    "        for idx, (pt, col) in enumerate(zip(kept_points, kept_colors), start=1):\n",
-    "            r, g, b = (col * 255).astype(int)\n",
-    "            f.write(f\"{idx} {pt[0]:.6f} {pt[1]:.6f} {pt[2]:.6f} {r} {g} {b} 0\\n\")\n",
-    "    # 保存为 ply 文件\n",
-    "    pcd_kept = o3d.geometry.PointCloud()\n",
-    "    pcd_kept.points = o3d.utility.Vector3dVector(kept_points)\n",
-    "    pcd_kept.colors = o3d.utility.Vector3dVector(kept_colors)\n",
-    "    o3d.io.write_point_cloud(output_points_ply, pcd_kept)\n",
-    "    print(\"保留的点云已保存为 {output_points_ply}\")\n",
-    "    \n",
-    "    # 7. 保存带颜色的 PLY 文件\n",
-    "    all_points = np.vstack((kept_points, removed_points))\n",
-    "    removed_colors_red = np.tile([1.0, 0.0, 0.0], (len(removed_points), 1))\n",
-    "    all_colors = np.vstack((kept_colors, removed_colors_red))\n",
-    "    pcd_all = o3d.geometry.PointCloud()\n",
-    "    pcd_all.points = o3d.utility.Vector3dVector(all_points)\n",
-    "    pcd_all.colors = o3d.utility.Vector3dVector(all_colors)\n",
-    "    o3d.io.write_point_cloud(output_ply_path, pcd_all)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "80b924be",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "相机中心范围：\n",
-      "  X: [-0.200, 28.270]\n",
-      "  Y: [-16.082, 1.699]\n",
-      "  Z: [-9.392, 0.714]\n",
-      "总点数：16404321\n",
-      "保留点数：10684495\n",
-      "删除点数：5719826\n",
-      "保留的点云已保存为 {output_points_ply}\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n",
-    "\n",
-    "images_txt = os.path.join(folder_path, 'sparse/0/images.txt')\n",
-    "pcd_path = os.path.join(folder_path, 'pcd/all_raw_points.pcd')\n",
-    "output_points_ply = os.path.join(folder_path, 'pcd/points3D.ply')\n",
-    "vis_ply_path = os.path.join(folder_path, 'pcd/vis_filter.ply')\n",
-    "output_points_txt = os.path.join(folder_path, 'pcd/points3D.txt')\n",
-    "filter_point_cloud(images_txt,pcd_path,output_points_txt, output_points_ply,vis_ply_path)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "1a4206e8",
-   "metadata": {},
-   "source": [
-    "#### 调用 crop_points.py 文件"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8d8c55ed",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "总点数：16404321，保留：15254842，删除：1149479\n"
-     ]
-    }
-   ],
-   "source": [
-    "!python crop_points.py \\\n",
-    "    --images_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/sparse/0/images.txt \\\n",
-    "    --pcd_path /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/all_raw_points.pcd \\\n",
-    "    --output_points_txt /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D.txt \\\n",
-    "    --output_ply /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/filtered_colored.ply \\\n",
-    "    --margin 10.0"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "651ed72f",
-   "metadata": {},
-   "source": [
-    "## 1.2 利用体素化降采样"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "56f5cba7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "raw points len : 16404321\n",
-      "downsample points len : 4671298\n",
-      "降采样后的点云已保存到 /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_0.05.ply，体素大小：0.05\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "from tools.points_utils import voxel_downsample_and_save\n",
-    "\n",
-    "voxel_size = 0.05\n",
-    "folder_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results'\n",
-    "input_ply_path = os.path.join(folder_path,'pcd/points3D.ply')\n",
-    "output_ply_path = os.path.join(folder_path,f'pcd/points3D_{voxel_size}.ply')\n",
-    "input_ply_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n",
-    "output_ply_path = f'/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points_{voxel_size}.ply'\n",
-    "\n",
-    "voxel_downsample_and_save(voxel_size, input_ply_path, output_ply_path) # ply downsample to ply\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d1df326d",
+   "id": "1ad23d69",
   "metadata": {},
   "outputs": [],
   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "8544bb3e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Jupyter environment detected. Enabling Open3D WebVisualizer.\n",
-      "[Open3D INFO] WebRTC GUI backend enabled.\n",
-      "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n",
-      "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd to /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply\n"
-     ]
-    }
-   ],
-   "source": [
-    "# pcd 2 ply\n",
-    "# 将原pcd点云，转换为ply点云\n",
-    "from tools.points_utils import pcd_2_ply\n",
-    "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.pcd'\n",
-    "ply_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_debug/mini3/sparse/0/raw/all_raw_points.ply'\n",
-    "\n",
-    "pcd_2_ply(pcd_file,ply_file)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "2bf16bb1",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Jupyter environment detected. Enabling Open3D WebVisualizer.\n",
-      "[Open3D INFO] WebRTC GUI backend enabled.\n",
-      "[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.\n",
-      "正在写入COLMAP TXT文件: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n",
-      "Converted /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply to Colmap TXT in: /home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt\n"
-     ]
-    }
-   ],
-   "source": [
-    "# pcd 2 txt\n",
-    "from tools.points_utils import pcd_2_colmap_txt\n",
-    "pcd_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/sparse/0/points3D_filter.ply'\n",
-    "txt_file = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01_livo2/livo2_results/pcd/points3D_filter_white.txt'\n",
-    "\n",
-    "pcd_2_colmap_txt(pcd_file, txt_file, is_white=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6fd0b306",
-   "metadata": {},
-   "source": [
-    "选择一个合适的 points3D.ply 文件复制到 sparse/0 下"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "22e87bd3",
-   "metadata": {},
-   "source": [
-    "## 1.2 Resize for more images"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "91ce4cb8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import cv2\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "def resize_images(input_dir, output_dir, extensions):\n",
-    "    \"\"\"\n",
-    "    读取输入文件夹中的所有图片，调整为1/2大小后保存到输出文件夹\n",
-    "    \n",
-    "    Args:\n",
-    "        input_dir: 输入图片文件夹路径\n",
-    "        output_dir: 输出图片文件夹路径\n",
-    "        extensions: 支持的图片扩展名列表\n",
-    "    \"\"\"\n",
-    "    # 确保输出目录存在\n",
-    "    os.makedirs(output_dir, exist_ok=True)\n",
-    "    \n",
-    "    # 获取所有图片文件\n",
-    "    image_files = []\n",
-    "    for file in os.listdir(input_dir):\n",
-    "        if any(file.lower().endswith(ext) for ext in extensions):\n",
-    "            image_files.append(file)\n",
-    "    \n",
-    "    if not image_files:\n",
-    "        print(f\"在 {input_dir} 中未找到支持的图片文件\")\n",
-    "        return\n",
-    "    \n",
-    "    print(f\"找到 {len(image_files)} 张图片\")\n",
-    "    \n",
-    "    # 处理每张图片\n",
-    "    count = 0\n",
-    "    for file in tqdm(image_files, desc=\"处理中\"):\n",
-    "        input_path = os.path.join(input_dir, file)\n",
-    "        output_path = os.path.join(output_dir, file)\n",
-    "        \n",
-    "        try:\n",
-    "            # 读取图片\n",
-    "            img = cv2.imread(input_path)\n",
-    "            if img is None:\n",
-    "                print(f\"警告: 无法读取图片 {input_path}，跳过\")\n",
-    "                continue\n",
-    "                \n",
-    "            # 获取原始尺寸\n",
-    "            height, width = img.shape[:2]\n",
-    "            \n",
-    "            # 计算新尺寸\n",
-    "            new_width = width // 2\n",
-    "            new_height = height // 2\n",
-    "            \n",
-    "            # 调整尺寸\n",
-    "            resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)\n",
-    "            \n",
-    "            # 保存图片\n",
-    "            cv2.imwrite(output_path, resized_img)\n",
-    "            \n",
-    "            # 输出尺寸信息\n",
-    "            if count == 0:\n",
-    "                print(f\"{file}: {width}x{height} -> {new_width}x{new_height}\")\n",
-    "            count += 1\n",
-    "        except Exception as e:\n",
-    "            print(f\"错误: 处理图片 {input_path} 时出错: {str(e)}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e9b4da36",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps'\n",
-    "output_path = '/home/qinllgroup/hongxiangyu/git_project/gaussian-splatting-xy/data/tree_01/depth_maps_2'\n",
-    "exts = ['jpg','jpeg','png']\n",
-    "\n",
-    "resize_images(input_path, output_path, exts)\n",
-    "print(\"所有图片处理完成！\")    "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "3830cb91",
-   "metadata": {},
-   "source": [
-    "# 2. LIVO2和Colmap的重建对比实验"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7dd6ff70",
-   "metadata": {},
-   "source": [
-    "colmap无法恢复相机位姿；所以这里我们使用livo2恢复位姿后，用colmap 进行三角测量获取关键点\n",
-    "https://www.cnblogs.com/Todd-Qi/p/15080968.html"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4ad9f127",
-   "metadata": {},
-   "source": [
-    "## 2.1 基于Livo2位姿进行稀疏重建"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "40e8c7f1",
-   "metadata": {},
-   "source": [
-    "colmap无法恢复相机位姿；所以这里我们使用livo2恢复位姿后，用colmap 进行三角测量获取关键点\n",
-    "https://www.cnblogs.com/Todd-Qi/p/15080968.html"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "497d715a",
-   "metadata": {},
-   "source": [
-    "1. 准备来自Livo2的位姿和相机数据 cameras.txt, images.txt\n",
-    "    将内参(camera intrinsics) 放入cameras.txt， 外参(camera extrinsics)放入 images.txt , points3D.txt 为空  \n",
-    "    - images.txt 中全部 0.0 0.0 -1 删除;  \n",
-    "    - points3D.txt 内容清空;\n",
-    "    - cameras.txt 中的内参进行修改 (对输入图像全部进行了 resize 操作，因此需要修改相机内参，将fx, fy, cx, cy 都除以2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "497596e0",
-   "metadata": {},
-   "source": [
-    "2. 特征匹配与特征提取  \n",
-    "``` bash\n",
-    "        colmap feature_extractor \\\n",
-    "            --database_path /path/to/project/database.db \\  \n",
-    "            --image_path    /path/to/project/images\n",
-    "```\n",
-    "``` bash\n",
-    "        colmap exhaustive_matcher \\\n",
-    "            --database_path /path/to/project/database.db\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "590c5ba8",
-   "metadata": {},
-   "source": [
-    "3. 三角化重建 (保存的点云和其他文件均为bin格式)\n",
-    "``` bash\n",
-    "        colmap point_triangulator \\\n",
-    "            --database_path /path/to/project/database.db \\\n",
-    "            --image_path    /path/to/project/images \\\n",
-    "            --input_path    /path/to/sparse_model \\\n",
-    "            --output_path   /path/to/triangulated_model\n",
-    "\n",
-    "```\n",
-    "\n",
-    "查看txt结果\n",
-    "``` bash\n",
-    "        colmap model_converter \\\n",
-    "            --input_path 0 \\\n",
-    "            --output_path 0_txt_from_livo2 \\\n",
-    "            --output_type TXT\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6c2b853f",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d359ac68",
-   "metadata": {},
-   "source": [
-    "4. 稠密重建(optional)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "77250b4b",
-   "metadata": {},
-   "source": [
-    "# 3.训练"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d44f71f7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# baseline raw gs for training\n",
-    "!CUDA_VISIBLE_DEVICES=1 python train.py \\\n",
-    "    -s data/tree_01_livo2 \\\n",
-    "    -m data/tree_01_livo2/outputs/3dgs_baseline\n",
-    "    \n",
-    "# render\n",
-    "!CUDA_VISIBLE_DEVICES=1 python render.py \\\n",
-    "    -s data/tree_01_colmap \\\n",
-    "    -m data/tree_01_colmap/outputs/3dgs_baseline    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0156e92d",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[[2, 4, 5], [1, 5, 4]]"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "iss = [[1,5,4],[2,4,5]]\n",
-    "iss.sort(key=lambda x: [x[1],x[0]], reverse=True)\n",
-    "iss"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bf857aee",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class Solution(object):\n",
-    "    def merge(self, intervals):\n",
-    "        \"\"\"\n",
-    "        :type intervals: List[List[int]]\n",
-    "        :rtype: List[List[int]]\n",
-    "        \"\"\"\n",
-    "        if not intervals:\n",
-    "            return []\n",
-    "        # 先按区间的起始位置排序\n",
-    "        intervals.sort(key=lambda x: x[0])\n",
-    "        merged = [intervals[0]]\n",
-    "        for i in range(1, len(intervals)):\n",
-    "            # 如果当前区间与上一个区间重叠，则合并\n",
-    "            if intervals[i][0] <= merged[-1][1]:\n",
-    "                merged[-1][1] = max(merged[-1][1], intervals[i][1])\n",
-    "            else:\n",
-    "                merged.append(intervals[i])\n",
-    "        return merged"
-   ]
  }
 ],
 "metadata": {
@ -583,15 +24,7 @@
   "name": "python3"
  },
  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
--- a/xy_utils/tools/images_utils.py
+++ b/xy_utils/tools/images_utils.py
@ -0,0 +1,183 @@
+import os
+from PIL import Image
+import json
+import re
+def get_first_image_info(folder_path):
+    """Print the resolution and channel count of the first image in a folder.
+
+    "First" means first in sorted file-name order among the entries whose
+    extension is .jpg, .jpeg, .png, .bmp or .gif (case-insensitive). The
+    folder is not searched recursively. Every result and every error is
+    reported with print(); the function always returns None.
+
+    Args:
+        folder_path: Directory to scan.
+    """
+    # isdir rather than exists: a path that points at a regular file would
+    # otherwise make os.listdir raise instead of being reported here.
+    if not os.path.isdir(folder_path):
+        print(f"错误：文件夹 '{folder_path}' 不存在")
+        return
+
+    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
+
+    # os.listdir returns entries in arbitrary order, so sort to make the
+    # selected file the same on every run.
+    image_path = next(
+        (
+            os.path.join(folder_path, name)
+            for name in sorted(os.listdir(folder_path))
+            if os.path.splitext(name)[1].lower() in image_extensions
+        ),
+        None,
+    )
+
+    if image_path is None:
+        print(f"错误：在文件夹 '{folder_path}' 中未找到图片文件")
+        return
+
+    # Human-readable note for the common band counts.
+    channel_notes = {
+        1: "通道说明：单通道（可能是灰度图）",
+        3: "通道说明：三通道（RGB）",
+        4: "通道说明：四通道（RGBA，包含Alpha通道）",
+    }
+
+    try:
+        with Image.open(image_path) as img:
+            width, height = img.size
+            channels = len(img.getbands())
+
+            print(f"找到图片：{os.path.basename(image_path)}")
+            print(f"分辨率：{width} x {height} 像素")
+            print(f"通道数：{channels}")
+            print(channel_notes.get(
+                channels, f"通道说明：非常规通道数（{channels}通道）"))
+
+    except Exception as e:
+        # Best-effort inspection helper: report unreadable files, don't crash.
+        print(f"错误：无法处理图片 '{image_path}' - {str(e)}")
+
+def generate_depth_params_json(png_folder, output_path):
+    """Write a depth_params.json with one placeholder entry per PNG file.
+
+    Every regular file in png_folder whose name ends with ".png"
+    (case-insensitive) contributes one key: its file name without the
+    extension. Each key maps to {"scale": 0.0, "offset": 0.0}. Keys are
+    written in sorted file-name order so the output is reproducible.
+    Problems are reported with print(); the function returns None.
+
+    NOTE(review): scale/offset are fixed placeholders here. Presumably the
+    LiDAR-depth loading path does not apply them; confirm against the loader.
+
+    Args:
+        png_folder: Directory containing the PNG images (not recursive).
+        output_path: Destination path of the JSON file; missing parent
+            directories are created.
+    """
+    # isdir rather than exists: a regular-file path must not reach listdir.
+    if not os.path.isdir(png_folder):
+        print(f"错误：PNG 图像文件夹 '{png_folder}' 不存在")
+        return
+
+    # Sort because os.listdir order is arbitrary and would otherwise leak
+    # into the key order of the generated file.
+    base_names = [
+        os.path.splitext(entry)[0]
+        for entry in sorted(os.listdir(png_folder))
+        if entry.lower().endswith('.png')
+        and os.path.isfile(os.path.join(png_folder, entry))
+    ]
+
+    if not base_names:
+        print(f"错误：在文件夹 '{png_folder}' 中未找到 PNG 文件")
+        return
+
+    json_data = {name: {"scale": 0.0, "offset": 0.0} for name in base_names}
+
+    # exist_ok avoids the check-then-create race of a separate exists() test.
+    output_dir = os.path.dirname(output_path)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+
+    try:
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(json_data, f, indent=2)
+        print(f"成功生成 JSON 文件：{output_path}")
+        print(f"共处理 {len(base_names)} 个 PNG 文件")
+    except Exception as e:
+        print(f"错误：无法写入 JSON 文件 '{output_path}' - {str(e)}")
+        
+def resize_image(input_path, output_path=None, scale=2, quality=100):
+    """Downscale one image by a factor of `scale` and save the result.
+
+    The new size is (width // scale, height // scale), clamped to at least
+    1 pixel per side, resampled with LANCZOS. Progress and errors are
+    reported with print().
+
+    NOTE(review): the notebook in this commit saves the output under a
+    depth_maps_2 folder. LANCZOS interpolates between neighbouring pixels,
+    which blends values across depth edges; confirm that is acceptable for
+    depth maps.
+
+    Args:
+        input_path: Path of the image to read.
+        output_path: Path to write. When None, "_resized" is inserted before
+            the extension of input_path.
+        scale: Positive downscale factor (default 2, i.e. half resolution).
+        quality: Passed to the image writer's `quality` option (default 100).
+            It only has an effect for formats whose writer honours it.
+
+    Returns:
+        The path that was written, or None if anything went wrong.
+    """
+    try:
+        # Raised inside the try so an invalid scale is reported like any
+        # other failure (print + None) instead of escaping to the caller.
+        if scale <= 0:
+            raise ValueError(f"scale must be positive, got {scale}")
+
+        with Image.open(input_path) as img:
+            width, height = img.size
+
+            # int() keeps a float scale usable; max(1, ...) stops a tiny
+            # image from collapsing to an invalid 0-pixel dimension.
+            new_width = max(1, int(width // scale))
+            new_height = max(1, int(height // scale))
+
+            resized_img = img.resize((new_width, new_height), Image.LANCZOS)
+
+            if output_path is None:
+                base, ext = os.path.splitext(input_path)
+                output_path = f"{base}_resized{ext}"
+
+            # Output format is inferred from the extension of output_path.
+            resized_img.save(output_path, quality=quality)
+
+            print(f"成功将图片从 {width}x{height} 缩小到 {new_width}x{new_height}")
+            print(f"保存路径: {output_path}")
+
+            return output_path
+
+    except Exception as e:
+        print(f"错误: 无法处理图片 {input_path} - {str(e)}")
+        return None
+    
+def batch_rename_files(folder_path):
+    """Strip a trailing "-final" from the name of every file in a folder.
+
+    A file is renamed when its name ends with "-final", optionally followed
+    by one extension (for example "00142-final.png" -> "00142.png").
+    Sub-directories are left alone and the folder is not searched
+    recursively. A file is skipped, never overwritten, when the target name
+    already exists. Progress and errors are reported with print().
+
+    Args:
+        folder_path: Directory whose files are renamed in place.
+    """
+    if not os.path.isdir(folder_path):
+        print(f"错误：文件夹 '{folder_path}' 不存在")
+        return
+
+    # group(1): everything before the last "-final"; group(2): the extension.
+    pattern = re.compile(r'^(.*?)-final(\.[^.]+)?$')
+
+    renamed_count = 0
+
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+
+        # Only plain files are candidates; directories are left untouched.
+        if not os.path.isfile(file_path):
+            continue
+
+        match = pattern.match(filename)
+        if not match:
+            continue
+
+        new_name = match.group(1) + (match.group(2) or '')
+        new_path = os.path.join(folder_path, new_name)
+
+        # os.rename silently replaces an existing target on POSIX, which
+        # would destroy e.g. "x.png" when "x-final.png" is processed. This
+        # also catches an empty new_name, whose path is the folder itself.
+        if os.path.exists(new_path):
+            print(f"跳过：目标文件 '{new_name}' 已存在，未重命名 '{filename}'")
+            continue
+
+        try:
+            os.rename(file_path, new_path)
+            print(f"已重命名: {filename} -> {new_name}")
+            renamed_count += 1
+        except Exception as e:
+            print(f"错误：无法重命名文件 '{filename}' - {str(e)}")
+
+    print(f"重命名完成！共处理 {renamed_count} 个文件")