edit submoudels/diff-gaussian-rasterization remote github url

2025-06-26 18:18:11 +00:00 · 2025-05-28 00:12:40 +08:00 · 2025-05-28 00:12:40 +08:00 · 05d4ca6548
commit 05d4ca6548
parent b787991bb2
3 changed files with 197 additions and 3 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -3,8 +3,9 @@
 	url = https://gitlab.inria.fr/bkerbl/simple-knn.git
 [submodule "submodules/diff-gaussian-rasterization"]
 	path = submodules/diff-gaussian-rasterization
-	url = https://github.com/graphdeco-inria/diff-gaussian-rasterization.git
-	branch = 3dgs_accel 	# branch = dr_aa
+	# url = https://github.com/graphdeco-inria/diff-gaussian-rasterization.git
+	url = https://github.com/Xun2001/diff-gaussian-rasterization-xy.git
+	branch = main # branch = 3dgs_accel 	# branch = dr_aa
 [submodule "SIBR_viewers"]
 	path = SIBR_viewers
 	url = https://gitlab.inria.fr/sibr/sibr_core.git
--- a/submodules/diff-gaussian-rasterization
+++ b/submodules/diff-gaussian-rasterization
@ -1 +1 @@
-Subproject commit 26ce026ae9d3cfa56a103279b863a9f320c3e555
+Subproject commit b8c710ee02939dcc7acbd1ad207406e80965a3d9
--- a/xy_utils/scripts/generate_sky_mask.py
+++ b/xy_utils/scripts/generate_sky_mask.py
@ -0,0 +1,193 @@
+# copy from street GS script/waymo/generate_sky_mask.py
+
+import os, sys
+
+sys.path.append(os.getcwd())
+import argparse
+import os
+import copy
+import imageio
+import numpy as np
+import torch
+from PIL import Image, ImageDraw, ImageFont
+from torchvision.ops import box_convert
+from huggingface_hub import hf_hub_download
+from tqdm import tqdm
+from termcolor import colored
+from glob import glob
+
+# Grounding DINO
+import groundingdino.datasets.transforms as T
+from groundingdino.models import build_model
+from groundingdino.util import box_ops
+from groundingdino.util.slconfig import SLConfig
+from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
+from groundingdino.util.inference import annotate, load_image, predict
+import supervision as sv
+
+# segment anything
+from segment_anything import build_sam, SamPredictor 
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+
+def setup(args):
+    # ======================== Load Grounding DINO model ========================
+    print(colored('Load Grounding DINO model', 'green'))
+    def load_model_hf(repo_id, filename, ckpt_config_filename, device='cpu'):
+        cache_config_file = hf_hub_download(repo_id=repo_id, filename=ckpt_config_filename)
+
+        args = SLConfig.fromfile(cache_config_file) 
+        model = build_model(args)
+        args.device = device
+
+        cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
+        checkpoint = torch.load(cache_file, map_location='cpu')
+        log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
+        print("Model loaded from {} \n => {}".format(cache_file, log))
+        _ = model.eval()
+        return model   
+
+    # Use this command for evaluate the Grounding DINO model
+    # Or you can download the model by yourself
+    ckpt_repo_id = "ShilongLiu/GroundingDINO"
+    ckpt_filenmae = "groundingdino_swinb_cogcoor.pth"
+    ckpt_config_filename = "GroundingDINO_SwinB.cfg.py"
+    global groundingdino_model 
+    groundingdino_model = load_model_hf(ckpt_repo_id, ckpt_filenmae, ckpt_config_filename)
+
+    # ======================== Load Segment Anything model ========================
+    print(colored('Load SAM model', 'green'))
+    # sam_checkpoint = '/nas/home/yanyunzhi/segment-anything/sam_vit_h_4b8939.pth.1'
+    sam = build_sam(checkpoint=args.sam_checkpoint)
+    sam.cuda()
+    global sam_predictor
+    sam_predictor = SamPredictor(sam)
+
+image_filename_to_cam = lambda x: int(x.split('.')[0][-1])
+image_filename_to_frame = lambda x: int(x.split('.')[0][:6])
+
+def add_to_mask_dict(masks_dict, mask_path):
+    basename = os.path.basename(mask_path)
+    cam = image_filename_to_cam(basename)
+    frame = image_filename_to_frame(basename)
+    mask = cv2.imread(mask_path) 
+    if frame not in masks_dict:
+        masks_dict[frame] = [None] * 3 # FRONT_LEFT, FRONT, FRONT_RIGHT 1, 0, 2
+    if cam == 1:
+        masks_dict[frame][0] = mask
+    elif cam == 0:
+        masks_dict[frame][1] = mask
+    elif cam == 2:
+        masks_dict[frame][2] = mask
+    
+
+
+def segment_with_text_prompt(datadir, BOX_TRESHOLD, TEXT_TRESHOLD, ignore_exists):
+    save_dir = os.path.join(datadir, 'sky_mask')
+    os.makedirs(save_dir, exist_ok=True)
+
+    image_dir = os.path.join(datadir, 'images')
+    image_files = glob(image_dir + "/*.jpg") 
+    image_files += glob(image_dir + "/*.png")
+    image_files = sorted(image_files)
+    
+    masks_dict = dict()
+    for image_path in tqdm(image_files):
+        image_base_name = os.path.basename(image_path)
+        output_mask = os.path.join(save_dir, image_base_name)
+                        
+        if os.path.exists(output_mask) and ignore_exists:
+            add_to_mask_dict(masks_dict, output_mask)
+            print(f'{output_mask} exists, skip')
+            continue
+        
+        cam = image_filename_to_cam(image_base_name)
+        box_threshold = BOX_TRESHOLD[cam]
+        
+        image_source, image = load_image(image_path)
+        boxes, logits, phrases = predict(
+            model=groundingdino_model, 
+            image=image, 
+            caption='sky', 
+            box_threshold=box_threshold, 
+            text_threshold=TEXT_TRESHOLD
+        )
+
+        print(f'detecting {boxes.shape[0]} boxed of sky in {image_path}, box_threshold: {box_threshold}, logits: {logits}')
+        if boxes.shape[0] != 0:
+            H, W, _ = image_source.shape
+            boxes = box_ops.box_cxcywh_to_xyxy(boxes)
+            boxes_xyxy = boxes * torch.Tensor([W, H, W, H])
+            # assume that the box prompt for sky should be close to the top edge of the image
+            #  --------------  top edge 
+            # | x ----       |
+            # | |    |       |
+            # |  ----x       |
+            #  --------------
+            boxes_mask = boxes_xyxy[:, 1] < 100 # 100 pixels
+            boxes_xyxy = boxes_xyxy[boxes_mask]
+        else:
+            boxes_xyxy = []
+        
+        num_boxes = len(boxes_xyxy)
+
+        if num_boxes == 0:                
+            mask = np.zeros_like(image_source[..., 0])
+        else:
+            sam_predictor.set_image(image_source)
+            transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_xyxy, image_source.shape[:2]).cuda()
+            masks, _, _ = sam_predictor.predict_torch(
+                        point_coords = None,
+                        point_labels = None,
+                        boxes = transformed_boxes,
+                        multimask_output = False,
+                    )
+            
+            torch.cuda.empty_cache()
+            mask_final = torch.zeros_like(masks[0, 0]).bool()
+            for mask in masks[:, 0]:
+                mask_final = mask_final | mask.bool()
+                
+            mask = mask_final.cpu().numpy()
+            
+        cv2.imwrite(output_mask, mask * 255)
+        add_to_mask_dict(masks_dict, output_mask)
+
+    print('saving sky mask video')
+    masks_dict = dict(sorted(masks_dict.items(), key=lambda x: x[0]))
+    merge_masks = []
+    for frame, masks in masks_dict.items():
+        merge_mask = np.concatenate(masks, axis=1)
+        text = f'frame: {frame}'
+        cv2.putText(merge_mask, text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 2) 
+        merge_masks.append(merge_mask)
+    merge_masks_path = os.path.join(save_dir, 'mask.mp4')
+    imageio.mimwrite(merge_masks_path, merge_masks, fps=24)
+    
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--datadir', required=True, type=str)
+    parser.add_argument('--box_threshold', nargs='+', type=float, default=[0.3]) # Change this to your threshold
+    parser.add_argument("--text_threshold", type=float, default=0.25)
+    parser.add_argument("--ignore_exists", action='store_true')
+    parser.add_argument("--sam_checkpoint", type=str)
+    
+    args = parser.parse_args()
+    setup(args)
+
+    assert isinstance(args.box_threshold, list)
+    if len(args.box_threshold) == 1:
+        box_threshold = [args.box_threshold[0]] * 5
+    else:
+        assert len(args.box_threshold) == 5
+        box_threshold = args.box_threshold
+    print('box_threshold: ', box_threshold)
+        
+    segment_with_text_prompt(
+        datadir=args.datadir, 
+        BOX_TRESHOLD=box_threshold,
+        TEXT_TRESHOLD=args.text_threshold,
+        ignore_exists=args.ignore_exists,
+    )