docs: update README

2025-06-26 18:25:49 +00:00 · 2023-12-17 00:15:09 +08:00
parent 50ecd13a88
commit 624aa2b8ce
44 changed files with 1113 additions and 65 deletions
--- a/preprocess_image.py
+++ b/preprocess_image.py
@@ -130,75 +130,70 @@ def preprocess_single_image(img_path, args):
    print(img_path)
    image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    carved_image = None
-    # debug
+
    if image.shape[-1] == 4:
-        if args.do_rm_bg_force:
-            image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
-        else:
-            carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
-            image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
+        carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
+        image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)

    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

-    if args.do_seg:
-        if carved_image is None:
-            # carve background
-            print(f'[INFO] background removal...')
-            carved_image = BackgroundRemoval()(image) # [H, W, 4]
-        mask = carved_image[..., -1] > 0
+    if carved_image is None:
+        # carve background
+        print(f'[INFO] background removal...')
+        carved_image = BackgroundRemoval()(image) # [H, W, 4]
+    mask = carved_image[..., -1] > 0

-        # predict depth
-        print(f'[INFO] depth estimation...')
-        dpt_depth_model = DPT(task='depth')
-        depth = dpt_depth_model(image)[0]
-        depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
-        depth[~mask] = 0
-        depth = (depth * 255).astype(np.uint8)
-        del dpt_depth_model
+    # predict depth
+    print(f'[INFO] depth estimation...')
+    dpt_depth_model = DPT(task='depth')
+    depth = dpt_depth_model(image)[0]
+    depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
+    depth[~mask] = 0
+    depth = (depth * 255).astype(np.uint8)
+    del dpt_depth_model

-        # predict normal
-        print(f'[INFO] normal estimation...')
-        dpt_normal_model = DPT(task='normal')
-        normal = dpt_normal_model(image)[0]
-        normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
-        normal[~mask] = 0
-        del dpt_normal_model
+    # predict normal
+    print(f'[INFO] normal estimation...')
+    dpt_normal_model = DPT(task='normal')
+    normal = dpt_normal_model(image)[0]
+    normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
+    normal[~mask] = 0
+    del dpt_normal_model

-        opt.recenter=False
-        # recenter
-        if opt.recenter:
-            print(f'[INFO] recenter...')
-            final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
-            final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
-            final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)
+    # recenter
+    if opt.recenter:
+        print(f'[INFO] recenter...')
+        final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
+        final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
+        final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)

-            coords = np.nonzero(mask)
-            x_min, x_max = coords[0].min(), coords[0].max()
-            y_min, y_max = coords[1].min(), coords[1].max()
-            h = x_max - x_min
-            w = y_max - y_min
-            desired_size = int(opt.size * (1 - opt.border_ratio))
-            scale = desired_size / max(h, w)
-            h2 = int(h * scale)
-            w2 = int(w * scale)
-            x2_min = (opt.size - h2) // 2
-            x2_max = x2_min + h2
-            y2_min = (opt.size - w2) // 2
-            y2_max = y2_min + w2
-            final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
-            final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
-            final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
+        coords = np.nonzero(mask)
+        x_min, x_max = coords[0].min(), coords[0].max()
+        y_min, y_max = coords[1].min(), coords[1].max()
+        h = x_max - x_min
+        w = y_max - y_min
+        desired_size = int(opt.size * (1 - opt.border_ratio))
+        scale = desired_size / max(h, w)
+        h2 = int(h * scale)
+        w2 = int(w * scale)
+        x2_min = (opt.size - h2) // 2
+        x2_max = x2_min + h2
+        y2_min = (opt.size - w2) // 2
+        y2_max = y2_min + w2
+        final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
+        final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
+        final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)

-        else:
-            final_rgba = carved_image
-            final_depth = depth
-            final_normal = normal
+    else:
+        final_rgba = carved_image
+        final_depth = depth
+        final_normal = normal

-        # write output
-        cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
-        cv2.imwrite(out_depth, final_depth)
-        cv2.imwrite(out_normal, final_normal)
+    # write output
+    cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
+    cv2.imwrite(out_depth, final_depth)
+    cv2.imwrite(out_normal, final_normal)

    if opt.do_caption:
        # predict caption (it's too slow... use your brain instead)
@@ -215,11 +210,8 @@ if __name__ == '__main__':
    parser.add_argument('path', type=str, help="path to image (png, jpeg, etc.)")
    parser.add_argument('--size', default=1024, type=int, help="output resolution")
    parser.add_argument('--border_ratio', default=0.1, type=float, help="output border ratio")
-    parser.add_argument('--recenter', type=bool, default=False, help="recenter, potentially not helpful for multiview zero123")
-    parser.add_argument('--dont_recenter', dest='recenter', action='store_false')
-    parser.add_argument('--do_caption', type=bool, default=False, help="do text captioning")
-    parser.add_argument('--do_seg', type=bool, default=True)
-    parser.add_argument('--do_rm_bg_force', type=bool, default=False)
+    parser.add_argument('--recenter', action='store_true', help="recenter, potentially not helpful for multiview zero123")
+    parser.add_argument('--do_caption', action='store_true', help="do text captioning")
    
    opt = parser.parse_args()