mirror of
https://github.com/deepseek-ai/DreamCraft3D
synced 2025-06-26 18:25:49 +00:00
docs: update README
This commit is contained in:
@@ -130,75 +130,70 @@ def preprocess_single_image(img_path, args):
|
||||
print(img_path)
|
||||
image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
|
||||
carved_image = None
|
||||
# debug
|
||||
|
||||
if image.shape[-1] == 4:
|
||||
if args.do_rm_bg_force:
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
|
||||
else:
|
||||
carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
|
||||
carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
|
||||
|
||||
else:
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
if args.do_seg:
|
||||
if carved_image is None:
|
||||
# carve background
|
||||
print(f'[INFO] background removal...')
|
||||
carved_image = BackgroundRemoval()(image) # [H, W, 4]
|
||||
mask = carved_image[..., -1] > 0
|
||||
if carved_image is None:
|
||||
# carve background
|
||||
print(f'[INFO] background removal...')
|
||||
carved_image = BackgroundRemoval()(image) # [H, W, 4]
|
||||
mask = carved_image[..., -1] > 0
|
||||
|
||||
# predict depth
|
||||
print(f'[INFO] depth estimation...')
|
||||
dpt_depth_model = DPT(task='depth')
|
||||
depth = dpt_depth_model(image)[0]
|
||||
depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
|
||||
depth[~mask] = 0
|
||||
depth = (depth * 255).astype(np.uint8)
|
||||
del dpt_depth_model
|
||||
# predict depth
|
||||
print(f'[INFO] depth estimation...')
|
||||
dpt_depth_model = DPT(task='depth')
|
||||
depth = dpt_depth_model(image)[0]
|
||||
depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
|
||||
depth[~mask] = 0
|
||||
depth = (depth * 255).astype(np.uint8)
|
||||
del dpt_depth_model
|
||||
|
||||
# predict normal
|
||||
print(f'[INFO] normal estimation...')
|
||||
dpt_normal_model = DPT(task='normal')
|
||||
normal = dpt_normal_model(image)[0]
|
||||
normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
|
||||
normal[~mask] = 0
|
||||
del dpt_normal_model
|
||||
# predict normal
|
||||
print(f'[INFO] normal estimation...')
|
||||
dpt_normal_model = DPT(task='normal')
|
||||
normal = dpt_normal_model(image)[0]
|
||||
normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
|
||||
normal[~mask] = 0
|
||||
del dpt_normal_model
|
||||
|
||||
opt.recenter=False
|
||||
# recenter
|
||||
if opt.recenter:
|
||||
print(f'[INFO] recenter...')
|
||||
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
|
||||
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
|
||||
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)
|
||||
# recenter
|
||||
if opt.recenter:
|
||||
print(f'[INFO] recenter...')
|
||||
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
|
||||
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
|
||||
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)
|
||||
|
||||
coords = np.nonzero(mask)
|
||||
x_min, x_max = coords[0].min(), coords[0].max()
|
||||
y_min, y_max = coords[1].min(), coords[1].max()
|
||||
h = x_max - x_min
|
||||
w = y_max - y_min
|
||||
desired_size = int(opt.size * (1 - opt.border_ratio))
|
||||
scale = desired_size / max(h, w)
|
||||
h2 = int(h * scale)
|
||||
w2 = int(w * scale)
|
||||
x2_min = (opt.size - h2) // 2
|
||||
x2_max = x2_min + h2
|
||||
y2_min = (opt.size - w2) // 2
|
||||
y2_max = y2_min + w2
|
||||
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
|
||||
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
|
||||
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
|
||||
coords = np.nonzero(mask)
|
||||
x_min, x_max = coords[0].min(), coords[0].max()
|
||||
y_min, y_max = coords[1].min(), coords[1].max()
|
||||
h = x_max - x_min
|
||||
w = y_max - y_min
|
||||
desired_size = int(opt.size * (1 - opt.border_ratio))
|
||||
scale = desired_size / max(h, w)
|
||||
h2 = int(h * scale)
|
||||
w2 = int(w * scale)
|
||||
x2_min = (opt.size - h2) // 2
|
||||
x2_max = x2_min + h2
|
||||
y2_min = (opt.size - w2) // 2
|
||||
y2_max = y2_min + w2
|
||||
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
|
||||
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
|
||||
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
|
||||
|
||||
else:
|
||||
final_rgba = carved_image
|
||||
final_depth = depth
|
||||
final_normal = normal
|
||||
else:
|
||||
final_rgba = carved_image
|
||||
final_depth = depth
|
||||
final_normal = normal
|
||||
|
||||
# write output
|
||||
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
|
||||
cv2.imwrite(out_depth, final_depth)
|
||||
cv2.imwrite(out_normal, final_normal)
|
||||
# write output
|
||||
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
|
||||
cv2.imwrite(out_depth, final_depth)
|
||||
cv2.imwrite(out_normal, final_normal)
|
||||
|
||||
if opt.do_caption:
|
||||
# predict caption (it's too slow... use your brain instead)
|
||||
@@ -215,11 +210,8 @@ if __name__ == '__main__':
|
||||
parser.add_argument('path', type=str, help="path to image (png, jpeg, etc.)")
|
||||
parser.add_argument('--size', default=1024, type=int, help="output resolution")
|
||||
parser.add_argument('--border_ratio', default=0.1, type=float, help="output border ratio")
|
||||
parser.add_argument('--recenter', type=bool, default=False, help="recenter, potentially not helpful for multiview zero123")
|
||||
parser.add_argument('--dont_recenter', dest='recenter', action='store_false')
|
||||
parser.add_argument('--do_caption', type=bool, default=False, help="do text captioning")
|
||||
parser.add_argument('--do_seg', type=bool, default=True)
|
||||
parser.add_argument('--do_rm_bg_force', type=bool, default=False)
|
||||
parser.add_argument('--recenter', action='store_true', help="recenter, potentially not helpful for multiview zero123")
|
||||
parser.add_argument('--do_caption', action='store_true', help="do text captioning")
|
||||
|
||||
opt = parser.parse_args()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user