docs: update README

This commit is contained in:
MrTornado24
2023-12-17 00:15:09 +08:00
parent 50ecd13a88
commit 624aa2b8ce
44 changed files with 1113 additions and 65 deletions

View File

@@ -130,75 +130,70 @@ def preprocess_single_image(img_path, args):
print(img_path)
image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
carved_image = None
# debug
if image.shape[-1] == 4:
if args.do_rm_bg_force:
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
else:
carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
carved_image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
else:
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if args.do_seg:
if carved_image is None:
# carve background
print(f'[INFO] background removal...')
carved_image = BackgroundRemoval()(image) # [H, W, 4]
mask = carved_image[..., -1] > 0
if carved_image is None:
# carve background
print(f'[INFO] background removal...')
carved_image = BackgroundRemoval()(image) # [H, W, 4]
mask = carved_image[..., -1] > 0
# predict depth
print(f'[INFO] depth estimation...')
dpt_depth_model = DPT(task='depth')
depth = dpt_depth_model(image)[0]
depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
depth[~mask] = 0
depth = (depth * 255).astype(np.uint8)
del dpt_depth_model
# predict depth
print(f'[INFO] depth estimation...')
dpt_depth_model = DPT(task='depth')
depth = dpt_depth_model(image)[0]
depth[mask] = (depth[mask] - depth[mask].min()) / (depth[mask].max() - depth[mask].min() + 1e-9)
depth[~mask] = 0
depth = (depth * 255).astype(np.uint8)
del dpt_depth_model
# predict normal
print(f'[INFO] normal estimation...')
dpt_normal_model = DPT(task='normal')
normal = dpt_normal_model(image)[0]
normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
normal[~mask] = 0
del dpt_normal_model
# predict normal
print(f'[INFO] normal estimation...')
dpt_normal_model = DPT(task='normal')
normal = dpt_normal_model(image)[0]
normal = (normal * 255).astype(np.uint8).transpose(1, 2, 0)
normal[~mask] = 0
del dpt_normal_model
opt.recenter=False
# recenter
if opt.recenter:
print(f'[INFO] recenter...')
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)
# recenter
if opt.recenter:
print(f'[INFO] recenter...')
final_rgba = np.zeros((opt.size, opt.size, 4), dtype=np.uint8)
final_depth = np.zeros((opt.size, opt.size), dtype=np.uint8)
final_normal = np.zeros((opt.size, opt.size, 3), dtype=np.uint8)
coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
coords = np.nonzero(mask)
x_min, x_max = coords[0].min(), coords[0].max()
y_min, y_max = coords[1].min(), coords[1].max()
h = x_max - x_min
w = y_max - y_min
desired_size = int(opt.size * (1 - opt.border_ratio))
scale = desired_size / max(h, w)
h2 = int(h * scale)
w2 = int(w * scale)
x2_min = (opt.size - h2) // 2
x2_max = x2_min + h2
y2_min = (opt.size - w2) // 2
y2_max = y2_min + w2
final_rgba[x2_min:x2_max, y2_min:y2_max] = cv2.resize(carved_image[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_depth[x2_min:x2_max, y2_min:y2_max] = cv2.resize(depth[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
final_normal[x2_min:x2_max, y2_min:y2_max] = cv2.resize(normal[x_min:x_max, y_min:y_max], (w2, h2), interpolation=cv2.INTER_AREA)
else:
final_rgba = carved_image
final_depth = depth
final_normal = normal
else:
final_rgba = carved_image
final_depth = depth
final_normal = normal
# write output
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
cv2.imwrite(out_depth, final_depth)
cv2.imwrite(out_normal, final_normal)
# write output
cv2.imwrite(out_rgba, cv2.cvtColor(final_rgba, cv2.COLOR_RGBA2BGRA))
cv2.imwrite(out_depth, final_depth)
cv2.imwrite(out_normal, final_normal)
if opt.do_caption:
# predict caption (it's too slow... use your brain instead)
@@ -215,11 +210,8 @@ if __name__ == '__main__':
parser.add_argument('path', type=str, help="path to image (png, jpeg, etc.)")
parser.add_argument('--size', default=1024, type=int, help="output resolution")
parser.add_argument('--border_ratio', default=0.1, type=float, help="output border ratio")
parser.add_argument('--recenter', type=bool, default=False, help="recenter, potentially not helpful for multiview zero123")
parser.add_argument('--dont_recenter', dest='recenter', action='store_false')
parser.add_argument('--do_caption', type=bool, default=False, help="do text captioning")
parser.add_argument('--do_seg', type=bool, default=True)
parser.add_argument('--do_rm_bg_force', type=bool, default=False)
parser.add_argument('--recenter', action='store_true', help="recenter, potentially not helpful for multiview zero123")
parser.add_argument('--do_caption', action='store_true', help="do text captioning")
opt = parser.parse_args()