Added TensorBoard logging of time elapsed during rendering and backpropagation

This commit is contained in:
Tomas Dougan 2025-04-30 16:16:33 -04:00
parent 54c035f783
commit 4be62a26b9

View File

@ -57,8 +57,16 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
# Telemetry
iter_start = torch.cuda.Event(enable_timing = True)
iter_end = torch.cuda.Event(enable_timing = True)
render_start = torch.cuda.Event(enable_timing = True)
render_end = torch.cuda.Event(enable_timing = True)
bp_start = torch.cuda.Event(enable_timing = True)
bp_end = torch.cuda.Event(enable_timing = True)
use_sparse_adam = opt.optimizer_type == "sparse_adam" and SPARSE_ADAM_AVAILABLE
depth_l1_weight = get_expon_lr_func(opt.depth_l1_weight_init, opt.depth_l1_weight_final, max_steps=opt.iterations)
@ -103,6 +111,9 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
vind = viewpoint_indices.pop(rand_idx)
# Render
render_start.record()
if (iteration - 1) == debug_from:
pipe.debug = True
@ -115,7 +126,12 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
alpha_mask = viewpoint_cam.alpha_mask.cuda()
image *= alpha_mask
render_end.record()
# Loss
bp_start.record()
gt_image = viewpoint_cam.original_image.cuda()
Ll1 = l1_loss(image, gt_image)
if FUSED_SSIM_AVAILABLE:
@ -141,6 +157,8 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
loss.backward()
bp_end.record()
iter_end.record()
with torch.no_grad():
@ -155,7 +173,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
progress_bar.close()
# Log and save
training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background, 1., SPARSE_ADAM_AVAILABLE, None, dataset.train_test_exp), dataset.train_test_exp)
training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), render_start.elapsed_time(render_end), bp_start.elapsed_time(bp_end), testing_iterations, scene, render, (pipe, background, 1., SPARSE_ADAM_AVAILABLE, None, dataset.train_test_exp), dataset.train_test_exp)
if (iteration in saving_iterations):
print("\n[ITER {}] Saving Gaussians".format(iteration))
scene.save(iteration)
@ -211,11 +229,13 @@ def prepare_output_and_logger(args):
print("Tensorboard not available: not logging progress")
return tb_writer
def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, train_test_exp):
def training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_elapsed, render_elapsed, bp_elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, train_test_exp):
if tb_writer:
tb_writer.add_scalar('train_loss_patches/l1_loss', Ll1.item(), iteration)
tb_writer.add_scalar('train_loss_patches/total_loss', loss.item(), iteration)
tb_writer.add_scalar('iter_time', elapsed, iteration)
tb_writer.add_scalar('iter_time', iter_elapsed, iteration)
tb_writer.add_scalar('render_time', render_elapsed, iteration)
tb_writer.add_scalar('bp_time', bp_elapsed, iteration)
# Report test and samples of training set
if iteration in testing_iterations: