From 4be62a26b9f2f462dc24da702d5ea353387d3ebd Mon Sep 17 00:00:00 2001
From: Tomas Dougan
Date: Wed, 30 Apr 2025 16:16:33 -0400
Subject: [PATCH] Added TensorBoard logging of time elapsed during rendering
 and back propagation

---
 train.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/train.py b/train.py
index 8206903..86c0b80 100644
--- a/train.py
+++ b/train.py
@@ -57,8 +57,16 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
     bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
     background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
 
+    # Telemetry
+
     iter_start = torch.cuda.Event(enable_timing = True)
     iter_end = torch.cuda.Event(enable_timing = True)
+
+    render_start = torch.cuda.Event(enable_timing = True)
+    render_end = torch.cuda.Event(enable_timing = True)
+
+    bp_start = torch.cuda.Event(enable_timing = True)
+    bp_end = torch.cuda.Event(enable_timing = True)
 
     use_sparse_adam = opt.optimizer_type == "sparse_adam" and SPARSE_ADAM_AVAILABLE
     depth_l1_weight = get_expon_lr_func(opt.depth_l1_weight_init, opt.depth_l1_weight_final, max_steps=opt.iterations)
@@ -103,6 +111,9 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
             vind = viewpoint_indices.pop(rand_idx)
 
         # Render
+
+        render_start.record()
+
         if (iteration - 1) == debug_from:
             pipe.debug = True
 
@@ -115,7 +126,12 @@
             alpha_mask = viewpoint_cam.alpha_mask.cuda()
             image *= alpha_mask
 
+        render_end.record()
+
         # Loss
+
+        bp_start.record()
+
         gt_image = viewpoint_cam.original_image.cuda()
         Ll1 = l1_loss(image, gt_image)
         if FUSED_SSIM_AVAILABLE:
@@ -141,6 +157,8 @@
 
         loss.backward()
 
+        bp_end.record()
+
         iter_end.record()
 
         with torch.no_grad():
@@ -155,7 +173,7 @@
                 progress_bar.close()
 
             # Log and save
-            training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background, 1., SPARSE_ADAM_AVAILABLE, None, dataset.train_test_exp), dataset.train_test_exp)
+            training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), render_start.elapsed_time(render_end), bp_start.elapsed_time(bp_end), testing_iterations, scene, render, (pipe, background, 1., SPARSE_ADAM_AVAILABLE, None, dataset.train_test_exp), dataset.train_test_exp)
             if (iteration in saving_iterations):
                 print("\n[ITER {}] Saving Gaussians".format(iteration))
                 scene.save(iteration)
@@ -211,11 +229,13 @@ def prepare_output_and_logger(args):
         print("Tensorboard not available: not logging progress")
     return tb_writer
 
-def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, train_test_exp):
+def training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_elapsed, render_elapsed, bp_elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, train_test_exp):
     if tb_writer:
         tb_writer.add_scalar('train_loss_patches/l1_loss', Ll1.item(), iteration)
         tb_writer.add_scalar('train_loss_patches/total_loss', loss.item(), iteration)
-        tb_writer.add_scalar('iter_time', elapsed, iteration)
+        tb_writer.add_scalar('iter_time', iter_elapsed, iteration)
+        tb_writer.add_scalar('render_time', render_elapsed, iteration)
+        tb_writer.add_scalar('bp_time', bp_elapsed, iteration)
 
     # Report test and samples of training set
     if iteration in testing_iterations:
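
Note on the mechanism (a sketch, not part of the patch): torch.cuda.Event with
enable_timing=True records markers on the CUDA stream, start.elapsed_time(end)
returns the elapsed milliseconds once both events have completed, and
SummaryWriter.add_scalar pushes each value to TensorBoard. The standalone
sketch below illustrates the same pattern; the dummy workload, log directory,
and loop count are hypothetical placeholders, not code from train.py.

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/timing_demo")  # hypothetical log directory

# Paired events; enable_timing=True is required for elapsed_time().
render_start = torch.cuda.Event(enable_timing=True)
render_end = torch.cuda.Event(enable_timing=True)
bp_start = torch.cuda.Event(enable_timing=True)
bp_end = torch.cuda.Event(enable_timing=True)

x = torch.randn(2048, 2048, device="cuda", requires_grad=True)

for iteration in range(1, 11):
    # Stand-in for the render step: any forward work on the current stream.
    render_start.record()
    loss = (x @ x).relu().mean()
    render_end.record()

    # Stand-in for back propagation.
    bp_start.record()
    loss.backward()
    bp_end.record()

    # elapsed_time() is only valid once both events have completed, so
    # synchronize (or call event.synchronize()) before reading it.
    torch.cuda.synchronize()
    writer.add_scalar("render_time", render_start.elapsed_time(render_end), iteration)  # ms
    writer.add_scalar("bp_time", bp_start.elapsed_time(bp_end), iteration)  # ms

    x.grad = None  # clear accumulated gradients between iterations

writer.close()

Because the events are recorded on the stream itself, this measures GPU time
without forcing a host sync at every marker, unlike wrapping asynchronous
kernel launches in time.time(). The new scalars then appear alongside
iter_time in the TensorBoard UI (tensorboard --logdir runs).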