Added TensorBoard logging of time elapsed during rendering and backpropagation

2025-06-26 18:18:11 +00:00 · 2025-04-30 16:16:33 -04:00 · 2025-04-30 16:16:33 -04:00 · 4be62a26b9
commit 4be62a26b9
parent 54c035f783
1 changed files with 23 additions and 3 deletions
--- a/train.py
+++ b/train.py
@ -57,8 +57,16 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
    bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
    background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")

+    # Telemetry
+
    iter_start = torch.cuda.Event(enable_timing = True)
    iter_end = torch.cuda.Event(enable_timing = True)
+    
+    render_start = torch.cuda.Event(enable_timing = True)
+    render_end = torch.cuda.Event(enable_timing = True)
+
+    bp_start = torch.cuda.Event(enable_timing = True)
+    bp_end = torch.cuda.Event(enable_timing = True)

    use_sparse_adam = opt.optimizer_type == "sparse_adam" and SPARSE_ADAM_AVAILABLE 
    depth_l1_weight = get_expon_lr_func(opt.depth_l1_weight_init, opt.depth_l1_weight_final, max_steps=opt.iterations)
@ -103,6 +111,9 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
        vind = viewpoint_indices.pop(rand_idx)

        # Render
+
+        render_start.record()
+
        if (iteration - 1) == debug_from:
            pipe.debug = True

@ -115,7 +126,12 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
            alpha_mask = viewpoint_cam.alpha_mask.cuda()
            image *= alpha_mask

+        render_end.record()
+
        # Loss
+
+        bp_start.record()
+
        gt_image = viewpoint_cam.original_image.cuda()
        Ll1 = l1_loss(image, gt_image)
        if FUSED_SSIM_AVAILABLE:
@ -141,6 +157,8 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi

        loss.backward()

+        bp_end.record()
+
        iter_end.record()

        with torch.no_grad():
@ -155,7 +173,7 @@ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoi
                progress_bar.close()

            # Log and save
-            training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background, 1., SPARSE_ADAM_AVAILABLE, None, dataset.train_test_exp), dataset.train_test_exp)
+            training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), render_start.elapsed_time(render_end), bp_start.elapsed_time(bp_end), testing_iterations, scene, render, (pipe, background, 1., SPARSE_ADAM_AVAILABLE, None, dataset.train_test_exp), dataset.train_test_exp)
            if (iteration in saving_iterations):
                print("\n[ITER {}] Saving Gaussians".format(iteration))
                scene.save(iteration)
@ -211,11 +229,13 @@ def prepare_output_and_logger(args):
        print("Tensorboard not available: not logging progress")
    return tb_writer

-def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, train_test_exp):
+def training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_elapsed, render_elapsed, bp_elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, train_test_exp):
    if tb_writer:
        tb_writer.add_scalar('train_loss_patches/l1_loss', Ll1.item(), iteration)
        tb_writer.add_scalar('train_loss_patches/total_loss', loss.item(), iteration)
-        tb_writer.add_scalar('iter_time', elapsed, iteration)
+        tb_writer.add_scalar('iter_time', iter_elapsed, iteration)
+        tb_writer.add_scalar('render_time', render_elapsed, iteration)
+        tb_writer.add_scalar('bp_time', bp_elapsed, iteration)

    # Report test and samples of training set
    if iteration in testing_iterations: